Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[c++] Fix memory-management issues in new domainish helpers #3030

Merged
merged 1 commit into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 115 additions & 22 deletions libtiledbsoma/src/utils/arrow_adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,58 +39,88 @@ namespace tiledbsoma {
using namespace tiledb;

void ArrowAdapter::release_schema(struct ArrowSchema* schema) {
std::string name_for_log(
schema->name == nullptr ? "anonymous" : schema->name);
if (schema->name != nullptr)
LOG_DEBUG(
fmt::format("[ArrowAdapter] release_schema for {}", schema->name));
LOG_DEBUG(fmt::format(
"[ArrowAdapter] release_schema start for {}", schema->name));

if (schema->name != nullptr) {
LOG_TRACE("[ArrowAdapter] release_schema schema->name");
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema schema->name {}", schema->name));
free((void*)schema->name);
schema->name = nullptr;
}
if (schema->format != nullptr) {
LOG_TRACE("[ArrowAdapter] release_schema schema->format");
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema name {} schema->format {}",
name_for_log,
schema->format));
free((void*)schema->format);
schema->format = nullptr;
}
if (schema->metadata != nullptr) {
LOG_TRACE("[ArrowAdapter] release_schema schema->metadata");
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema name {} schema->metadata",
name_for_log));
free((void*)schema->metadata);
schema->metadata = nullptr;
}

if (schema->children != nullptr) {
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema name {} n_children {} begin "
"recurse ",
name_for_log,
schema->n_children));

for (auto i = 0; i < schema->n_children; i++) {
if (schema->children[i] != nullptr) {
if (schema->children[i]->release != nullptr) {
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema schema->child {} "
"[ArrowAdapter] release_schema name {} schema->child "
"{} "
"release",
name_for_log,
i));
release_schema(schema->children[i]);
}
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema schema->child {} free", i));
"[ArrowAdapter] release_schema name {} schema->child {} "
"free",
name_for_log,
i));
free(schema->children[i]);
schema->children[i] = nullptr;
}
}
LOG_TRACE("[ArrowAdapter] release_schema schema->children");

LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema name {} n_children {} end recurse ",
name_for_log,
schema->n_children));

free(schema->children);
schema->children = nullptr;
}

if (schema->dictionary != nullptr) {
if (schema->dictionary->release != nullptr) {
LOG_TRACE("[ArrowAdapter] release_schema schema->dict release");
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema name {} schema->dict release",
name_for_log));
release_schema(schema->dictionary);
}
LOG_TRACE("[ArrowAdapter] release_schema schema->dict free");
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema name {} schema->dict free",
name_for_log));
free(schema->dictionary);
schema->dictionary = nullptr;
}

schema->release = nullptr;
LOG_TRACE("[ArrowAdapter] release_schema done");
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema name {} done", name_for_log));
}

void ArrowAdapter::release_array(struct ArrowArray* array) {
Expand Down Expand Up @@ -124,6 +154,7 @@ void ArrowAdapter::release_array(struct ArrowArray* array) {
LOG_TRACE(fmt::format(
"[ArrowAdapter] release_schema array->child {} free", i));
free(array->children[i]);
array->children[i] = nullptr;
}
}
LOG_TRACE("[ArrowAdapter] release_array array->children");
Expand Down Expand Up @@ -155,10 +186,19 @@ std::unique_ptr<ArrowSchema> ArrowAdapter::arrow_schema_from_tiledb_array(

std::unique_ptr<ArrowSchema> arrow_schema = std::make_unique<ArrowSchema>();
arrow_schema->format = strdup("+s");
arrow_schema->name = strdup("parent");
arrow_schema->metadata = nullptr;
arrow_schema->flags = 0;
arrow_schema->n_children = ndim + nattr;
arrow_schema->dictionary = nullptr;
arrow_schema->release = &ArrowAdapter::release_schema;
arrow_schema->private_data = nullptr;

arrow_schema->children = (ArrowSchema**)malloc(
arrow_schema->n_children * sizeof(ArrowSchema*));
LOG_DEBUG(fmt::format(
"[ArrowAdapter] arrow_schema_from_tiledb_array n_children {}",
arrow_schema->n_children));

ArrowSchema* child = nullptr;

Expand All @@ -172,9 +212,16 @@ std::unique_ptr<ArrowSchema> ArrowAdapter::arrow_schema_from_tiledb_array(
child->metadata = nullptr;
child->flags = 0;
child->n_children = 0;
child->dictionary = nullptr;
child->children = nullptr;
child->dictionary = nullptr;
child->release = &ArrowAdapter::release_schema;
child->private_data = nullptr;
LOG_TRACE(fmt::format(
"[ArrowAdapter] arrow_schema_from_tiledb_array dim {} name {} "
"format {}",
i,
child->format,
child->name));
}

for (uint32_t i = 0; i < nattr; ++i) {
Expand All @@ -185,6 +232,7 @@ std::unique_ptr<ArrowSchema> ArrowAdapter::arrow_schema_from_tiledb_array(
ArrowAdapter::to_arrow_format(attr.type()).data());
child->name = strdup(attr.name().c_str());
child->metadata = nullptr;
child->flags = 0;
if (attr.nullable()) {
child->flags |= ARROW_FLAG_NULLABLE;
} else {
Expand All @@ -193,6 +241,15 @@ std::unique_ptr<ArrowSchema> ArrowAdapter::arrow_schema_from_tiledb_array(
child->n_children = 0;
child->children = nullptr;
child->dictionary = nullptr;
child->release = &ArrowAdapter::release_schema;
child->private_data = nullptr;

LOG_TRACE(fmt::format(
"[ArrowAdapter] arrow_schema_from_tiledb_array attr {} name {} "
"format {}",
i,
child->format,
child->name));

auto enmr_name = AttributeExperimental::get_enumeration_name(
*ctx, attr);
Expand Down Expand Up @@ -754,7 +811,9 @@ ArraySchema ArrowAdapter::tiledb_schema_from_arrow_schema(
auto type = ArrowAdapter::to_tiledb_format(child->format);

LOG_DEBUG(fmt::format(
"[ArrowAdapter] schema pass for {}", std::string(child->name)));
"[ArrowAdapter] schema pass for child {} name {}",
sch_idx,
std::string(child->name)));

bool isattr = true;

Expand Down Expand Up @@ -1260,28 +1319,48 @@ std::unique_ptr<ArrowSchema> ArrowAdapter::make_arrow_schema(

if (num_names != num_types) {
throw TileDBSOMAError(fmt::format(
"ArrowAdapter::make_arrow_schema: internal coding error {} != {}",
"ArrowAdapter::make_arrow_schema: internal coding error: num_types "
"{} != num_names {}",
num_names,
num_types));
}

auto arrow_schema = std::make_unique<ArrowSchema>();
arrow_schema->format = "+s"; // structure, i.e. non-leaf node
arrow_schema->format = "+s"; // structure, i.e. non-leaf node
arrow_schema->name = strdup("parent");
arrow_schema->metadata = nullptr;
arrow_schema->flags = 0;
arrow_schema->n_children = num_names; // non-leaf node
arrow_schema->children = (ArrowSchema**)malloc(
arrow_schema->n_children * sizeof(ArrowSchema*));
arrow_schema->dictionary = nullptr;
arrow_schema->release = &ArrowAdapter::release_schema;
arrow_schema->children = new ArrowSchema*[arrow_schema->n_children];
arrow_schema->private_data = nullptr;

LOG_DEBUG(fmt::format(
"[ArrowAdapter] make_arrow_schema n_children {}",
arrow_schema->n_children));

for (int i = 0; i < (int)num_names; i++) {
ArrowSchema* dim_schema = new ArrowSchema;
dim_schema->name = strdup(names[i].c_str());
ArrowSchema* dim_schema = (ArrowSchema*)malloc(sizeof(ArrowSchema));
auto arrow_type_name = ArrowAdapter::tdb_to_arrow_type(
tiledb_datatypes[i]);
dim_schema->name = strdup(names[i].c_str());
dim_schema->format = strdup(arrow_type_name.c_str());
dim_schema->n_children = 0; // leaf node
dim_schema->metadata = nullptr;
dim_schema->flags = 0;
dim_schema->n_children = 0; // leaf node
dim_schema->children = nullptr; // leaf node
dim_schema->dictionary = nullptr;
dim_schema->release = &ArrowAdapter::release_schema;
dim_schema->private_data = nullptr;

arrow_schema->children[i] = dim_schema;
LOG_TRACE(fmt::format(
"[ArrowAdapter] make_arrow_schema child {} format {} name {}",
i,
dim_schema->format,
dim_schema->name));
}

return arrow_schema;
Expand All @@ -1297,18 +1376,32 @@ std::unique_ptr<ArrowArray> ArrowAdapter::make_arrow_array_parent(
arrow_array->null_count = 0;
arrow_array->offset = 0;
arrow_array->n_buffers = 0;
arrow_array->buffers = nullptr;

arrow_array->n_children = num_columns;
arrow_array->buffers = nullptr;
arrow_array->dictionary = nullptr;
arrow_array->release = &ArrowAdapter::release_array;
arrow_array->children = new ArrowArray*[num_columns];
arrow_array->private_data = nullptr;

arrow_array->children = (ArrowArray**)malloc(
num_columns * sizeof(ArrowArray*));
for (int i = 0; i < num_columns; i++) {
arrow_array->children[i] = nullptr;
}

LOG_DEBUG(fmt::format(
"[ArrowAdapter] make_arrow_array n_children {}",
arrow_array->n_children));

return arrow_array;
}

void ArrowAdapter::log_make_arrow_array_child(ArrowArray* child) {
LOG_TRACE(fmt::format(
"[ArrowAdapter] make_arrow_array_child length {} n_buffers {}",
child->length,
child->n_buffers));
}

void ArrowAdapter::_check_shapes(
ArrowArray* arrow_array, ArrowSchema* arrow_schema) {
if (arrow_array->n_children != arrow_schema->n_children) {
Expand Down
32 changes: 24 additions & 8 deletions libtiledbsoma/src/utils/arrow_adapter.h
Original file line number Diff line number Diff line change
Expand Up @@ -280,8 +280,16 @@ class ArrowAdapter {
template <typename T>
static ArrowArray* make_arrow_array_child(const std::pair<T, T>& pair) {
std::vector<T> v({pair.first, pair.second});
return make_arrow_array_child<T>(v);
ArrowArray* child = make_arrow_array_child<T>(v);
return child;
}
/**
* Helper for make_arrow_array_child. We need the templating
* in the header file so it's instantiated at all callsites.
* But fmt::format logging is hard in header files since
* #include <logger.h> is relative to the includer, which varies.
*/
static void log_make_arrow_array_child(ArrowArray* child);

static ArrowArray* make_arrow_array_child_string(
const std::pair<std::string, std::string>& pair) {
Expand All @@ -297,8 +305,8 @@ class ArrowAdapter {
"failure.");
}

// Use new here, not malloc, to match ArrowAdapter::release_array
auto arrow_array = new ArrowArray;
// Use malloc here, not new, to match ArrowAdapter::release_array
auto arrow_array = (ArrowArray*)malloc(sizeof(ArrowArray));

size_t n = v.size();

Expand All @@ -311,8 +319,10 @@ class ArrowAdapter {
// * Slot 1 is data, void* but will be derefrenced as T*
// * There is no offset information
arrow_array->n_buffers = 2;
arrow_array->buffers = new const void*[2];
arrow_array->n_children = 0; // leaf/child node
arrow_array->buffers = (const void**)malloc(2 * sizeof(void*));
arrow_array->children = nullptr; // leaf/child node
arrow_array->dictionary = nullptr;

// The nominal use of these methods as of this writing is for
// low-volume data such as schema information -- less than a
Expand All @@ -322,6 +332,7 @@ class ArrowAdapter {
// look at zero-copy for buffers, with variable approaches
// to memory management.
arrow_array->release = &ArrowAdapter::release_array;
arrow_array->private_data = nullptr;

arrow_array->buffers[0] = nullptr;
// Use malloc here, not new, to match ArrowAdapter::release_array
Expand All @@ -331,6 +342,8 @@ class ArrowAdapter {
}
arrow_array->buffers[1] = (void*)dest;

log_make_arrow_array_child(arrow_array);

return arrow_array;
}

Expand All @@ -345,8 +358,8 @@ class ArrowAdapter {
// We choose the latter.
static ArrowArray* make_arrow_array_child_string(
const std::vector<std::string>& v) {
// Use new here, not malloc, to match ArrowAdapter::release_array
auto arrow_array = new ArrowArray;
// Use malloc here, not new, to match ArrowAdapter::release_array
auto arrow_array = (ArrowArray*)malloc(sizeof(ArrowArray));

size_t n = v.size();

Expand All @@ -360,10 +373,13 @@ class ArrowAdapter {
// or uint64_t* for Arrow large_string
// * Slot 2 is data, void* but will be derefrenced as T*
arrow_array->n_buffers = 3;
arrow_array->buffers = new const void*[3];
arrow_array->n_children = 0; // leaf/child node
arrow_array->buffers = (const void**)malloc(3 * sizeof(void*));
arrow_array->n_children = 0; // leaf/child node
arrow_array->children = nullptr; // leaf/child node
arrow_array->dictionary = nullptr;

arrow_array->release = &ArrowAdapter::release_array;
arrow_array->private_data = nullptr;

size_t nbytes = 0;
for (auto e : v) {
Expand Down
Loading