diff --git a/Makefile b/Makefile index 7daa86c2..b201c792 100644 --- a/Makefile +++ b/Makefile @@ -476,6 +476,10 @@ $(BINDIR)/$(UNITDIR)/splinter_shmem_test: $(UTIL_SYS) \ $(COMMON_UNIT_TESTOBJ) \ $(LIBDIR)/libsplinterdb.so +$(BINDIR)/$(UNITDIR)/splinter_shmem_oom_test: $(UTIL_SYS) \ + $(COMMON_UNIT_TESTOBJ) \ + $(LIBDIR)/libsplinterdb.so + $(BINDIR)/$(UNITDIR)/splinter_ipc_test: $(UTIL_SYS) \ $(COMMON_UNIT_TESTOBJ) @@ -495,8 +499,6 @@ $(BINDIR)/$(UNITDIR)/splinterdb_heap_id_mgmt_test: $(COMMON_TESTOBJ) \ $(OBJDIR)/$(FUNCTIONAL_TESTSDIR)/test_async.o \ $(LIBDIR)/libsplinterdb.so - - ######################################## # Convenience mini unit-test targets unit/util_test: $(BINDIR)/$(UNITDIR)/util_test diff --git a/include/splinterdb/splinterdb.h b/include/splinterdb/splinterdb.h index 58b85ad2..6c7c76b8 100644 --- a/include/splinterdb/splinterdb.h +++ b/include/splinterdb/splinterdb.h @@ -175,7 +175,7 @@ splinterdb_open(const splinterdb_config *cfg, splinterdb **kvs); // Close a splinterdb // // This will flush all data to disk and release all resources -void +int splinterdb_close(splinterdb **kvs); // Register the current thread so that it can be used with splinterdb. diff --git a/src/PackedArray.c b/src/PackedArray.c index 2621fb43..939f1aa3 100644 --- a/src/PackedArray.c +++ b/src/PackedArray.c @@ -380,7 +380,6 @@ void PACKEDARRAY_JOIN(__PackedArray_unpack_, PACKEDARRAY_IMPL_BITS_PER_ITEM)(con #include "poison.h" #define PACKEDARRAY_MALLOC(size) platform_malloc(size) -#define PACKEDARRAY_FREE(p) platform_free(p) void PackedArray_pack(uint32* a, const uint32 offset, const uint32* in, uint32 count, size_t bitsPerItem) { diff --git a/src/btree.c b/src/btree.c index 5ab02c4b..d88b5abe 100644 --- a/src/btree.c +++ b/src/btree.c @@ -3103,6 +3103,12 @@ btree_pack_link_extent(btree_pack_req *req, req->num_edges[height] = 0; } +static inline bool +btree_pack_can_fit_tuple(btree_pack_req *req) +{ + return req->num_tuples < req->max_tuples; +} + static inline btree_node * btree_pack_create_next_node(btree_pack_req *req, uint64 height, key pivot) { @@ -3167,8 +3173,8 @@ btree_pack_loop(btree_pack_req *req, // IN/OUT log_trace_key(tuple_key, "btree_pack_loop (bottom)"); if (req->hash) { - platform_assert(req->num_tuples < req->max_tuples); - req->fingerprint_arr[req->num_tuples] = + platform_assert(btree_pack_can_fit_tuple(req)); + fingerprint_start(&req->fingerprint)[req->num_tuples] = req->hash(key_data(tuple_key), key_length(tuple_key), req->seed); } @@ -3216,12 +3222,6 @@ btree_pack_post_loop(btree_pack_req *req, key last_key) mini_release(&req->mini, last_key); } -static bool32 -btree_pack_can_fit_tuple(btree_pack_req *req, key tuple_key, message data) -{ - return req->num_tuples < req->max_tuples; -} - static void btree_pack_abort(btree_pack_req *req) { @@ -3259,7 +3259,7 @@ btree_pack(btree_pack_req *req) while (iterator_can_next(req->itor)) { iterator_curr(req->itor, &tuple_key, &data); - if (!btree_pack_can_fit_tuple(req, tuple_key, data)) { + if (!btree_pack_can_fit_tuple(req)) { platform_error_log("%s(): req->num_tuples=%lu exceeded output size " "limit, req->max_tuples=%lu\n", __func__, diff --git a/src/btree.h b/src/btree.h index 07acec18..89957a68 100644 --- a/src/btree.h +++ b/src/btree.h @@ -151,9 +151,9 @@ typedef struct btree_pack_req { btree_config *cfg; iterator *itor; // the itor which is being packed uint64 max_tuples; - hash_fn hash; // hash function used for calculating filter_hash - unsigned int seed; // seed used for calculating filter_hash - uint32 *fingerprint_arr; // IN/OUT: hashes of the keys in the tree + hash_fn hash; // hash function used for calculating filter_hash + unsigned int seed; // seed used for calculating filter_hash + fp_hdr fingerprint; // IN/OUT: hashes of the keys in the tree // internal data uint16 height; @@ -168,6 +168,7 @@ typedef struct btree_pack_req { uint64 num_tuples; // no. of tuples in the output tree uint64 key_bytes; // total size of keys in tuples of the output tree uint64 message_bytes; // total size of msgs in tuples of the output tree + uint64 line; // Caller's line # } btree_pack_req; struct btree_async_ctxt; @@ -325,6 +326,10 @@ btree_iterator_init(cache *cc, void btree_iterator_deinit(btree_iterator *itor); +/* + * Initialize BTree Pack request structure. May allocate memory for fingerprint + * array. + */ static inline platform_status btree_pack_req_init(btree_pack_req *req, cache *cc, @@ -343,26 +348,27 @@ btree_pack_req_init(btree_pack_req *req, req->hash = hash; req->seed = seed; if (hash != NULL && max_tuples > 0) { - req->fingerprint_arr = - TYPED_ARRAY_ZALLOC(hid, req->fingerprint_arr, max_tuples); + + fingerprint_init(&req->fingerprint, hid, max_tuples); // Allocates memory // When we run with shared-memory configured, we expect that it is sized // big-enough to not get OOMs from here. Hence, only a debug_assert(). - debug_assert(req->fingerprint_arr, + debug_assert(!fingerprint_is_empty(&req->fingerprint), "Unable to allocate memory for %lu tuples", max_tuples); - if (!req->fingerprint_arr) { + if (fingerprint_is_empty(&req->fingerprint)) { return STATUS_NO_MEMORY; } } return STATUS_OK; } +// Free memory if any was allocated for fingerprint array. static inline void btree_pack_req_deinit(btree_pack_req *req, platform_heap_id hid) { - if (req->fingerprint_arr) { - platform_free(hid, req->fingerprint_arr); + if (!fingerprint_is_empty(&req->fingerprint)) { + fingerprint_deinit(hid, &req->fingerprint); } } diff --git a/src/clockcache.c b/src/clockcache.c index b4de061e..6e48840f 100644 --- a/src/clockcache.c +++ b/src/clockcache.c @@ -1818,20 +1818,25 @@ clockcache_init(clockcache *cc, // OUT cc->heap_id = hid; /* lookup maps addrs to entries, entry contains the entries themselves */ - cc->lookup = - TYPED_ARRAY_MALLOC(cc->heap_id, cc->lookup, allocator_page_capacity); + platform_memfrag memfrag_cc_lookup; + cc->lookup = TYPED_ARRAY_MALLOC_MF( + &memfrag_cc_lookup, cc->heap_id, cc->lookup, allocator_page_capacity); if (!cc->lookup) { goto alloc_error; } + cc->lookup_size = memfrag_size(&memfrag_cc_lookup); + for (i = 0; i < allocator_page_capacity; i++) { cc->lookup[i] = CC_UNMAPPED_ENTRY; } - cc->entry = - TYPED_ARRAY_ZALLOC(cc->heap_id, cc->entry, cc->cfg->page_capacity); + platform_memfrag memfrag_cc_entry; + cc->entry = TYPED_ARRAY_ZALLOC_MF( + &memfrag_cc_entry, cc->heap_id, cc->entry, cc->cfg->page_capacity); if (!cc->entry) { goto alloc_error; } + cc->entry_size = memfrag_size(&memfrag_cc_entry); platform_status rc = STATUS_NO_MEMORY; @@ -1860,11 +1865,13 @@ clockcache_init(clockcache *cc, // OUT cc->refcount = platform_buffer_getaddr(&cc->rc_bh); /* Separate ref counts for pins */ - cc->pincount = - TYPED_ARRAY_ZALLOC(cc->heap_id, cc->pincount, cc->cfg->page_capacity); + platform_memfrag memfrag_cc_pincount; + cc->pincount = TYPED_ARRAY_ZALLOC_MF( + &memfrag_cc_pincount, cc->heap_id, cc->pincount, cc->cfg->page_capacity); if (!cc->pincount) { goto alloc_error; } + cc->pincount_size = memfrag_size(&memfrag_cc_pincount); /* The hands and associated page */ cc->free_hand = 0; @@ -1873,13 +1880,16 @@ clockcache_init(clockcache *cc, // OUT cc->per_thread[thr_i].free_hand = CC_UNMAPPED_ENTRY; cc->per_thread[thr_i].enable_sync_get = TRUE; } + platform_memfrag memfrag_cc_batch_busy; cc->batch_busy = - TYPED_ARRAY_ZALLOC(cc->heap_id, - cc->batch_busy, - cc->cfg->page_capacity / CC_ENTRIES_PER_BATCH); + TYPED_ARRAY_ZALLOC_MF(&memfrag_cc_batch_busy, + cc->heap_id, + cc->batch_busy, + (cc->cfg->page_capacity / CC_ENTRIES_PER_BATCH)); if (!cc->batch_busy) { goto alloc_error; } + cc->batch_busy_size = memfrag_size(&memfrag_cc_batch_busy); return STATUS_OK; @@ -1907,10 +1917,12 @@ clockcache_deinit(clockcache *cc) // IN/OUT } if (cc->lookup) { - platform_free(cc->heap_id, cc->lookup); + platform_free_mem(cc->heap_id, cc->lookup, cc->lookup_size); + cc->lookup = NULL; } if (cc->entry) { - platform_free(cc->heap_id, cc->entry); + platform_free_mem(cc->heap_id, cc->entry, cc->entry_size); + cc->entry = NULL; } debug_only platform_status rc = STATUS_TEST_FAILED; @@ -1929,11 +1941,15 @@ clockcache_deinit(clockcache *cc) // IN/OUT cc->refcount = NULL; } + platform_memfrag mf = {0}; if (cc->pincount) { - platform_free_volatile(cc->heap_id, cc->pincount); + memfrag_init(&mf, cc->heap_id, (void *)cc->pincount, cc->pincount_size); + platform_free_volatile(cc->heap_id, &mf); } if (cc->batch_busy) { - platform_free_volatile(cc->heap_id, cc->batch_busy); + memfrag_init( + &mf, cc->heap_id, (void *)cc->batch_busy, cc->batch_busy_size); + platform_free_volatile(cc->heap_id, &mf); } } diff --git a/src/clockcache.h b/src/clockcache.h index 647abc33..fd414bb1 100644 --- a/src/clockcache.h +++ b/src/clockcache.h @@ -139,15 +139,17 @@ struct clockcache { // Stats cache_stats stats[MAX_THREADS]; + size_t lookup_size; + size_t entry_size; + size_t pincount_size; + size_t batch_busy_size; }; - /* *----------------------------------------------------------------------------- * Function declarations *----------------------------------------------------------------------------- */ - void clockcache_config_init(clockcache_config *cache_config, io_config *io_cfg, diff --git a/src/memtable.c b/src/memtable.c index 92a66b99..0fbeaeb9 100644 --- a/src/memtable.c +++ b/src/memtable.c @@ -307,15 +307,19 @@ memtable_context_create(platform_heap_id hid, process_fn process, void *process_ctxt) { + platform_memfrag memfrag_ctxt = {0}; memtable_context *ctxt = TYPED_FLEXIBLE_STRUCT_ZALLOC(hid, ctxt, mt, cfg->max_memtables); - ctxt->cc = cc; + ctxt->mf_size = memfrag_size(&memfrag_ctxt); + ctxt->cc = cc; memmove(&ctxt->cfg, cfg, sizeof(ctxt->cfg)); platform_mutex_init( &ctxt->incorporation_mutex, platform_get_module_id(), hid); - ctxt->rwlock = TYPED_MALLOC(hid, ctxt->rwlock); + platform_memfrag memfrag_rwlock = {0}; + ctxt->rwlock = TYPED_MALLOC_MF(&memfrag_rwlock, hid, ctxt->rwlock); platform_batch_rwlock_init(ctxt->rwlock); + ctxt->rwlock_mf_size = memfrag_size(&memfrag_rwlock); for (uint64 mt_no = 0; mt_no < cfg->max_memtables; mt_no++) { uint64 generation = mt_no; @@ -343,9 +347,8 @@ memtable_context_destroy(platform_heap_id hid, memtable_context *ctxt) } platform_mutex_destroy(&ctxt->incorporation_mutex); - platform_free(hid, ctxt->rwlock); - - platform_free(hid, ctxt); + platform_free_mem(hid, ctxt->rwlock, ctxt->rwlock_mf_size); + platform_free_mem(hid, ctxt, ctxt->mf_size); } void diff --git a/src/memtable.h b/src/memtable.h index 25586864..329a9843 100644 --- a/src/memtable.h +++ b/src/memtable.h @@ -131,7 +131,9 @@ typedef struct memtable_context { // read lock to read and write lock to modify. volatile uint64 generation_retired; - bool32 is_empty; + bool is_empty; + size_t mf_size; // # of bytes of memory allocated to this struct + size_t rwlock_mf_size; // # of bytes of memory allocated to rwlock // Effectively thread local, no locking at all: btree_scratch scratch[MAX_THREADS]; diff --git a/src/merge.c b/src/merge.c index 0b6ddded..5f35b3ef 100644 --- a/src/merge.c +++ b/src/merge.c @@ -545,6 +545,7 @@ merge_iterator_create(platform_heap_id hid, == ARRAY_SIZE(merge_itor->ordered_iterators), "size mismatch"); + platform_memfrag memfrag_merge_itor; merge_itor = TYPED_ZALLOC(PROCESS_PRIVATE_HEAP_ID, merge_itor); if (merge_itor == NULL) { return STATUS_NO_MEMORY; @@ -598,7 +599,7 @@ platform_status merge_iterator_destroy(platform_heap_id hid, merge_iterator **merge_itor) { merge_accumulator_deinit(&(*merge_itor)->merge_buffer); - platform_free(PROCESS_PRIVATE_HEAP_ID, *merge_itor); + platform_free_heap(PROCESS_PRIVATE_HEAP_ID, *merge_itor); *merge_itor = NULL; return STATUS_OK; diff --git a/src/pcq.h b/src/pcq.h index a4e3ac9e..19918674 100644 --- a/src/pcq.h +++ b/src/pcq.h @@ -14,8 +14,9 @@ typedef struct { uint32 num_elems; - cache_aligned_uint32 tail; // Producers enqueue to here - cache_aligned_uint32 head; // Consumer dequeues from here + cache_aligned_uint32 tail; // Producers enqueue to here + cache_aligned_uint32 head; // Consumer dequeues from here + size_t mf_size; // of memory fragment allocated for this struct void *elems[]; } pcq; @@ -28,9 +29,11 @@ pcq_alloc(platform_heap_id hid, size_t num_elems) { pcq *q; + platform_memfrag memfrag_q; q = TYPED_FLEXIBLE_STRUCT_ZALLOC(hid, q, elems, num_elems); if (q != NULL) { q->num_elems = num_elems; + q->mf_size = memfrag_size(&memfrag_q); } return q; @@ -61,11 +64,12 @@ pcq_is_full(const pcq *q) return pcq_count(q) == q->num_elems; } -// Deallocate a PCQ +// Deallocate a PCQ, and NULL out input handle static inline void -pcq_free(platform_heap_id hid, pcq *q) +pcq_free(platform_heap_id hid, pcq **q) { - platform_free(hid, q); + platform_free_mem(hid, *q, (*q)->mf_size); + *q = NULL; } // Enqueue an elem to a PCQ. Element must not be NULL diff --git a/src/platform_linux/laio.c b/src/platform_linux/laio.c index 1b5346e7..02b354d4 100644 --- a/src/platform_linux/laio.c +++ b/src/platform_linux/laio.c @@ -227,16 +227,21 @@ io_handle_init(laio_handle *io, io_config *cfg, platform_heap_id hid) * structures. Each request struct nests within it async_max_pages * pages on which IO can be outstanding. */ + platform_memfrag memfrag_io_req; req_size = sizeof(io_async_req) + cfg->async_max_pages * sizeof(struct iovec); total_req_size = req_size * cfg->async_queue_size; - io->req = TYPED_MANUAL_ZALLOC(io->heap_id, io->req, total_req_size); + + io->req = TYPED_MANUAL_ZALLOC( + &memfrag_io_req, io->heap_id, io->req, total_req_size); platform_assert((io->req != NULL), "Failed to allocate memory for array of %lu Async IO" " request structures, for %ld outstanding IOs on pages.", cfg->async_queue_size, cfg->async_max_pages); + io->req_mf_size = memfrag_size(&memfrag_io_req); + // Initialize each Async IO request structure for (int i = 0; i < cfg->async_queue_size; i++) { req = laio_get_kth_req(io, i); @@ -275,7 +280,9 @@ io_handle_deinit(laio_handle *io) } platform_assert(status == 0); - platform_free(io->heap_id, io->req); + platform_free_mem(io->heap_id, io->req, io->req_mf_size); + io->req = NULL; + io->req_mf_size = 0; } /* diff --git a/src/platform_linux/laio.h b/src/platform_linux/laio.h index 5350fddd..92b69dd0 100644 --- a/src/platform_linux/laio.h +++ b/src/platform_linux/laio.h @@ -56,7 +56,8 @@ typedef struct laio_handle { uint64 req_hand_base; uint64 req_hand[MAX_THREADS]; platform_heap_id heap_id; - int fd; // File descriptor to Splinter device/file. + int fd; // File descriptor to Splinter device/file. + size_t req_mf_size; // of memory fragment allocated for req array } laio_handle; platform_status diff --git a/src/platform_linux/platform.c b/src/platform_linux/platform.c index b180495b..b115ffd4 100644 --- a/src/platform_linux/platform.c +++ b/src/platform_linux/platform.c @@ -80,13 +80,14 @@ platform_heap_create(platform_module_id UNUSED_PARAM(module_id), return STATUS_OK; } -void +platform_status platform_heap_destroy(platform_heap_id *heap_id) { // If shared segment was allocated, it's being tracked thru heap ID. if (*heap_id) { return platform_shmdestroy(heap_id); } + return STATUS_OK; } /* @@ -533,12 +534,14 @@ platform_histo_create(platform_heap_id heap_id, const int64 *const bucket_limits, platform_histo_handle *histo) { + platform_memfrag memfrag_hh; platform_histo_handle hh; - hh = TYPED_MANUAL_MALLOC( + hh = TYPED_ARRAY_MALLOC( heap_id, hh, sizeof(hh) + num_buckets * sizeof(hh->count[0])); if (!hh) { return STATUS_NO_MEMORY; } + hh->mf_size = memfrag_size(&memfrag_hh); hh->num_buckets = num_buckets; hh->bucket_limits = bucket_limits; hh->total = 0; @@ -557,7 +560,7 @@ platform_histo_destroy(platform_heap_id heap_id, { platform_assert(histo_out); platform_histo_handle histo = *histo_out; - platform_free(heap_id, histo); + platform_free_mem(heap_id, histo, histo->mf_size); *histo_out = NULL; } diff --git a/src/platform_linux/platform.h b/src/platform_linux/platform.h index 1eb62303..47543904 100644 --- a/src/platform_linux/platform.h +++ b/src/platform_linux/platform.h @@ -140,7 +140,6 @@ typedef void (*platform_thread_worker)(void *); typedef int (*platform_sort_cmpfn)(const void *a, const void *b, void *arg); - /* * Helper macro that takes a pointer, type of the container, and the * name of the member the pointer refers to. The macro expands to a @@ -185,7 +184,6 @@ typedef struct { extern bool32 platform_use_hugetlb; extern bool32 platform_use_mlock; - /* * Section 3: * Shared types/typedefs that rely on platform-specific types/typedefs @@ -195,7 +193,6 @@ extern bool32 platform_use_mlock; extern platform_log_handle *Platform_default_log_handle; extern platform_log_handle *Platform_error_log_handle; - /* * Section 4: * Shared function declarations. @@ -320,15 +317,17 @@ extern platform_heap_id Heap_id; * calling aligned_alloc manually (or create a separate macro) * * Parameters: + * mf - platform_memfrag *, to return memory allocation information. * hid - Platform heap-ID to allocate memory from. * v - Structure to allocate memory for. * n - Number of bytes of memory to allocate. * ----------------------------------------------------------------------------- */ -#define TYPED_MANUAL_MALLOC(hid, v, n) \ +#define TYPED_MANUAL_MALLOC(mf, hid, v, n) \ ({ \ debug_assert((n) >= sizeof(*(v))); \ - (typeof(v))platform_aligned_malloc(hid, \ + (typeof(v))platform_aligned_malloc((mf), \ + hid, \ PLATFORM_CACHELINE_SIZE, \ (n), \ STRINGIFY(v), \ @@ -336,10 +335,12 @@ extern platform_heap_id Heap_id; __FILE__, \ __LINE__); \ }) -#define TYPED_MANUAL_ZALLOC(hid, v, n) \ + +#define TYPED_MANUAL_ZALLOC(mf, hid, v, n) \ ({ \ debug_assert((n) >= sizeof(*(v))); \ - (typeof(v))platform_aligned_zalloc(hid, \ + (typeof(v))platform_aligned_zalloc((mf), \ + hid, \ PLATFORM_CACHELINE_SIZE, \ (n), \ STRINGIFY(v), \ @@ -356,24 +357,32 @@ extern platform_heap_id Heap_id; * the difference that the alignment is caller-specified. * * Parameters: + * mf - platform_memfrag *, to return memory allocation information. * hid - Platform heap-ID to allocate memory from. * a - Alignment needed for allocated memory. * v - Structure to allocate memory for. * n - Number of bytes of memory to allocate. */ -#define TYPED_ALIGNED_MALLOC(hid, a, v, n) \ +#define TYPED_ALIGNED_MALLOC_MF(mf, hid, a, v, n) \ ({ \ debug_assert((n) >= sizeof(*(v))); \ (typeof(v))platform_aligned_malloc( \ - hid, (a), (n), STRINGIFY(v), __func__, __FILE__, __LINE__); \ + (mf), hid, (a), (n), STRINGIFY(v), __func__, __FILE__, __LINE__); \ }) -#define TYPED_ALIGNED_ZALLOC(hid, a, v, n) \ + +#define TYPED_ALIGNED_MALLOC(hid, a, v, n) \ + TYPED_ALIGNED_MALLOC_MF(&memfrag_##v, hid, a, v, n) + +#define TYPED_ALIGNED_ZALLOC_MF(mf, hid, a, v, n) \ ({ \ debug_assert((n) >= sizeof(*(v))); \ (typeof(v))platform_aligned_zalloc( \ - hid, (a), (n), STRINGIFY(v), __func__, __FILE__, __LINE__); \ + (mf), hid, (a), (n), STRINGIFY(v), __func__, __FILE__, __LINE__); \ }) +#define TYPED_ALIGNED_ZALLOC(hid, a, v, n) \ + TYPED_ALIGNED_ZALLOC_MF(&memfrag_##v, hid, a, v, n) + /* * FLEXIBLE_STRUCT_SIZE(): Compute the size of a structure 'v' with a nested * flexible array member, array_field_name, with 'n' members. @@ -416,12 +425,16 @@ extern platform_heap_id Heap_id; * ----------------------------------------------------------------------------- */ #define TYPED_FLEXIBLE_STRUCT_MALLOC(hid, v, array_field_name, n) \ - TYPED_MANUAL_MALLOC( \ - hid, (v), FLEXIBLE_STRUCT_SIZE((v), array_field_name, (n))) + TYPED_MANUAL_MALLOC(&memfrag_##v, \ + hid, \ + (v), \ + FLEXIBLE_STRUCT_SIZE((v), array_field_name, (n))) #define TYPED_FLEXIBLE_STRUCT_ZALLOC(hid, v, array_field_name, n) \ - TYPED_MANUAL_ZALLOC( \ - hid, (v), FLEXIBLE_STRUCT_SIZE((v), array_field_name, (n))) + TYPED_MANUAL_ZALLOC(&memfrag_##v, \ + hid, \ + (v), \ + FLEXIBLE_STRUCT_SIZE((v), array_field_name, (n))) /* * TYPED_ARRAY_MALLOC(), TYPED_ARRAY_ZALLOC() @@ -431,22 +444,47 @@ extern platform_heap_id Heap_id; * hid - Platform heap-ID to allocate memory from. * v - Structure to allocate memory for. * n - Number of members of type 'v' in array. + * + * Caller is expected to declare an on-stack platform_memfrag{} struct + * named memfrag_. This is used as output struct to return memory frag info. */ #define TYPED_ARRAY_MALLOC(hid, v, n) \ - TYPED_MANUAL_MALLOC(hid, (v), (n) * sizeof(*(v))) + TYPED_MANUAL_MALLOC(&memfrag_##v, hid, (v), (n) * sizeof(*(v))) + #define TYPED_ARRAY_ZALLOC(hid, v, n) \ - TYPED_MANUAL_ZALLOC(hid, (v), (n) * sizeof(*(v))) + TYPED_MANUAL_ZALLOC(&memfrag_##v, hid, (v), (n) * sizeof(*(v))) + +#define TYPED_ARRAY_MALLOC_MF(mf, hid, v, n) \ + TYPED_MANUAL_MALLOC(mf, hid, (v), (n) * sizeof(*(v))) + +#define TYPED_ARRAY_ZALLOC_MF(mf, hid, v, n) \ + TYPED_MANUAL_ZALLOC(mf, hid, (v), (n) * sizeof(*(v))) /* - * TYPED_ARRAY_MALLOC(), TYPED_ARRAY_ZALLOC() + * TYPED_MALLOC(), TYPED_ZALLOC() * Allocate memory for one element of structure 'v'. * * Parameters: * hid - Platform heap-ID to allocate memory from. * v - Structure to allocate memory for. */ -#define TYPED_MALLOC(hid, v) TYPED_ARRAY_MALLOC(hid, v, 1) -#define TYPED_ZALLOC(hid, v) TYPED_ARRAY_ZALLOC(hid, v, 1) +#define TYPED_MALLOC(hid, v) TYPED_ARRAY_MALLOC_MF(&memfrag_##v, hid, v, 1) + +#define TYPED_ZALLOC(hid, v) TYPED_ARRAY_ZALLOC_MF(&memfrag_##v, hid, v, 1) + +/* + * TYPED_MALLOC_MF(), TYPED_ZALLOC_MF(): + * Allocate a single-element of structure of type 'v', using a named memory + * frag. + + * Parameters: + * mf - Addr of memfrag to return memory allocation information. + * hid - Platform heap-ID to allocate memory from. + * v - Structure to allocate memory for. + */ +#define TYPED_MALLOC_MF(mf, hid, v) TYPED_ARRAY_MALLOC_MF(mf, hid, v, 1) + +#define TYPED_ZALLOC_MF(mf, hid, v) TYPED_ARRAY_ZALLOC_MF(mf, hid, v, 1) /* * ----------------------------------------------------------------------------- @@ -668,12 +706,15 @@ platform_heap_create(platform_module_id module_id, bool use_shmem, platform_heap_id *heap_id); -void +platform_status platform_heap_destroy(platform_heap_id *heap_id); void platform_shm_set_splinterdb_handle(platform_heap_id heap_id, void *addr); +void * +platform_heap_get_splinterdb_handle(const platform_heap_id heap_id); + shmem_heap * platform_heap_id_to_shmaddr(platform_heap_id hid); @@ -726,6 +767,19 @@ platform_strtok_r(char *str, const char *delim, platform_strtok_ctx *ctx); * Non-inline implementations belong in a .c file in the platform_* directory. * Declarations for the non-inline functions can go in platform_inline.h */ +/* + * Structure to encapsulate a {memory-addr, memory-size} pair. Used to track + * allocation and, more importantly, free of memory fragments for opaque + * "objects". Used typically to manage memory for arrays of things. + * The 'addr' field is intentionally -not- the 1st field, to reduce lazy + * programming which might try to bypass provided interfaces. + */ +typedef struct platform_memfrag { + platform_heap_id hid; + size_t size; + void *addr; +} platform_memfrag; + #include @@ -734,51 +788,161 @@ platform_strtok_r(char *str, const char *delim, platform_strtok_ctx *ctx); * Non-platform-specific inline implementations */ +/* + * Utility macro to test if an argument to platform_free() is a + * platform_memfrag *. + */ +#define IS_MEM_FRAG(x) \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof((platform_memfrag *)0), typeof(x)), \ + 1, \ + 0) + +/* Helper methods to do some common operations */ +#define memfrag_start(mf) ((mf)->addr) +#define memfrag_size(mf) ((mf)->size) + +// platform_memfrag initializer. +static inline void +memfrag_init(platform_memfrag *mf, + platform_heap_id hid, + void *ptr, + size_t nbytes) +{ + mf->hid = hid; + mf->addr = ptr; + mf->size = nbytes; +} + +static inline bool +memfrag_is_empty(const platform_memfrag *mf) +{ + return ((mf->addr == NULL) && (mf->size == 0)); +} + +static inline void +memfrag_set_empty(platform_memfrag *mf) +{ + debug_assert(!memfrag_is_empty(mf)); + ZERO_STRUCT(*mf); +} + +/* Move the memory fragment ownership from src to dst memory fragment */ +static inline void +memfrag_move(platform_memfrag *dst, platform_memfrag *src) +{ + platform_assert(memfrag_is_empty(dst)); + platform_assert(!memfrag_is_empty(src)); + + dst->hid = src->hid; + dst->addr = src->addr; + dst->size = src->size; + ZERO_STRUCT(*src); +} /* - * Similar to the TYPED_MALLOC functions, for all the free functions we need to - * call platform_get_heap_id() from a macro instead of an inline function - * (which may or may not end up inlined) - * Wrap free and free_volatile: + * ---------------------------------------------------------------------------- + * void = platform_free(platform_memfrag *mf) + * + * Similar to the TYPED_MALLOC functions, for all the free functions we need + * to call platform_get_heap_id() from a macro instead of an inline function + * (which may or may not end up inlined). Wrap free and free_volatile. + * ---------------------------------------------------------------------------- */ -#define platform_free(id, p) \ +#define platform_free(mf) \ do { \ - platform_free_from_heap( \ - id, (p), STRINGIFY(p), __func__, __FILE__, __LINE__); \ - (p) = NULL; \ + debug_assert(((mf) != NULL), \ + "Attempt to free a NULL ptr from '%s', line=%d", \ + __func__, \ + __LINE__); \ + platform_do_free((mf)->hid, (mf)->addr, (mf)->size, STRINGIFY(mf)); \ + ZERO_STRUCT(*mf); \ } while (0) +/* + * ---------------------------------------------------------------------------- + * void = platform_free_mem(platform_heap_id hid, void *p, size_t size);, + * void = platform_do_free(platform_heap_id hid, void *p, size_t size, + * const char *objname); + * + * Free a memory fragment at address 'p' of size 'size' bytes. + * + * These exist to facilitate re-cycling of free'd fragments in a shared-memory + * usage. That machinery works off of the fragment's 'size', hence we need to + * provide 'size' through this interface. + * ---------------------------------------------------------------------------- + */ +// clang-format off +#define platform_free_mem(hid, p, size) \ + platform_do_free((hid), (p), (size), STRINGIFY(p)) -#define platform_free_volatile(id, p) \ +#define platform_do_free(hid, p, size, objname) \ + platform_free_from_heap((hid), (p), (size), \ + (objname), __func__, __FILE__, __LINE__) + +// clang-format on + +/* + * ---------------------------------------------------------------------------- + * void = platform_free_volatile(platform_heap_id hid, + * platform_memfrag *p) + * + * Similar to platform_free(), except it exists to free volatile ptr to + * allocated memory. The interface expects that the (single-) caller has + * packaged the memory fragment to-be-freed in a platform_memfrag *p * arg. + * There is just one consumer of this interface, so we don't go to the full + * distance as its sibling interface, to do error checking of args etc. + * ---------------------------------------------------------------------------- + */ +#define platform_free_volatile(hid, p) \ do { \ - platform_free_volatile_from_heap( \ - id, (p), STRINGIFY(p), __func__, __FILE__, __LINE__); \ - (p) = NULL; \ + debug_assert(((p) != NULL), \ + "Attempt to free a NULL ptr from '%s', line=%d", \ + __func__, \ + __LINE__); \ + platform_assert(IS_MEM_FRAG(p), \ + "Attempt to free volatile memory ptr with an invalid" \ + " arg, from '%s', line=%d", \ + __func__, \ + __LINE__); \ + platform_memfrag *_mf = (platform_memfrag *)(p); \ + platform_free_volatile_from_heap(hid, \ + _mf->addr, \ + _mf->size, \ + STRINGIFY(p), \ + __func__, \ + __FILE__, \ + __LINE__); \ + _mf->addr = NULL; \ + _mf->size = 0; \ } while (0) // Convenience function to free something volatile static inline void platform_free_volatile_from_heap(platform_heap_id heap_id, volatile void *ptr, + const size_t size, const char *objname, const char *func, const char *file, int lineno) { // Ok to discard volatile qualifier for free - platform_free_from_heap(heap_id, (void *)ptr, objname, func, file, lineno); + platform_free_from_heap( + heap_id, (void *)ptr, size, objname, func, file, lineno); } static inline void * -platform_aligned_zalloc(platform_heap_id heap_id, - size_t alignment, - size_t size, - const char *objname, - const char *func, - const char *file, - int lineno) +platform_aligned_zalloc(platform_memfrag *memfrag, // IN/OUT + platform_heap_id heap_id, + size_t alignment, + size_t size, + const char *objname, + const char *func, + const char *file, + int lineno) { void *x = platform_aligned_malloc( - heap_id, alignment, size, objname, func, file, lineno); + memfrag, heap_id, alignment, size, objname, func, file, lineno); if (LIKELY(x)) { memset(x, 0, size); } @@ -791,6 +955,14 @@ max_size_t(size_t a, size_t b) return a > b ? a : b; } +// Return absolute diff between two unsigned long +// values. +static inline size_t +diff_size_t(size_t a, size_t b) +{ + return ((a > b) ? (a - b) : (b - a)); +} + static inline bool32 SUCCESS(const platform_status s) { diff --git a/src/platform_linux/platform_inline.h b/src/platform_linux/platform_inline.h index 7eed6b34..2ef1c850 100644 --- a/src/platform_linux/platform_inline.h +++ b/src/platform_linux/platform_inline.h @@ -40,10 +40,11 @@ platform_checksum_is_equal(checksum128 left, checksum128 right) static void platform_free_from_heap(platform_heap_id UNUSED_PARAM(heap_id), void *ptr, + const size_t size, const char *objname, const char *func, const char *file, - int lineno); + int line); static inline timestamp platform_get_timestamp(void) @@ -282,7 +283,7 @@ platform_close_log_stream(platform_stream_handle *stream, fputs(stream->str, log_handle); fflush(log_handle); platform_free_from_heap( - NULL, stream->str, "stream", __func__, __FILE__, __LINE__); + NULL, stream->str, 0, "stream", __func__, __FILE__, __LINE__); } static inline platform_log_handle * @@ -438,15 +439,20 @@ platform_align_bytes_reqd(const size_t alignment, const size_t size) * this supports alignments up to a cache-line. * If Splinter is configured to run with shared memory, we will invoke the * shmem-allocation function, working off of the (non-NULL) platform_heap_id. + * + * Returns ptr to allocated memory. If 'memfrag' is supplied, return the + * allocated memory fragment's info (addr & size). This is needed to support + * 'free' when using shared memory based allocation. */ static inline void * -platform_aligned_malloc(const platform_heap_id heap_id, +platform_aligned_malloc(platform_memfrag *memfrag, // IN/OUT + const platform_heap_id heap_id, const size_t alignment, // IN const size_t size, // IN const char *objname, const char *func, const char *file, - const int lineno) + const int line) { // Requirement for aligned_alloc platform_assert(IS_POWER_OF_2(alignment)); @@ -463,35 +469,60 @@ platform_aligned_malloc(const platform_heap_id heap_id, const size_t padding = platform_align_bytes_reqd(alignment, size); const size_t required = (size + padding); - void *retptr = - (heap_id - ? platform_shm_alloc(heap_id, required, objname, func, file, lineno) - : aligned_alloc(alignment, required)); + void *retptr = NULL; + if (heap_id == PROCESS_PRIVATE_HEAP_ID) { + retptr = aligned_alloc(alignment, required); + if (memfrag) { + memfrag->hid = heap_id; + memfrag->addr = retptr; + memfrag->size = required; + } + } else { + retptr = platform_shm_alloc( + memfrag, heap_id, required, objname, func, file, line); + } return retptr; } /* * platform_realloc() - Reallocate 'newsize' bytes and copy over old contents. * + * Caller-macro to invoke lower-level reallocation method. + */ +#define platform_realloc(mf, newsize) \ + platform_do_realloc((mf), (newsize), __func__, __FILE__, __LINE__) + +/* + * platform_do_realloc() - Reallocate 'newsize' bytes and copy over old + * contents. + * * This is a wrapper around C-realloc() but farms over to shared-memory * based realloc, when needed. * - * The interface is intentional to avoid inadvertently swapping 'oldsize' and - * 'newsize' in the call, if they were to appear next to each other. - * * Reallocing to size 0 must be equivalent to freeing. * Reallocing from NULL must be equivalent to allocing. + * + * Returns ptr to reallocated memory fragment. In case of shared memory, + * returns the newsize padded-up to cache-line alignment bytes. */ static inline void * -platform_realloc(const platform_heap_id heap_id, - const size_t oldsize, - void *ptr, // IN - const size_t newsize) // IN +platform_do_realloc(platform_memfrag *mf, // IN/OUT + size_t newsize, // IN + const char *func, + const char *file, + const int line) { /* FIXME: alignment? */ // Farm control off to shared-memory based realloc, if it's configured - if (heap_id) { + if (mf->hid == PROCESS_PRIVATE_HEAP_ID) { + void *retptr = realloc(mf->addr, newsize); + if (retptr) { + mf->addr = retptr; + mf->size = newsize; + } + return retptr; + } else { // The shmem-based allocator is expecting all memory requests to be of // aligned sizes, as that's what platform_aligned_malloc() does. So, to // keep that allocator happy, align this memory request if needed. @@ -499,26 +530,38 @@ platform_realloc(const platform_heap_id heap_id, // align at platform's natural cacheline boundary. const size_t padding = platform_align_bytes_reqd(PLATFORM_CACHELINE_SIZE, newsize); - const size_t required = (newsize + padding); - return platform_shm_realloc( - heap_id, ptr, oldsize, required, __func__, __FILE__, __LINE__); - } else { - return realloc(ptr, newsize); + newsize += padding; + return platform_shm_realloc(mf, newsize, func, file, line); } } +static inline void +platform_free_heap(platform_heap_id heap_id, void *ptr) +{ + debug_assert(heap_id == PROCESS_PRIVATE_HEAP_ID); + debug_assert(ptr != NULL); + free(ptr); +} + +/* + * platform_free_from_heap() - Free memory from the heap. + * + * If Splinter is running with shared memory configured, this calls into + * shared-memory based free() method. Othewise, run standard free(). + */ static inline void platform_free_from_heap(platform_heap_id heap_id, void *ptr, + const size_t size, const char *objname, const char *func, const char *file, - int lineno) + int line) { - if (heap_id) { - platform_shm_free(heap_id, ptr, objname, func, file, lineno); - } else { + if (heap_id == PROCESS_PRIVATE_HEAP_ID) { free(ptr); + } else { + platform_shm_free(heap_id, ptr, size, objname, func, file, line); } } diff --git a/src/platform_linux/platform_types.h b/src/platform_linux/platform_types.h index c21eb97a..cf22834c 100644 --- a/src/platform_linux/platform_types.h +++ b/src/platform_linux/platform_types.h @@ -168,7 +168,8 @@ typedef struct { unsigned int num_buckets; const long *bucket_limits; long min, max, total; - unsigned long num; // no. of elements + unsigned long num; // no. of elements + size_t mf_size; // of memory fragment allocated unsigned long count[]; } * platform_histo_handle; diff --git a/src/platform_linux/shmem.c b/src/platform_linux/shmem.c index a76f8c16..f75aabd1 100644 --- a/src/platform_linux/shmem.c +++ b/src/platform_linux/shmem.c @@ -2,10 +2,43 @@ // SPDX-License-Identifier: Apache-2.0 /* + * --------------------------------------------------------------------------- * shmem.c -- * * This file contains the implementation for managing shared memory created * for use by SplinterDB and all its innards. + * + * Here's a quick code-flow of important entry point functions: + * + * - platform_shmcreate(), platform_shmdestroy() + * Bootstrap and dismantle shared memory segment. + * + * platform_shm_alloc() - Main allocation interface + * │ + * ├──► platform_shm_find_large() - Find (or recycle) a large free fragment + * │ + * └──► platform_shm_find_small() - Find (or recycle) a small free fragment + * + * + * platform_shm_free() - Main free interface + * │ + * └─► platform_shm_track_free() - Manage free'd fragment in lists + * │ + * └─► platform_shm_hook_free_frag() - Add small fragment to its list + * + * platform_shm_realloc() - Main realloc() interface + * │ + * ├─► platform_shm_alloc() + * │ + * └─► platform_shm_free() + * + * platform_shm_print_usage() - Useful to dump shm-usage metrics + * │ + * └─► platform_shm_print_usage_stats() + * + * There are many other debugging, tracing and diagnostics functions. + * Best to read them inline in code. + * --------------------------------------------------------------------------- */ #include "platform.h" #include "shmem.h" @@ -36,7 +69,32 @@ static bool Trace_large_frags = FALSE; * NOTE: {to_pid, to_tid} and {by_pid, by_tid} fields go hand-in-hand. * We track both for improved debugging. * - * Lifecyle: + * Here is a figure showing how large-fragments are tracked + * + * ┌──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┬──┐ + * ┌┼--│ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ + * ├───┴─┬┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┴──┘ + * │ │ + * │ │ + * ┌───▼─┐ │ + * │ │ ▼ ┌─────────────────────────┐ + * │ │ ┌─────────┐ │ │ + * │ │ │ │◄─────────────┼─addr │ + * │ │ │ │ │ size = 32K │ + * │ │ │ │ │ │ + * └─────┘ │ │ │ allocated_to_tid = T1 │ + * │ │ │ │ + * │ │ │ allocated_to_pid = PID1 │ + * │ │ │ │ + * └─────────┘ │ freed_by_pid = 0 │ + * │ │ + * │ freed_by_tid = 0 │ + * │ │ + * └─────────────────────────┘ + * + * ---- Lifecyle: ---- + * + * - Initially all frag_addr will be NULL => tracking fragment is empty * - When a large fragment is initially allocated, frag_addr / frag_size will * be set. * - (allocated_to_pid != 0) && (freed_by_pid == 0) - Fragment is in use. @@ -44,31 +102,19 @@ static bool Trace_large_frags = FALSE; * --------------------------------------------------------------------------- */ typedef struct shm_large_frag_info { - void *frag_addr; // Start address of this memory fragment - // NULL => tracking fragment is empty - size_t frag_size; // bytes (Used in re-allocation logic.) + void *frag_addr; // Start address of this memory fragment + // NULL => tracking fragment is empty + const char *frag_func; // Calling func which lead to creation + size_t frag_size; // bytes (Used in re-allocation logic.) // Following fields are used mainly for assertions and diagnostics. - int frag_allocated_to_pid; // Allocated to this OS-pid threadid frag_allocated_to_tid; // Allocated to this Splinter thread-ID - int frag_freed_by_pid; // OS-pid that freed this large fragment threadid frag_freed_by_tid; // Splinter thread-ID that freed fragment + int frag_allocated_to_pid; // Allocated to this OS-pid + int frag_freed_by_pid; // OS-pid that freed this large fragment + int frag_line; } shm_large_frag_info; -/* - * All memory allocations of this size or larger will be tracked in the - * above fragment tracker array. For large inserts workload, we allocate large - * memory chunks for fingerprint array, which is more than a MiB. For scans, - * splinterdb_iterator_init() allocates memory for an iterator which is ~92+KiB. - * Set this to a lower value so we can re-cycle free fragments for iterators - * also. - */ -#if SPLINTER_DEBUG -# define SHM_LARGE_FRAG_SIZE (90 * KiB) -#else -# define SHM_LARGE_FRAG_SIZE (38 * KiB) -#endif // SPLINTER_DEBUG - /* * In the worst case we may have all threads performing activities that need * such large memory fragments. We track up to twice the # of configured @@ -76,28 +122,81 @@ typedef struct shm_large_frag_info { */ #define SHM_NUM_LARGE_FRAGS (MAX_THREADS * 2) +/* + * Currently, we track free-fragments in lists limited to these sizes. + * Each such free-list has a head-list field in shared memory control block. + * of sizes. This list of tracked small-fragments can be easily expanded. + */ +#define SHM_SMALL_FRAG_MIN_SIZE 64 // Will not track for size < min bytes +#define SHM_SMALL_FRAG_MAX_SIZE 512 // Will not track for size > max bytes + +/* + * We track a small number of allocated fragments, mainly to assert that the + * free() call is being correctly issued with the right ptr / size arguments. + */ +typedef struct frag_hdr { + void *frag_addr; + size_t frag_size; +} frag_hdr; + +// Small number of allocated small fragments are tracked, for assertion +// checking of fragment-addr-vs-its-size at the time of free. +#define SHM_NUM_SMALL_FRAGS (MAX_THREADS * 32) + +/* + * Each free small fragment that is hooked into the free-list is described + * by this tiny tracking structure. + */ +typedef struct free_frag_hdr { + struct free_frag_hdr *free_frag_next; + size_t free_frag_size; +} free_frag_hdr; + /* * ------------------------------------------------------------------------ * Shared-memory usage statistics & metrics: * - * Set of usage-stats fields copied from shmem_info{} struct, so that we + * Set of usage-stats fields copied from shmem_heap{} struct, so that we * can print these after shared segment has been destroyed. * ------------------------------------------------------------------------ */ typedef struct shminfo_usage_stats { + int shmid; // Shared memory ID returned by shmget() size_t total_bytes; // Total size of shared segment allocated initially. size_t used_bytes; // Used bytes of memory left (that were allocated) size_t free_bytes; // Free bytes of memory left (that can be allocated) + size_t bytes_freed; // Bytes of memory that underwent 'free' (can be + // reallocated). size_t used_bytes_HWM; // High-water mark of memory used bytes - size_t nfrees; // # of calls to free memory + size_t nfrees; // Total # of calls to free memory size_t nfrees_last_frag; // Freed last small-fragment + size_t nfrags_allocated; // Tracked in shm_allocated_frag[] array. + + // Distribution of 'free' calls based on fragment size + size_t nfrees_eq0; + size_t nfrees_le32; + size_t nfrees_le64; + size_t nfrees_le128; + size_t nfrees_le256; + size_t nfrees_le512; + size_t nfrees_le1K; + size_t nfrees_le2K; + size_t nfrees_le4K; + size_t nfrees_rest; + size_t nfrees_large_frags; + size_t nf_search_skipped; size_t used_by_large_frags_bytes; // Actually reserved + + // # of times search in large-fragments array found array full. + // Non-zero counter implies that there were more concurrent + // requesters to track a large fragment than we have room to track. + size_t nlarge_frags_full; + uint32 nlarge_frags_tracked; uint32 nlarge_frags_inuse; uint32 nlarge_frags_inuse_HWM; int nlarge_frags_found_in_use; - int shmid; } shminfo_usage_stats; /* @@ -114,22 +213,31 @@ typedef struct shminfo_usage_stats { * ----------------------------------------------------------------------------- */ typedef struct shmem_heap { - void *shm_start; // Points to start address of shared segment. - void *shm_end; // Points to end address; one past end of sh segment - void *shm_next; // Points to next 'free' address to allocate from. - void *shm_last_alloc; // Points to address most-recently allocated + void *shm_start; // Points to start address of shared segment. + void *shm_end; // Points to end address; one past end of shared segment + void *shm_next; // Points to next 'free' address to allocate from. + + // Every alloc() and free() will be tracked in this array. + frag_hdr shm_allocated_frag[SHM_NUM_SMALL_FRAGS]; + + // Various chains of freed-fragments bucketted by fragment size. + free_frag_hdr *shm_free_le64; // size <= 64 bytes + free_frag_hdr *shm_free_le128; // size <= 128 bytes + free_frag_hdr *shm_free_le256; // size <= 256 bytes + free_frag_hdr *shm_free_le512; // size <= 512 bytes + void *shm_splinterdb_handle; void *shm_large_frag_hip; // Highest addr of large-fragments tracked - platform_spinlock shm_mem_lock; // To sync alloc / free + platform_mutex shm_mem_mutex; // To synchronize alloc & free - platform_spinlock shm_mem_frags_lock; - // Protected by shm_mem_frags_lock. Must hold to read or modify. - shm_large_frag_info shm_large_frags[SHM_NUM_LARGE_FRAGS]; + platform_mutex shm_largemem_frags_mutex; + // Protected by shm_largemem_frags_mutex. Must hold to read or modify. + shm_large_frag_info shm_largemem_frags[SHM_NUM_LARGE_FRAGS]; + int shm_id; // Shared memory ID returned by shmget() shminfo_usage_stats usage; uint64 shm_magic; // Magic identifier for shared memory segment - int shm_id; // Shared memory ID returned by shmget() } PLATFORM_CACHELINE_ALIGNED shmem_heap; @@ -139,23 +247,37 @@ typedef struct shmem_heap { // Function prototypes static void -platform_shm_track_large_alloc(shmem_heap *shm, void *addr, size_t size); +platform_shm_track_large_alloc(shmem_heap *shm, + void *addr, + size_t size, + const char *func, + const int line); static void -platform_shm_track_free(shmem_heap *shm, - void *addr, - const char *objname, - const char *func, - const char *file, - const int lineno); +platform_shm_track_free(shmem_heap *shm, + void *addr, + const size_t size, + const char *objname, + const char *func, + const char *file, + const int line); + +static void * +platform_shm_find_large(shmem_heap *shm, + size_t size, + platform_memfrag *memfrag, + const char *objname, + const char *func, + const char *file, + const int line); static void * -platform_shm_find_large(shmem_heap *shm, +platform_shm_find_small(shmem_heap *shm, size_t size, const char *objname, const char *func, const char *file, - const int lineno); + const int line); static void platform_shm_trace_allocs(shmem_heap *shm, @@ -165,7 +287,7 @@ platform_shm_trace_allocs(shmem_heap *shm, const char *objname, const char *func, const char *file, - const int lineno); + const int line); static int platform_trace_large_frags(shmem_heap *shm); @@ -174,11 +296,11 @@ bool platform_shm_heap_valid(shmem_heap *shmheap); /* - * PLATFORM_HEAP_ID_TO_SHMADDR() -- + * platform_heap_id_to_shmaddr() -- * * The shared memory create function returns the address of shmem_heap->shm_id * as the platform_heap_id heap-ID to the caller. Rest of Splinter will use this - * heap-ID as a 'handle' to manage / allocate shared memory. This macro converts + * heap-ID as a 'handle' to manage / allocate shared memory. This macro maps * the heap-ID handle to the shared memory's start address, from which the * location of the next-free-byte can be tracked. */ @@ -210,35 +332,35 @@ platform_shm_hip(platform_heap_id hid) static inline void shm_lock_mem(shmem_heap *shm) { - platform_spin_lock(&shm->shm_mem_lock); + platform_mutex_lock(&shm->shm_mem_mutex); } static inline void shm_unlock_mem(shmem_heap *shm) { - platform_spin_unlock(&shm->shm_mem_lock); + platform_mutex_unlock(&shm->shm_mem_mutex); } static inline void shm_lock_mem_frags(shmem_heap *shm) { - platform_spin_lock(&shm->shm_mem_frags_lock); + platform_mutex_lock(&shm->shm_largemem_frags_mutex); } static inline void shm_unlock_mem_frags(shmem_heap *shm) { - platform_spin_unlock(&shm->shm_mem_frags_lock); + platform_mutex_unlock(&shm->shm_largemem_frags_mutex); } /* - * platform_valid_addr_in_heap(), platform_valid_addr_in_shm() + * platform_isvalid_addr_in_heap(), platform_isvalid_addr_in_shm() * * Address 'addr' is valid if it's just past end of control block and within * shared segment. */ static inline bool -platform_valid_addr_in_shm(shmem_heap *shmaddr, const void *addr) +platform_isvalid_addr_in_shm(shmem_heap *shmaddr, const void *addr) { return ((addr >= ((void *)shmaddr + platform_shm_ctrlblock_size())) && (addr < shmaddr->shm_end)); @@ -249,10 +371,10 @@ platform_valid_addr_in_shm(shmem_heap *shmaddr, const void *addr) * region. */ bool -platform_valid_addr_in_heap(platform_heap_id heap_id, const void *addr) +platform_isvalid_addr_in_heap(platform_heap_id heap_id, const void *addr) { - return platform_valid_addr_in_shm(platform_heap_id_to_shmaddr(heap_id), - addr); + return platform_isvalid_addr_in_shm(platform_heap_id_to_shmaddr(heap_id), + addr); } /* @@ -263,14 +385,16 @@ platform_shm_print_usage_stats(shminfo_usage_stats *usage) { fraction used_bytes_pct; fraction used_bytes_HWM_pct; - fraction free_bytes_pct; + fraction free_bytes_pct; // # of bytes that are free now + fraction bytes_freed_pct; // # of bytes that were freed over time fraction freed_last_frag_pct = zero_fraction; fraction nf_search_skipped_pct = zero_fraction; used_bytes_pct = init_fraction(usage->used_bytes, usage->total_bytes); used_bytes_HWM_pct = init_fraction(usage->used_bytes_HWM, usage->total_bytes); - free_bytes_pct = init_fraction(usage->free_bytes, usage->total_bytes); + free_bytes_pct = init_fraction(usage->free_bytes, usage->total_bytes); + bytes_freed_pct = init_fraction(usage->bytes_freed, usage->total_bytes); if (usage->nfrees) { freed_last_frag_pct = init_fraction(usage->nfrees_last_frag, usage->nfrees); @@ -278,6 +402,22 @@ platform_shm_print_usage_stats(shminfo_usage_stats *usage) init_fraction(usage->nf_search_skipped, usage->nfrees); } + fraction nf_le64_pct; + fraction nf_le128_pct; + fraction nf_le256_pct; + fraction nf_le512_pct; + fraction nf_le1K_pct; + fraction nf_le2K_pct; + fraction nf_le4K_pct; + + nf_le64_pct = init_fraction(usage->nfrees_le64, usage->nfrees); + nf_le128_pct = init_fraction(usage->nfrees_le128, usage->nfrees); + nf_le256_pct = init_fraction(usage->nfrees_le256, usage->nfrees); + nf_le512_pct = init_fraction(usage->nfrees_le512, usage->nfrees); + nf_le1K_pct = init_fraction(usage->nfrees_le1K, usage->nfrees); + nf_le2K_pct = init_fraction(usage->nfrees_le2K, usage->nfrees); + nf_le4K_pct = init_fraction(usage->nfrees_le4K, usage->nfrees); + // clang-format off platform_default_log( "Shared memory usage stats shmid=%d:" @@ -285,11 +425,23 @@ platform_shm_print_usage_stats(shminfo_usage_stats *usage) ", Used=%lu bytes (%s, " FRACTION_FMT(4, 2) " %%)" ", UsedHWM=%lu bytes (%s, " FRACTION_FMT(4, 2) " %%)" ", Free=%lu bytes (%s, " FRACTION_FMT(4, 2) " %%)" + ", Freed=%lu bytes (%s, " FRACTION_FMT(4, 2) " %%)" ", nfrees=%lu" ", nfrees-last-small-frag=%lu (" FRACTION_FMT(4, 2) " %%)" ", nf_search_skipped=%lu (" FRACTION_FMT(4, 2) " %%)" + ", nfrees_eq0=%lu" + ", nfrees_le32=%lu" + ", nfrees_le64=%lu (" FRACTION_FMT(4, 2) " %%)" + ", nfrees_le128=%lu (" FRACTION_FMT(4, 2) " %%)" + ", nfrees_le256=%lu (" FRACTION_FMT(4, 2) " %%)" + ", nfrees_le512=%lu (" FRACTION_FMT(4, 2) " %%)" + ", nfrees_le1K=%lu (" FRACTION_FMT(4, 2) " %%)" + ", nfrees_le2K=%lu (" FRACTION_FMT(4, 2) " %%)" + ", nfrees_le4K=%lu (" FRACTION_FMT(4, 2) " %%)" + ", nfrees_rest=%lu " + ", nlarge_frags_inuse=%u" ", Large fragments in-use HWM=%u (found in-use=%d)" - ", consumed=%lu bytes (%s)" + ", consumed=%lu bytes (%s), nlarge_frags_full=%lu" ".\n", usage->shmid, usage->total_bytes, size_str(usage->total_bytes), @@ -303,6 +455,10 @@ platform_shm_print_usage_stats(shminfo_usage_stats *usage) usage->free_bytes, size_str(usage->free_bytes), (FRACTION_ARGS(free_bytes_pct) * 100), + usage->bytes_freed, + size_str(usage->bytes_freed), + (FRACTION_ARGS(bytes_freed_pct) * 100), + usage->nfrees, usage->nfrees_last_frag, (FRACTION_ARGS(freed_last_frag_pct) * 100), @@ -310,10 +466,29 @@ platform_shm_print_usage_stats(shminfo_usage_stats *usage) usage->nf_search_skipped, (FRACTION_ARGS(nf_search_skipped_pct) * 100), + usage->nfrees_eq0, + usage->nfrees_le32, + usage->nfrees_le64, + (FRACTION_ARGS(nf_le64_pct) * 100), + usage->nfrees_le128, + (FRACTION_ARGS(nf_le128_pct) * 100), + usage->nfrees_le256, + (FRACTION_ARGS(nf_le256_pct) * 100), + usage->nfrees_le512, + (FRACTION_ARGS(nf_le512_pct) * 100), + usage->nfrees_le1K, + (FRACTION_ARGS(nf_le1K_pct) * 100), + usage->nfrees_le2K, + (FRACTION_ARGS(nf_le2K_pct) * 100), + usage->nfrees_le4K, + (FRACTION_ARGS(nf_le4K_pct) * 100), + usage->nfrees_rest, + usage->nlarge_frags_inuse, usage->nlarge_frags_inuse_HWM, usage->nlarge_frags_found_in_use, usage->used_by_large_frags_bytes, - size_str(usage->used_by_large_frags_bytes)); + size_str(usage->used_by_large_frags_bytes), + usage->nlarge_frags_full); // clang-format on } @@ -340,6 +515,7 @@ platform_save_usage_stats(shminfo_usage_stats *usage, shmem_heap *shm) * Interface to print shared memory usage stats. (Callable from the debugger) * This is mainly intended as a diagnostics tool, so we don't work too hard * to grab metrics under exclusive access. + * ----------------------------------------------------------------------------- */ void platform_shm_print_usage(platform_heap_id hid) @@ -392,6 +568,7 @@ platform_shmcreate(size_t size, // Setup shared segment's control block at head of shared segment. shmem_heap *shm = (shmem_heap *)shmaddr; + memset(shm, 0, sizeof(*shm)); shm->shm_start = shmaddr; shm->shm_end = (shmaddr + size); @@ -408,12 +585,11 @@ platform_shmcreate(size_t size, *heap_id = (platform_heap_id *)shmaddr; } - platform_spinlock_init( - &shm->shm_mem_lock, platform_get_module_id(), *heap_id); + // Initialize mutexes needed to access shared memory & fragments tracker + platform_mutex_init(&shm->shm_mem_mutex, platform_get_module_id(), *heap_id); - // Initialize spinlock needed to access memory fragments tracker - platform_spinlock_init( - &shm->shm_mem_frags_lock, platform_get_module_id(), *heap_id); + platform_mutex_init( + &shm->shm_largemem_frags_mutex, platform_get_module_id(), *heap_id); // Always trace creation of shared memory segment. platform_default_log("Completed setup of shared memory of size " @@ -433,13 +609,13 @@ platform_shmcreate(size_t size, * platform_shmdestroy() -- Destroy a shared memory created for SplinterDB. * ----------------------------------------------------------------------------- */ -void +platform_status platform_shmdestroy(platform_heap_id *hid_out) { if (!hid_out) { platform_error_log( - "Error! Attempt to destroy shared memory with NULL heap ID!"); - return; + "Error! Attempt to destroy shared memory with NULL heap handle!"); + return STATUS_BAD_PARAM; } const void *shmaddr = (const void *)platform_heap_id_to_shmaddr(*hid_out); @@ -465,21 +641,25 @@ platform_shmdestroy(platform_heap_id *hid_out) hid_out, shm->shm_magic, SPLINTERDB_SHMEM_MAGIC); - return; + return STATUS_BAD_PARAM; } // Retain some memory usage stats before releasing shmem shminfo_usage_stats usage; - platform_save_usage_stats(&usage, shm); + int nfrags_in_use = platform_save_usage_stats(&usage, shm); + + platform_status rc = platform_mutex_destroy(&shm->shm_largemem_frags_mutex); + platform_assert(SUCCESS(rc)); int shmid = shm->shm_id; int rv = shmdt(shmaddr); if (rv != 0) { platform_error_log("Failed to detach from shared segment at address " - "%p, shmid=%d.\n", + "%p, shmid=%d: %s.\n", shmaddr, - shmid); - return; + shmid, + strerror(rv)); + return CONST_STATUS(rv); } // Externally, heap_id is pointing to this field. In anticipation that the @@ -490,14 +670,15 @@ platform_shmdestroy(platform_heap_id *hid_out) rv = shmctl(shmid, IPC_RMID, NULL); if (rv != 0) { - platform_error_log( - "shmctl failed to remove shared segment at address %p, shmid=%d.\n", - shmaddr, - shmid); + platform_error_log("shmctl failed to remove shared segment at address %p" + ", shmid=%d: %s.\n", + shmaddr, + shmid, + strerror(rv)); // restore state shm->shm_id = shmid; - return; + return CONST_STATUS(rv); } // Reset globals to NULL; to avoid accessing stale handles. @@ -510,6 +691,11 @@ platform_shmdestroy(platform_heap_id *hid_out) shmid); platform_shm_print_usage_stats(&usage); + + // If any fragments were found in-use, that's likely due to something + // going wrong while free()'ing memory. (This could lead to bloated + // shared memory usage, if not rectified.) + return CONST_STATUS(nfrags_in_use); } /* @@ -523,12 +709,13 @@ platform_shmdestroy(platform_heap_id *hid_out) */ // RESOLVE: Pass down user requested alignment and handle it here. void * -platform_shm_alloc(platform_heap_id hid, - const size_t size, - const char *objname, - const char *func, - const char *file, - const int lineno) +platform_shm_alloc(platform_memfrag *memfrag, // IN/OUT + platform_heap_id hid, + const size_t size, + const char *objname, + const char *func, + const char *file, + const int line) { shmem_heap *shm = platform_heap_id_to_shmaddr(hid); @@ -536,15 +723,12 @@ platform_shm_alloc(platform_heap_id hid, "Shared memory heap ID at %p is not a valid shared memory ptr.", hid); - debug_assert(((size % PLATFORM_CACHELINE_SIZE) == 0), - "size=%lu is not aligned to PLATFORM_CACHELINE_SIZE", - size); platform_assert(((((uint64)shm->shm_next) % PLATFORM_CACHELINE_SIZE) == 0), "[%s:%d] Next free-addr is not aligned: " "shm_next=%p, total_bytes=%lu, used_bytes=%lu" ", free_bytes=%lu", file, - lineno, + line, shm->shm_next, shm->usage.total_bytes, shm->usage.used_bytes, @@ -552,16 +736,32 @@ platform_shm_alloc(platform_heap_id hid, void *retptr = NULL; + if (memfrag) { + memfrag->hid = hid; + } // See if we can satisfy requests for large memory fragments from a cached // list of used/free fragments that are tracked separately. - if ((size >= SHM_LARGE_FRAG_SIZE) - && ((retptr = - platform_shm_find_large(shm, size, objname, func, file, lineno)) - != NULL)) - { - return retptr; + if (size >= SHM_LARGE_FRAG_SIZE) { + retptr = + platform_shm_find_large(shm, size, memfrag, objname, func, file, line); + // Else, fall-back to allocating a new large fragment + if (retptr != NULL) { + return retptr; + } + } else { + // Try to satisfy small memory fragments based on requested size, from + // cached list of free-fragments. + retptr = platform_shm_find_small(shm, size, objname, func, file, line); + if (retptr) { + // Return fragment's details to caller. We may have recycled a free + // fragment that is larger than the requested size. + if (memfrag) { + memfrag->addr = (void *)retptr; + memfrag->size = ((free_frag_hdr *)retptr)->free_frag_size; + } + return retptr; + } } - _Static_assert(sizeof(void *) == sizeof(size_t), "check our casts are valid"); @@ -577,32 +777,46 @@ platform_shm_alloc(platform_heap_id hid, platform_error_log( "[%s:%d::%s()]: Insufficient memory in shared segment" - " to allocate %lu bytes for '%s'. Approx free space=%lu bytes." - " nlarge_frags_tracked=%u, nlarge_frags_inuse=%u (HWM=%u).\n", + " to allocate %lu bytes (%s) for '%s'. Approx free" + " space=%lu bytes (%s)." + " shm_num_large_frags_tracked=%u, nm_frags_inuse=%u (HWM=%u).\n", file, - lineno, + line, func, size, + size_str(size), objname, shm->usage.free_bytes, + size_str(shm->usage.free_bytes), shm->usage.nlarge_frags_tracked, shm->usage.nlarge_frags_inuse, shm->usage.nlarge_frags_inuse_HWM); - platform_trace_large_frags(shm); + + // Trace diagnostics + if (Trace_large_frags) { + platform_shm_print_usage(hid); + } return NULL; } - shm->shm_last_alloc = retptr; // Track approx memory usage metrics; mainly for troubleshooting __sync_fetch_and_add(&shm->usage.used_bytes, size); __sync_fetch_and_sub(&shm->usage.free_bytes, size); if (shm->usage.used_bytes > shm->usage.used_bytes_HWM) { shm->usage.used_bytes_HWM = shm->usage.used_bytes; } + // Track new small fragment being allocated ... + if (shm->usage.nfrags_allocated < SHM_NUM_SMALL_FRAGS) { + frag_hdr *newfrag = &shm->shm_allocated_frag[shm->usage.nfrags_allocated]; + newfrag->frag_addr = retptr; + newfrag->frag_size = size; + shm->usage.nfrags_allocated++; + } + shm_unlock_mem(shm); if (size >= SHM_LARGE_FRAG_SIZE) { - platform_shm_track_large_alloc(shm, retptr, size); + platform_shm_track_large_alloc(shm, retptr, size, func, line); } // Trace shared memory allocation; then return memory ptr. @@ -616,7 +830,12 @@ platform_shm_alloc(platform_heap_id hid, objname, func, file, - lineno); + line); + } + // A new fragment was carved out of shm. Inform caller of its properties. + if (memfrag) { + memfrag->size = size; + memfrag->addr = retptr; } return retptr; } @@ -625,70 +844,54 @@ platform_shm_alloc(platform_heap_id hid, * ----------------------------------------------------------------------------- * platform_shm_realloc() -- Re-allocate n-bytes from shared segment. * - * Functionally is similar to 'realloc' system call. We allocate required # of - * bytes, copy over the old contents (if any), and do a fake free of the oldptr. + * Functionally is similar to 'realloc' system call. We allocate requested # + * of bytes, *newsize, copy over the old contents (if any), and free the + * memory for the oldptr. + * + * NOTE(s): + * - This interface does -not- do any cache-line alignment for '*newsize'. + * Caller is expected to do so. platform_realloc() takes care of it. + * - However, it is quite likely that for a fragment request, we might be + * recycling a (small/large) free-fragment, whose size may be bigger + * than requested 'newsize' (but will guranteed to be cache line aligned). + * + * Returns ptr to re-allocated memory. May return a bigger *newsize, if + * a free fragment was recycled and re-allocated. * ----------------------------------------------------------------------------- */ void * -platform_shm_realloc(platform_heap_id hid, - void *oldptr, - const size_t oldsize, - const size_t newsize, - const char *func, - const char *file, - const int lineno) +platform_shm_realloc(platform_memfrag *mf, // IN/OUT + size_t newsize, // IN + const char *func, + const char *file, + const int line) { - debug_assert(((oldptr == NULL) && (oldsize == 0)) || (oldptr && oldsize), - "oldptr=%p, oldsize=%lu", - oldptr, - oldsize); - - // We can only realloc from an oldptr that's allocated from shmem - debug_assert(!oldptr || platform_valid_addr_in_heap(hid, oldptr), - "oldptr=%p is not allocated from shared memory", - oldptr); - - void *retptr = - platform_shm_alloc(hid, newsize, "Unknown", func, file, lineno); + static const char *unknown_obj = "UnknownObj"; + + platform_memfrag realloc_memfrag = {0}; + + // clang-format off + void *retptr = platform_shm_alloc(&realloc_memfrag, mf->hid, newsize, + unknown_obj, func, file, line); + // clang-format on if (retptr) { - // Copy over old contents, if any, and free that memory piece - if (oldptr) { + void *oldptr = mf->addr; + size_t oldsize = mf->size; + // Copy over old contents, if any, and free that old memory piece + if (oldptr && oldsize) { memcpy(retptr, oldptr, oldsize); - platform_shm_free(hid, oldptr, "Unknown", func, file, lineno); + platform_shm_free( + mf->hid, oldptr, oldsize, unknown_obj, func, file, line); } - } else { - // Report approx memory usage metrics w/o spinlock (diagnostics) - shmem_heap *shm = platform_heap_id_to_shmaddr(hid); - size_t total_bytes = shm->usage.total_bytes; - size_t used_bytes = shm->usage.used_bytes; - size_t free_bytes = shm->usage.free_bytes; - size_t num_frees = shm->usage.nfrees; - fraction used_bytes_pct; - fraction free_bytes_pct; - used_bytes_pct = init_fraction(used_bytes, total_bytes); - free_bytes_pct = init_fraction(free_bytes, total_bytes); - - // clang-format off - platform_error_log("%s() failed to reallocate newsize=%lu bytes (%s)" - ", oldsize=%lu bytes (%s)" - ", Used=%lu bytes (%s, " FRACTION_FMT(4, 2) - " %%), Free=%lu bytes (%s, " FRACTION_FMT(4, 2) - " %%)" - ", num-free-calls=%lu\n", - __func__, - newsize, - size_str(newsize), - oldsize, - size_str(oldsize), - used_bytes, - size_str(used_bytes), - (FRACTION_ARGS(used_bytes_pct) * 100), - free_bytes, - size_str(free_bytes), - (FRACTION_ARGS(free_bytes_pct) * 100), - num_frees); - // clang-format off + // Memory fragment is now tracking newly allocated piece of memory + mf->addr = retptr; + + // A larger free-fragment might have been recycled. Its size may be + // bigger than the requested newsize. Return new size to caller. + // (This is critical, otherwise, asserts will trip when an attempt + // is eventually made by the caller to free this fragment.) + mf->size = memfrag_size(&realloc_memfrag); } return retptr; } @@ -707,66 +910,59 @@ platform_shm_realloc(platform_heap_id hid, void platform_shm_free(platform_heap_id hid, void *ptr, + const size_t size, const char *objname, const char *func, const char *file, - const int lineno) + const int line) { shmem_heap *shm = platform_heap_id_to_shmaddr(hid); debug_assert( (platform_shm_heap_valid(shm) == TRUE), - "Shared memory heap ID at %p is not a valid shared memory handle.", + "[%s:%d::%s()] Attempt to free memory at %p for object '%s' failed." + " Shared memory heap ID at %p is not a valid shared memory handle.", + file, + line, + func, + ptr, + objname, hid); - if (!platform_valid_addr_in_heap(hid, ptr)) { - platform_error_log("[%s:%d::%s()] -> %s: Requesting to free memory" - " at %p, for object '%s' which is a memory chunk not" - " allocated from shared memory {start=%p, end=%p}.\n", - file, - lineno, - func, - __func__, - ptr, - objname, - platform_shm_lop(hid), - platform_shm_hip(hid)); + if (!platform_isvalid_addr_in_heap(hid, ptr)) { + platform_assert(FALSE, + "[%s:%d::%s()] -> %s: Requesting to free memory at %p" + ", for object '%s' which is a memory fragment not" + " allocated from shared memory {start=%p, end=%p}.\n", + file, + line, + func, + __func__, + ptr, + objname, + platform_shm_lop(hid), + platform_shm_hip(hid)); return; } - // Micro-optimization for very-last-fragment-allocated being freed - bool maybe_large_frag = TRUE; - size_t frag_size = 0; - - shm_lock_mem(shm); - shm->usage.nfrees++; - if (shm->shm_last_alloc == ptr) { - debug_assert( - shm->shm_next > ptr, "shm_next=%p, free-ptr=%p", shm->shm_next, ptr); - frag_size = (shm->shm_next - ptr); - if (frag_size < SHM_LARGE_FRAG_SIZE) { - // Recycle the most-recently-allocated-small-fragment, now being freed. - shm->shm_next = ptr; - shm->shm_last_alloc = NULL; - shm->usage.free_bytes += frag_size; - shm->usage.used_bytes -= frag_size; - shm->usage.nfrees_last_frag += 1; - - // We know fragment being freed is not a large fragment - maybe_large_frag = FALSE; - } - } - shm_unlock_mem(shm); + debug_assert((size > 0), + "[%s:%d::%s()] -> %s: Attempting to free memory fragment at %p" + " of size=%lu bytes, for object '%s'.", + file, + line, + func, + __func__, + ptr, + size, + objname); - if (maybe_large_frag) { - platform_shm_track_free(shm, ptr, objname, func, file, lineno); - } + platform_shm_track_free(shm, ptr, size, objname, func, file, line); if (Trace_shmem || Trace_shmem_frees) { platform_default_log(" [%s:%d::%s()] -> %s: Request to free memory at " "%p for object '%s'.\n", file, - lineno, + line, func, __func__, ptr, @@ -777,13 +973,17 @@ platform_shm_free(platform_heap_id hid, /* * ----------------------------------------------------------------------------- - * platform_shm_track_large_alloc() - Track the allocation of this large fragment. - * 'Tracking' here means we record this large-fragment in an array tracking - * large-memory fragments allocated. + * platform_shm_track_large_alloc() - Track the allocation of this large + * fragment. 'Tracking' here means we record this large-fragment in an array + * tracking large-memory fragments allocated. * ----------------------------------------------------------------------------- */ static void -platform_shm_track_large_alloc(shmem_heap *shm, void *addr, size_t size) +platform_shm_track_large_alloc(shmem_heap *shm, + void *addr, + size_t size, + const char *func, + const int line) { debug_assert( (size >= SHM_LARGE_FRAG_SIZE), @@ -792,12 +992,15 @@ platform_shm_track_large_alloc(shmem_heap *shm, void *addr, size_t size) size, SHM_LARGE_FRAG_SIZE); + bool found_free_slot = FALSE; + shm_large_frag_info *frag = shm->shm_largemem_frags; + int fctr = 0; - // Iterate through the list of memory fragments being tracked. - int fctr = 0; - shm_large_frag_info *frag = shm->shm_large_frags; shm_lock_mem_frags(shm); - while ((fctr < ARRAY_SIZE(shm->shm_large_frags)) && frag->frag_addr) { + + // Iterate through the list of memory fragments being tracked. + while ((fctr < ARRAY_SIZE(shm->shm_largemem_frags)) && frag->frag_addr) { + // As this is a newly allocated fragment being tracked, it should // not be found elsewhere in the tracker array. platform_assert((frag->frag_addr != addr), @@ -815,8 +1018,10 @@ platform_shm_track_large_alloc(shmem_heap *shm, void *addr, size_t size) fctr++; frag++; } + // If we found a free slot, track our memory fragment at fctr'th slot. - if (fctr < ARRAY_SIZE(shm->shm_large_frags)) { + if (fctr < ARRAY_SIZE(shm->shm_largemem_frags)) { + found_free_slot = TRUE; shm->usage.nlarge_frags_tracked++; shm->usage.nlarge_frags_inuse++; shm->usage.used_by_large_frags_bytes += size; @@ -833,6 +1038,9 @@ platform_shm_track_large_alloc(shmem_heap *shm, void *addr, size_t size) frag->frag_allocated_to_pid = platform_getpid(); frag->frag_allocated_to_tid = platform_get_tid(); + frag->frag_func = func; + frag->frag_line = line; + // The freed_by_pid/freed_by_tid == 0 means fragment is still allocated. // Track highest address of large-fragment that is being tracked. @@ -840,47 +1048,302 @@ platform_shm_track_large_alloc(shmem_heap *shm, void *addr, size_t size) shm->shm_large_frag_hip = addr; } } - + if (!found_free_slot) { + shm->usage.nlarge_frags_full++; + } shm_unlock_mem_frags(shm); } +/* + * Connect a free fragment to the chain provided. Record free-fragment's + * size, so we can find it when next required by-size. + */ +static inline void +platform_shm_hook_free_frag(free_frag_hdr **here, void *ptr, size_t size) +{ + ((free_frag_hdr *)ptr)->free_frag_next = *here; + ((free_frag_hdr *)ptr)->free_frag_size = size; + *here = ptr; +} + +/* + * ----------------------------------------------------------------------------- + * Helper functions for finding 'free' fragment and to walk free-lists. + * ----------------------------------------------------------------------------- + */ +/* + * Simple lookup routine to return the free-fragment header off of which + * free-fragments of a specific 'size' will be hung off of. + * No mutex is required here, as we are simply mapping size to a field's addr. + */ +static free_frag_hdr ** +platform_shm_free_frag_hdr(const shmem_heap *shm, size_t size) +{ + free_frag_hdr **next_frag; + if (size <= 64) { + next_frag = (free_frag_hdr **)&shm->shm_free_le64; + } else if (size <= 128) { + next_frag = (free_frag_hdr **)&shm->shm_free_le128; + } else if (size <= 256) { + next_frag = (free_frag_hdr **)&shm->shm_free_le256; + } else if (size <= 512) { + next_frag = (free_frag_hdr **)&shm->shm_free_le512; + } else { + // Currently unsupported fragment size for recycling + // Small fragments > 512 bytes will be 'lost'; i.e., they can't be + // recycled. This may result in a slow memory leak. We do track metrics + // of # of such frees occurring. See platform_shm_track_free_small_frag(). + // The idea is: IF we do see large #s of such small fragments being + // freed (for some workloads), we can easily add a new tracking list. + next_frag = NULL; + } + return next_frag; +} + +/* + * ----------------------------------------------------------------------------- + * When a memory fragment is being free'd, check if this fragment is already + * in some free-list. If found, it means we are [incorrectly] doing a + * double-free, which indicates a code error. User has possibly messed-up their + * handling of memfrag handles to this memory fragment. + * + * NOTE: This, being a convenience routine, provides for next_frag, which is + * the start of the free-list for given 'size'. If caller has established it, + * pass that here. Otherwise, we will establish it in this routine. + * + * Shared memory mutex is expected to be held for this function. + * ----------------------------------------------------------------------------- + */ +static void * +platform_shm_find_frag_in_free_list(const shmem_heap *shm, + free_frag_hdr **next_frag, + const void *ptr, + const size_t size) +{ + if (!next_frag) { + free_frag_hdr **next_frag = platform_shm_free_frag_hdr(shm, size); + // We are searching for a fragment whose size is not tracked. + if (next_frag == NULL) { // Nothing found. + return NULL; + } + } + + // Walk the free-list to see if our being-free'd ptr lives there already + while (*next_frag && ((*next_frag) != ptr)) { + next_frag = &(*next_frag)->free_frag_next; + } + // Returns the 'ptr' if found; null otherwise. + return (*next_frag); +} + /* * ----------------------------------------------------------------------------- - * platform_shm_track_free() - See if this memory fragment being freed is - * already being tracked. If so, it's a large fragment allocation, which can be - * re-cycled after this free. Do the book-keeping accordingly to record that - * this large-fragment is no longer in-use and can be recycled. + * Diagnostic routine: Iterate through all small-fragment free-lists that + * we currently manage and try to find if the small fragment at address 'ptr' + * is found in any such list. That means, the fragment was previously freed. + * + * Returns: The size of the free-fragment list in which this 'ptr' was found. + * 0, otherwise; (i.e. 'ptr' is not an already-freed-fragment.) + * Optionally, the size marked in this freed-fragment is returned via + * 'freed_size'. If a client incorrectly specified the memfrag's size at + * the time of free(), that will be reported here, and can be detected. + * ----------------------------------------------------------------------------- + */ +static size_t +platform_shm_find_frag_in_freed_lists(const shmem_heap *shm, + const void *ptr, + size_t *freed_size) +{ + size_t free_list_size = SHM_SMALL_FRAG_MIN_SIZE; + + // Process all free-list sizes, till we find the being-freed fragment + while (free_list_size <= SHM_SMALL_FRAG_MAX_SIZE) { + free_frag_hdr **next_frag = + platform_shm_free_frag_hdr(shm, free_list_size); + + free_frag_hdr *found_free_frag; + if ((found_free_frag = platform_shm_find_frag_in_free_list( + shm, next_frag, ptr, free_list_size))) + { + // Return the size as marked on the fragment when it was freed. + if (freed_size) { + *freed_size = found_free_frag->free_frag_size; + } + // We found this fragment 'ptr' in this free-fragment-list! + return free_list_size; + } + free_list_size *= 2; + } + return 0; +} + +/* + * ----------------------------------------------------------------------------- + * Walk the list tracking allocated small fragments to see if our fragment at + * 'addr' (which is being freed) is an allocated fragment. + * Shared-memory alloc/free mutex should be held on entry. + * + * Return the index of this tracked fragment, if found. -1, otherwise. + * ----------------------------------------------------------------------------- + */ +static int +platform_shm_find_small_frag_in_allocated_list(shmem_heap *shm, void *addr) +{ + int ictr = (shm->usage.nfrags_allocated - 1); + while ((ictr >= 0) && (shm->shm_allocated_frag[ictr].frag_addr != addr)) { + ictr--; + } + return ictr; +} + +/* + * ----------------------------------------------------------------------------- + * platform_shm_track_free_small_frag() - Track 'free' of small fragments. + * + * Free this small fragment, and using the 'size' specified, connect it to + * the free-list tracking fragments of this size. * ----------------------------------------------------------------------------- */ static void -platform_shm_track_free(shmem_heap *shm, - void *addr, - const char *objname, - const char *func, - const char *file, - const int lineno) +platform_shm_track_free_small_frag(shmem_heap *shm, + void *addr, + const size_t size, + const char *objname, + const char *func, + const char *file, + const int line) { - shm_lock_mem_frags(shm); + shm_lock_mem(shm); - // If we are freeing a fragment beyond the high-address of all - // large fragments tracked, then this is certainly not a large - // fragment. So, no further need to see if it's a tracked fragment. - if (addr > shm->shm_large_frag_hip) { - shm_unlock_mem_frags(shm); - return; + int frag_idx = platform_shm_find_small_frag_in_allocated_list(shm, addr); + + if (frag_idx >= 0) { + // A fragment should be freed with the same size it was allocated with. + platform_assert((shm->shm_allocated_frag[frag_idx].frag_size == size), + "%s:%d:%s(), objname=%s: " + "Attempt to free fragment at %p of size=%lu bytes" + ", but the size of the small fragment, %lu bytes" + ", tracked at index=%d, does not match the " + "requested size.", + file, + line, + func, + objname, + addr, + size, + shm->shm_allocated_frag[frag_idx].frag_size, + frag_idx); + + // Allocated fragment is being freed; shuffle remaining items to the left. + int items_to_move = (shm->usage.nfrags_allocated - (frag_idx + 1)); + debug_assert((items_to_move >= 0), "items_to_move=%d", items_to_move); + if (items_to_move > 0) { + memmove(&shm->shm_allocated_frag[frag_idx], + &shm->shm_allocated_frag[frag_idx + 1], + (items_to_move * sizeof(shm->shm_allocated_frag[0]))); + } + + shm->usage.nfrags_allocated--; } + + shm->usage.nfrees++; + shm->usage.bytes_freed += size; + + // If this fragment-being-free'd is one of a size we track, find + // the free-list into which the free'd-fragment should be linked. + free_frag_hdr **next_frag = platform_shm_free_frag_hdr(shm, size); + if (next_frag) { + + // clang-format off + debug_code(size_t found_in_free_list_size = 0); + debug_code(size_t free_frag_size = 0); + debug_assert(((found_in_free_list_size + = platform_shm_find_frag_in_freed_lists(shm, + addr, + &free_frag_size)) + == 0), + "%s:%d:%s(), objname=%s: " + "Memory fragment being-freed, %p, of size=%lu bytes" + " was found in freed-fragment-list of size=%lu bytes" + ", and marked as %lu bytes size.", + file, line, func, objname, + addr, size, found_in_free_list_size, free_frag_size); + // clang-format on + + // Hook this now-free fragment into its free-list. + platform_shm_hook_free_frag(next_frag, addr, size); + } + + // Maintain metrics here onwards + shm->usage.nf_search_skipped++; // Track # of optimizations done + + if (size == 0) { + shm->usage.nfrees_eq0++; + } else if (size <= 32) { + shm->usage.nfrees_le32++; + } else if (size <= 64) { + shm->usage.nfrees_le64++; + } else if (size <= 128) { + shm->usage.nfrees_le128++; + } else if (size <= 256) { + shm->usage.nfrees_le256++; + } else if (size <= 512) { + shm->usage.nfrees_le512++; + } else if (size <= KiB) { + shm->usage.nfrees_le1K++; + } else if (size <= (2 * KiB)) { + shm->usage.nfrees_le2K++; + } else if (size <= (4 * KiB)) { + shm->usage.nfrees_le4K++; + } else { + shm->usage.nfrees_rest++; + } + shm->usage.used_bytes -= size; + shm->usage.free_bytes += size; + + shm_unlock_mem(shm); + return; +} + +/* + * ----------------------------------------------------------------------------- + * platform_shm_track_free_large_frag() - Track 'free' of large fragments. + * + * See if this large memory fragment being freed is already being tracked. If + * so, it can be re-cycled after this free. Do the book-keeping accordingly to + * record that this large-fragment is no longer in-use and can be recycled. + * ----------------------------------------------------------------------------- + */ +static void +platform_shm_track_free_large_frag(shmem_heap *shm, + void *addr, + const size_t size, + const char *objname, + const char *func, + const char *file, + const int line) +{ + shm_lock_mem_frags(shm); + shm->usage.nfrees++; + shm->usage.bytes_freed += size; + shm->usage.nfrees_large_frags++; + bool found_tracked_frag = FALSE; bool trace_shmem = (Trace_shmem || Trace_shmem_frees); - shm_large_frag_info *frag = shm->shm_large_frags; - int fctr = 0; - while ((fctr < ARRAY_SIZE(shm->shm_large_frags)) + shm_large_frag_info *frag = shm->shm_largemem_frags; + int fctr = 0; + + // Search the large-fragment tracking array for this fragment being freed. + // If found, mark its tracker that this fragment is free & can be recycled. + while ((fctr < ARRAY_SIZE(shm->shm_largemem_frags)) && (!frag->frag_addr || (frag->frag_addr != addr))) { fctr++; frag++; } - if (fctr < ARRAY_SIZE(shm->shm_large_frags)) { + + if (fctr < ARRAY_SIZE(shm->shm_largemem_frags)) { debug_assert(frag->frag_addr == addr); found_tracked_frag = TRUE; @@ -892,12 +1355,31 @@ platform_shm_track_free(shmem_heap *shm, debug_assert(frag->frag_allocated_to_pid != 0); debug_assert(frag->frag_size != 0); - shm->usage.nlarge_frags_inuse--; + // ----------------------------------------------------------------- + // If a client allocated a large-fragment previously, it should be + // freed with the right original size (for hygiene). It's not + // really a correctness error as the fragment's size has been + // recorded initially when it was allocated. Initially, we tried + // to make this a strict "size == frag->frag_size" check. + // But, it trips for various legit reasons. One example is when + // we call TYPED_MALLOC() to allocate memory for a large struct. + // This request may have gone through large free-fragment recycling + // scheme, in which case we could have allocated a free-fragment with + // a size, frag->frag_size, much larger than requested 'size'. + // ----------------------------------------------------------------- + debug_assert((size <= frag->frag_size), + "Attempt to free a large fragment, %p, with size=%lu" + ", but fragment has size of %lu bytes (%s).", + addr, + size, + frag->frag_size, + size_str(frag->frag_size)); // Mark the fragment as in-use by recording the process/thread that's // doing the free. frag->frag_freed_by_pid = platform_getpid(); frag->frag_freed_by_tid = platform_get_tid(); + shm->usage.nlarge_frags_inuse--; if (trace_shmem) { platform_default_log("OS-pid=%d, ThreadID=%lu" @@ -917,34 +1399,90 @@ platform_shm_track_free(shmem_heap *shm, } shm_unlock_mem_frags(shm); + // We expect that callers invoke the free correctly with the right memory + // fragment handle. Not finding a large fragment requested to be freed + // indicates some coding error. + debug_assert(found_tracked_frag, + "[%s:%d:%s()] Request to track large fragment failed." + " Fragment %p, %lu bytes, for object '%s' is not tracked\n", + file, + line, + func, + addr, + size, + objname); + if (!found_tracked_frag && trace_shmem) { platform_default_log("[OS-pid=%d, ThreadID=%lu, %s:%d::%s()] " ", Fragment %p for object '%s' is not tracked\n", platform_getpid(), platform_get_tid(), file, - lineno, + line, func, addr, objname); } } +/* + * ----------------------------------------------------------------------------- + * platform_shm_track_free() - Track 'free' of small and large fragments. + * ----------------------------------------------------------------------------- + */ +static void +platform_shm_track_free(shmem_heap *shm, + void *addr, + const size_t size, + const char *objname, + const char *func, + const char *file, + const int line) +{ + // All callers of either platform_free() or platform_free_mem() are required + // to declare the size of the memory fragment being freed. We use that info + // to manage free lists. + platform_assert((size > 0), + "%s:%d:%s(), objname=%s: size=%lu must be > 0.", + file, + line, + func, + objname, + size); + + // If we are freeing a fragment beyond the high-address of all + // large fragments tracked, then this is certainly not a large + // fragment. So, no further need to see if it's a tracked large-fragment. + if ((addr > shm->shm_large_frag_hip) || (size && size < SHM_LARGE_FRAG_SIZE)) + { + /* **** Tracking 'free' on smaller fragments. **** */ + platform_shm_track_free_small_frag( + shm, addr, size, objname, func, file, line); + } else { + + /* **** Tracking 'free' on large fragments. **** */ + platform_shm_track_free_large_frag( + shm, addr, size, objname, func, file, line); + } +} + /* * ----------------------------------------------------------------------------- * platform_shm_find_large() - Search the array of large-fragments being tracked * to see if there is an already allocated and now-free large memory fragment. + * As the array of free large-fragments is small, we do a best-fit search. * If so, allocate that fragment to this requester. Do the book-keeping * accordingly. * ----------------------------------------------------------------------------- */ static void * -platform_shm_find_large(shmem_heap *shm, - size_t size, - const char *objname, - const char *func, - const char *file, - const int lineno) +platform_shm_find_large(shmem_heap *shm, + size_t size, + platform_memfrag *memfrag, + const char *objname, + const char *func, + const char *file, + const int line) { debug_assert((size >= SHM_LARGE_FRAG_SIZE), "Incorrect usage of this interface for requested" @@ -952,9 +1490,9 @@ platform_shm_find_large(shmem_heap *shm, size, SHM_LARGE_FRAG_SIZE); - void *retptr = NULL; - shm_large_frag_info *frag = shm->shm_large_frags; - int local_in_use = 0; // Tracked while iterating in this fn, locally + void *retptr = NULL; + shm_large_frag_info *frag = shm->shm_largemem_frags; + int local_in_use = 0; // Tracked while iterating in this fn, locally int found_at_fctr = -1; bool found_tracked_frag = FALSE; @@ -964,7 +1502,8 @@ platform_shm_find_large(shmem_heap *shm, uint32 nlarge_frags_tracked = shm->usage.nlarge_frags_tracked; uint32 nlarge_frags_inuse = shm->usage.nlarge_frags_inuse; - for (int fctr = 0; fctr < ARRAY_SIZE(shm->shm_large_frags); fctr++, frag++) { + for (int fctr = 0; fctr < ARRAY_SIZE(shm->shm_largemem_frags); + fctr++, frag++) { if (!frag->frag_addr || (frag->frag_size < size)) { continue; } @@ -972,9 +1511,14 @@ platform_shm_find_large(shmem_heap *shm, // Skip fragment if it's still in-use if (frag->frag_freed_by_pid == 0) { platform_assert((frag->frag_freed_by_tid == 0), + "%s:%d:%s() objname:%s " "Invalid state found for fragment at index %d," "freed_by_pid=%d but freed_by_tid=%lu " "(which should also be 0)\n", + file, + line, + func, + objname, fctr, frag->frag_freed_by_pid, frag->frag_freed_by_tid); @@ -982,6 +1526,11 @@ platform_shm_find_large(shmem_heap *shm, local_in_use++; continue; } + + // Fragment is free, but is it big enough for current request? + if (frag->frag_size < size) { + continue; + } found_tracked_frag = TRUE; found_at_fctr = fctr; @@ -999,7 +1548,14 @@ platform_shm_find_large(shmem_heap *shm, frag->frag_freed_by_pid = 0; frag->frag_freed_by_tid = 0; + frag->frag_func = func; + frag->frag_line = line; + retptr = frag->frag_addr; + if (memfrag) { + memfrag->addr = retptr; + memfrag->size = frag->frag_size; + } // Zero out the recycled large-memory fragment, just to be sure ... memset(retptr, 0, frag->frag_size); @@ -1041,11 +1597,73 @@ platform_shm_find_large(shmem_heap *shm, local_in_use); } platform_shm_trace_allocs( - shm, size, msg, retptr, objname, func, file, lineno); + shm, size, msg, retptr, objname, func, file, line); } return retptr; } +/* + * ----------------------------------------------------------------------------- + * platform_shm_find_small() - Find a small free-fragment in a cached list of + * free fragments that we track, for specific buckets of fragment sizes. + * If one is found of the suitable size, detach it from the list and return + * start address of the free fragment. Otherwise, return NULL. + * + * NOTE: As the free-fragments are linked using free_frag_hdr{}, we return + * the address of the recycled free-fragment, which can be temporarily + * read as free_frag_hdr{} *. + * ----------------------------------------------------------------------------- + */ +static void * +platform_shm_find_small(shmem_heap *shm, + size_t size, + const char *objname, + const char *func, + const char *file, + const int line) +{ + // Currently, we have only implemented tracking small free fragments of + // 'known' sizes that appear in our workloads. + if ((size < SHM_SMALL_FRAG_MIN_SIZE) || (size > SHM_SMALL_FRAG_MAX_SIZE)) { + return NULL; + } + + // If we are not tracking fragments of this size, nothing further to do. + free_frag_hdr **next_frag = platform_shm_free_frag_hdr(shm, size); + if (next_frag == NULL) { + return NULL; + } + shm_lock_mem(shm); + + // Find the next free frag which is big enough + while (*next_frag && ((*next_frag)->free_frag_size < size)) { + next_frag = &(*next_frag)->free_frag_next; + } + + // If we ran thru the list of free fragments, we are done + free_frag_hdr *retptr = *next_frag; + if (!retptr) { + shm_unlock_mem(shm); + return NULL; + } + *next_frag = retptr->free_frag_next; + retptr->free_frag_next = NULL; + + shm->usage.used_bytes += retptr->free_frag_size; + shm->usage.free_bytes -= retptr->free_frag_size; + + // Track new fragment being allocated ... + if (shm->usage.nfrags_allocated < SHM_NUM_SMALL_FRAGS) { + frag_hdr *newfrag = &shm->shm_allocated_frag[shm->usage.nfrags_allocated]; + newfrag->frag_addr = retptr; + newfrag->frag_size = retptr->free_frag_size; + shm->usage.nfrags_allocated++; + } + + shm_unlock_mem(shm); + return (void *)retptr; +} + /* * ----------------------------------------------------------------------------- * platform_trace_large_frags() - Walk through large-fragments tracking array @@ -1058,13 +1676,14 @@ platform_shm_find_large(shmem_heap *shm, static int platform_trace_large_frags(shmem_heap *shm) { - int local_in_use = 0; // Tracked while iterating in this fn, locally - shm_large_frag_info *frag = shm->shm_large_frags; + int local_in_use = 0; // Tracked while iterating in this fn, locally + shm_large_frag_info *frag = shm->shm_largemem_frags; + + const threadid tid = platform_get_tid(); - threadid thread_tid = platform_get_tid(); - bool print_new_line = false; // Walk the tracked-fragments array looking for an in-use fragment - for (int fctr = 0; fctr < ARRAY_SIZE(shm->shm_large_frags); fctr++, frag++) { + for (int fctr = 0; fctr < ARRAY_SIZE(shm->shm_largemem_frags); + fctr++, frag++) { if (!frag->frag_addr) { continue; } @@ -1087,21 +1706,17 @@ platform_trace_large_frags(shmem_heap *shm) frag->frag_freed_by_tid); } - if (!print_new_line) { - platform_error_log("\n**** [TID=%lu] Large fragment usage " - "diagnostics:\n", - thread_tid); - print_new_line = true; - } - - platform_error_log(" **** [TID=%lu] Fragment at slot=%d, addr=%p" - ", size=%lu (%s) is in-use, allocated_to_pid=%d" - ", allocated_to_tid=%lu\n", - thread_tid, + platform_error_log(" **** [TID=%lu] Large Fragment at slot=%d" + ", addr=%p, size=%lu (%s)." + " Allocated at func=%s, line=%d, is in-use" + ", allocated_to_pid=%d, allocated_to_tid=%lu\n", + tid, fctr, frag->frag_addr, frag->frag_size, size_str(frag->frag_size), + frag->frag_func, + frag->frag_line, frag->frag_allocated_to_pid, frag->frag_allocated_to_tid); } @@ -1163,7 +1778,7 @@ void platform_shm_set_splinterdb_handle(platform_heap_id heap_id, void *addr) { debug_assert(platform_shm_heap_id_valid(heap_id)); - shmem_heap *shm = platform_heap_id_to_shmaddr(heap_id); + shmem_heap *shm = platform_heap_id_to_shmaddr(heap_id); shm->shm_splinterdb_handle = addr; } @@ -1283,6 +1898,29 @@ platform_shm_next_free_addr(platform_heap_id heap_id) return (platform_heap_id_to_shmaddr(heap_id)->shm_next); } +bool +platform_shm_next_free_cacheline_aligned(platform_heap_id heap_id) +{ + return ( + (((uint64)platform_shm_next_free_addr(heap_id)) % PLATFORM_CACHELINE_SIZE) + == 0); +} + +/* + * Test helper-method: Find out if a memory fragment is found in any + * free-lists? + * + * Returns - If found, the 'size' that free-list tracks. 0, otherwise. + */ +size_t +platform_shm_find_freed_frag(platform_heap_id heap_id, + const void *addr, + size_t *freed_frag_size) +{ + shmem_heap *shm = platform_heap_id_to_shmaddr(heap_id); + return platform_shm_find_frag_in_freed_lists(shm, addr, freed_frag_size); +} + static void platform_shm_trace_allocs(shmem_heap *shm, const size_t size, @@ -1291,7 +1929,7 @@ platform_shm_trace_allocs(shmem_heap *shm, const char *objname, const char *func, const char *file, - const int lineno) + const int line) { platform_default_log(" [OS-pid=%d,ThreadID=%lu, %s:%d::%s()] " "-> %s: %s size=%lu bytes (%s)" @@ -1300,7 +1938,7 @@ platform_shm_trace_allocs(shmem_heap *shm, platform_getpid(), platform_get_tid(), file, - lineno, + line, func, __func__, verb, diff --git a/src/platform_linux/shmem.h b/src/platform_linux/shmem.h index c9e33924..86cf17a1 100644 --- a/src/platform_linux/shmem.h +++ b/src/platform_linux/shmem.h @@ -6,33 +6,47 @@ #include #include +/* + * All memory allocations of this size or larger will be tracked in the + * large fragment tracker array. For large inserts workload, we allocate large + * memory chunks for fingerprint array, which is more than a MiB. For scans, + * splinterdb_iterator_init() allocates memory for an iterator which is ~42+KiB. + * Set this to a lower value so we can re-cycle free fragments for iterators + * also. (Keep the limit same for release/debug builds to get consistent + * behaviour.) + */ +#define SHM_LARGE_FRAG_SIZE (32 * KiB) + platform_status platform_shmcreate(size_t size, platform_heap_id *heap_id); -void +platform_status platform_shmdestroy(platform_heap_id *heap_id); /* * Allocate memory fragment from the shared memory of requested 'size'. */ void * -platform_shm_alloc(platform_heap_id hid, - const size_t size, - const char *objname, - const char *func, - const char *file, - const int lineno); +platform_shm_alloc(platform_memfrag *memfrag, // IN/OUT + platform_heap_id hid, + const size_t size, + const char *objname, + const char *func, + const char *file, + const int line); /* - * Free the memory fragment at 'ptr' address. + * Free the memory fragment of 'size' bytes at 'ptr' address. This interface + * deals with free of both small and large-memory fragments. */ void platform_shm_free(platform_heap_id hid, void *ptr, + const size_t size, const char *objname, const char *func, const char *file, - const int lineno); + const int line); /* * Reallocate the memory (fragment) at 'oldptr' of size 'oldsize' bytes. @@ -44,13 +58,11 @@ platform_shm_free(platform_heap_id hid, * Returns ptr to re-allocated memory of 'newsize' bytes. */ void * -platform_shm_realloc(platform_heap_id hid, - void *oldptr, - const size_t oldsize, - const size_t newsize, - const char *func, - const char *file, - const int lineno); +platform_shm_realloc(platform_memfrag *mf, + size_t newsize, + const char *func, + const char *file, + const int line); void platform_shm_tracing_init(const bool trace_shmem, @@ -101,7 +113,12 @@ void * platform_shm_next_free_addr(platform_heap_id heap_id); bool -platform_valid_addr_in_heap(platform_heap_id heap_id, const void *addr); +platform_isvalid_addr_in_heap(platform_heap_id heap_id, const void *addr); -void * -platform_heap_get_splinterdb_handle(platform_heap_id heap_id); +bool +platform_shm_next_free_cacheline_aligned(platform_heap_id heap_id); + +size_t +platform_shm_find_freed_frag(platform_heap_id heap_id, + const void *addr, + size_t *freed_frag_size); diff --git a/src/rc_allocator.c b/src/rc_allocator.c index 1fdc33ec..bd6fce27 100644 --- a/src/rc_allocator.c +++ b/src/rc_allocator.c @@ -240,13 +240,16 @@ rc_allocator_init_meta_page(rc_allocator *al) platform_assert((1 + RC_ALLOCATOR_MAX_ROOT_IDS) * al->cfg->io_cfg->page_size <= al->cfg->io_cfg->extent_size); - al->meta_page = TYPED_ALIGNED_ZALLOC(al->heap_id, - al->cfg->io_cfg->page_size, - al->meta_page, - al->cfg->io_cfg->page_size); + platform_memfrag memfrag_meta_page; + al->meta_page = TYPED_ALIGNED_ZALLOC_MF(&memfrag_meta_page, + al->heap_id, + al->cfg->io_cfg->page_size, + al->meta_page, + al->cfg->io_cfg->page_size); if (al->meta_page == NULL) { return STATUS_NO_MEMORY; } + al->meta_page_mf_size = memfrag_size(&memfrag_meta_page); memset(al->meta_page->splinters, INVALID_ALLOCATOR_ROOT_ID, @@ -356,7 +359,7 @@ rc_allocator_init(rc_allocator *al, rc = platform_buffer_init(&al->bh, buffer_size); if (!SUCCESS(rc)) { platform_mutex_destroy(&al->lock); - platform_free(al->heap_id, al->meta_page); + platform_free_mem(al->heap_id, al->meta_page, al->meta_page_mf_size); platform_error_log("Failed to create buffer for ref counts\n"); return STATUS_NO_MEMORY; } @@ -388,7 +391,7 @@ rc_allocator_deinit(rc_allocator *al) platform_buffer_deinit(&al->bh); al->ref_count = NULL; platform_mutex_destroy(&al->lock); - platform_free(al->heap_id, al->meta_page); + platform_free_mem(al->heap_id, al->meta_page, al->meta_page_mf_size); } /* @@ -438,7 +441,7 @@ rc_allocator_mount(rc_allocator *al, buffer_size = ROUNDUP(buffer_size, cfg->io_cfg->page_size); status = platform_buffer_init(&al->bh, buffer_size); if (!SUCCESS(status)) { - platform_free(al->heap_id, al->meta_page); + platform_free_mem(al->heap_id, al->meta_page, al->meta_page_mf_size); platform_mutex_destroy(&al->lock); platform_error_log("Failed to create buffer to load ref counts\n"); return STATUS_NO_MEMORY; diff --git a/src/rc_allocator.h b/src/rc_allocator.h index 54ed22eb..aeb12356 100644 --- a/src/rc_allocator.h +++ b/src/rc_allocator.h @@ -73,6 +73,7 @@ typedef struct rc_allocator { // Stats -- not distributed for now rc_allocator_stats stats; + size_t meta_page_mf_size; } rc_allocator; platform_status diff --git a/src/routing_filter.c b/src/routing_filter.c index 80af6e92..d621a2af 100644 --- a/src/routing_filter.c +++ b/src/routing_filter.c @@ -400,8 +400,8 @@ routing_filter_add(cache *cc, ROUTING_FPS_PER_PAGE + // old_fp_buffer ROUTING_FPS_PER_PAGE / 32; // encoding_buffer debug_assert(temp_buffer_count < 100000000); - uint32 *temp = - TYPED_ARRAY_ZALLOC(PROCESS_PRIVATE_HEAP_ID, temp, temp_buffer_count); + uint32 *temp = TYPED_ARRAY_ZALLOC_MF( + NULL, PROCESS_PRIVATE_HEAP_ID, temp, temp_buffer_count); if (temp == NULL) { return STATUS_NO_MEMORY; @@ -591,7 +591,15 @@ routing_filter_add(cache *cc, // Set the index_no // ALEX: for now the indices must fit in a single extent - debug_assert(index_no / addrs_per_page < pages_per_extent); + platform_assert((index_no / addrs_per_page < pages_per_extent), + "index_no=%u, addrs_per_page=%lu" + ", (index_no / addrs_per_page)=%lu" + ", pages_per_extent=%lu", + index_no, + addrs_per_page, + (index_no / addrs_per_page), + pages_per_extent); + uint64 index_page_no = index_no / addrs_per_page; uint64 index_offset = index_no % addrs_per_page; uint64 *index_cursor = (uint64 *)(index_page[index_page_no]->data); @@ -630,7 +638,7 @@ routing_filter_add(cache *cc, mini_release(&mini, NULL_KEY); - platform_free(PROCESS_PRIVATE_HEAP_ID, temp); + platform_free_heap(PROCESS_PRIVATE_HEAP_ID, temp); return STATUS_OK; } @@ -691,9 +699,17 @@ routing_filter_estimate_unique_fp(cache *cc, } uint32 buffer_size = total_num_fp / 12; uint32 alloc_size = buffer_size + cfg->index_size; - uint32 *local = TYPED_ARRAY_ZALLOC(hid, local, alloc_size * sizeof(uint32)); - uint32 *fp_arr = local; - uint32 *count = local + buffer_size; + size_t size = (alloc_size * sizeof(uint32)); + fp_hdr local; + uint32 *fp_arr = fingerprint_init(&local, hid, size); + if (!fp_arr) { + platform_error_log("Initialization of fingerprint for %lu tuples" + " failed, likely due to insufficient memory.", + size); + return 0; + } + + uint32 *count = fingerprint_nth(&local, buffer_size); uint32 src_fp_no = 0; uint32 dst_fp_no = 0; @@ -804,7 +820,7 @@ routing_filter_estimate_unique_fp(cache *cc, num_unique++; } - platform_free(hid, local); + fingerprint_deinit(hid, &local); return num_unique * 16; } diff --git a/src/shard_log.c b/src/shard_log.c index 7249bb6e..f2ab0f14 100644 --- a/src/shard_log.c +++ b/src/shard_log.c @@ -326,9 +326,11 @@ shard_log_compare(const void *p1, const void *p2, void *unused) log_handle * log_create(cache *cc, log_config *lcfg, platform_heap_id hid) { + platform_memfrag memfrag_slog; shard_log_config *cfg = (shard_log_config *)lcfg; shard_log *slog = TYPED_MALLOC(hid, slog); - platform_status rc = shard_log_init(slog, cc, cfg); + slog->mf_size = memfrag_size(&memfrag_slog); + platform_status rc = shard_log_init(slog, cc, cfg); platform_assert(SUCCESS(rc)); return (log_handle *)slog; } @@ -374,11 +376,21 @@ shard_log_iterator_init(cache *cc, extent_addr = next_extent_addr; } + size_t num_contents = 0; + finished_first_pass: + num_contents = (num_valid_pages * shard_log_page_size(cfg)); + + platform_memfrag memfrag; + itor->contents = + TYPED_ARRAY_MALLOC_MF(&memfrag, hid, itor->contents, num_contents); + debug_assert(itor->contents); + itor->contents_mf_size = memfrag_size(&memfrag); - itor->contents = TYPED_ARRAY_MALLOC( - hid, itor->contents, num_valid_pages * shard_log_page_size(cfg)); - itor->entries = TYPED_ARRAY_MALLOC(hid, itor->entries, itor->num_entries); + itor->entries = + TYPED_ARRAY_MALLOC_MF(&memfrag, hid, itor->entries, itor->num_entries); + debug_assert(itor->entries); + itor->entries_mf_size = memfrag_size(&memfrag); // traverse the log extents again and copy the kv pairs log_entry *cursor = (log_entry *)itor->contents; @@ -427,8 +439,13 @@ shard_log_iterator_init(cache *cc, void shard_log_iterator_deinit(platform_heap_id hid, shard_log_iterator *itor) { - platform_free(hid, itor->contents); - platform_free(hid, itor->entries); + platform_free_mem(hid, itor->contents, itor->contents_mf_size); + itor->contents = NULL; + itor->contents_mf_size = 0; + + platform_free_mem(hid, itor->entries, itor->entries_mf_size); + itor->entries = NULL; + itor->entries_mf_size = 0; } void diff --git a/src/shard_log.h b/src/shard_log.h index 980c291a..771ccf0a 100644 --- a/src/shard_log.h +++ b/src/shard_log.h @@ -42,6 +42,7 @@ typedef struct shard_log { uint64 addr; uint64 meta_head; uint64 magic; + size_t mf_size; } shard_log; typedef struct log_entry log_entry; @@ -53,6 +54,8 @@ typedef struct shard_log_iterator { log_entry **entries; uint64 num_entries; uint64 pos; + size_t contents_mf_size; // # bytes allocated to contents array + size_t entries_mf_size; // # bytes allocated to entries array } shard_log_iterator; /* diff --git a/src/splinterdb.c b/src/splinterdb.c index b767144b..3c5ceb40 100644 --- a/src/splinterdb.c +++ b/src/splinterdb.c @@ -52,6 +52,7 @@ typedef struct splinterdb { platform_heap_id heap_id; data_config *data_cfg; bool we_created_heap; + size_t mf_size; } splinterdb; @@ -256,13 +257,14 @@ splinterdb_create_or_open(const splinterdb_config *kvs_cfg, // IN (open_existing ? "open existing" : "initialize"), kvs_cfg->filename, platform_status_to_string(status)); - goto deinit_kvhandle; + goto heap_create_failed; } we_created_heap = TRUE; } platform_assert(kvs_out != NULL); + platform_memfrag memfrag_kvs; kvs = TYPED_ZALLOC(use_this_heap_id, kvs); if (kvs == NULL) { status = STATUS_NO_MEMORY; @@ -270,6 +272,7 @@ splinterdb_create_or_open(const splinterdb_config *kvs_cfg, // IN } // Remember, so at close() we only destroy heap if we created it here. kvs->we_created_heap = we_created_heap; + kvs->mf_size = memfrag_size(&memfrag_kvs); // All memory allocation after this call should -ONLY- use heap handles // from the handle to the running Splinter instance; i.e. 'kvs'. @@ -280,7 +283,7 @@ splinterdb_create_or_open(const splinterdb_config *kvs_cfg, // IN (open_existing ? "open existing" : "initialize"), kvs_cfg->filename, platform_status_to_string(status)); - goto deinit_kvhandle; + goto init_config_failed; } // All future memory allocation should come from shared memory, if so @@ -376,6 +379,8 @@ splinterdb_create_or_open(const splinterdb_config *kvs_cfg, // IN deinit_iohandle: io_handle_deinit(&kvs->io_handle); io_handle_init_failed: +init_config_failed: + platform_free(&memfrag_kvs); deinit_kvhandle: // Depending on the place where a configuration / setup error lead // us to here via a 'goto', heap_id handle, if in use, may be in a @@ -384,10 +389,10 @@ splinterdb_create_or_open(const splinterdb_config *kvs_cfg, // IN // => Caller did not setup a platform-heap on entry. debug_assert(kvs_cfg->heap_id == NULL); - platform_free(use_this_heap_id, kvs); platform_heap_destroy(&use_this_heap_id); } +heap_create_failed: return platform_status_to_int(status); } @@ -421,7 +426,7 @@ splinterdb_open(const splinterdb_config *cfg, // IN * None. *----------------------------------------------------------------------------- */ -void +int splinterdb_close(splinterdb **kvs_in) // IN { splinterdb *kvs = *kvs_in; @@ -445,11 +450,18 @@ splinterdb_close(splinterdb **kvs_in) // IN // Free resources carefully to avoid ASAN-test failures platform_heap_id heap_id = kvs->heap_id; bool we_created_heap = kvs->we_created_heap; - platform_free(kvs->heap_id, kvs); + platform_free_mem(kvs->heap_id, kvs, kvs->mf_size); + platform_status rc = STATUS_OK; if (we_created_heap) { - platform_heap_destroy(&heap_id); + rc = platform_heap_destroy(&heap_id); } *kvs_in = (splinterdb *)NULL; + + // Report any errors encountered while dismanting shared memory + // Usually, one might expect to find stray fragments lying around + // which were not freed at run-time. The shared-memory dismantling + // will check and report such violations. + return (SUCCESS(rc) ? 0 : 1); } @@ -650,6 +662,7 @@ struct splinterdb_iterator { trunk_range_iterator sri; platform_status last_rc; const splinterdb *parent; + size_t mf_size; }; int @@ -658,12 +671,14 @@ splinterdb_iterator_init(const splinterdb *kvs, // IN slice user_start_key // IN ) { + platform_memfrag memfrag_it; splinterdb_iterator *it = TYPED_MALLOC(kvs->spl->heap_id, it); if (it == NULL) { platform_error_log("TYPED_MALLOC error\n"); return platform_status_to_int(STATUS_NO_MEMORY); } it->last_rc = STATUS_OK; + it->mf_size = memfrag_size(&memfrag_it); trunk_range_iterator *range_itor = &(it->sri); key start_key; @@ -682,7 +697,8 @@ splinterdb_iterator_init(const splinterdb *kvs, // IN greater_than_or_equal, UINT64_MAX); if (!SUCCESS(rc)) { - platform_free(kvs->spl->heap_id, *iter); + // Backout: Release memory alloc'ed for iterator above. + platform_free(&memfrag_it); return platform_status_to_int(rc); } it->parent = kvs; @@ -698,7 +714,7 @@ splinterdb_iterator_deinit(splinterdb_iterator *iter) trunk_range_iterator_deinit(range_itor); trunk_handle *spl = range_itor->spl; - platform_free(spl->heap_id, range_itor); + platform_free_mem(spl->heap_id, iter, iter->mf_size); } _Bool diff --git a/src/task.c b/src/task.c index 566b2f8d..0a9d5a3b 100644 --- a/src/task.c +++ b/src/task.c @@ -120,7 +120,6 @@ task_deallocate_threadid(task_system *ts, threadid tid) } } - /* * Return the max thread-index across all active tasks. * Mainly intended as a testing hook. @@ -198,6 +197,7 @@ typedef struct { task_system *ts; threadid tid; platform_heap_id heap_id; + size_t mf_size; } thread_invoke; /* @@ -230,7 +230,8 @@ task_invoke_with_hooks(void *func_and_args) // For background threads, also, IO-deregistration will happen here. task_deregister_this_thread(thread_started->ts); - platform_free(thread_started->heap_id, func_and_args); + platform_free_mem( + thread_started->heap_id, thread_started, thread_started->mf_size); } /* @@ -256,16 +257,19 @@ task_create_thread_with_hooks(platform_thread *thread, return STATUS_BUSY; } + platform_memfrag memfrag_scratch = {0}; if (0 < scratch_size) { - char *scratch = TYPED_MANUAL_ZALLOC(ts->heap_id, scratch, scratch_size); + char *scratch = TYPED_ARRAY_MALLOC(ts->heap_id, scratch, scratch_size); if (scratch == NULL) { ret = STATUS_NO_MEMORY; goto dealloc_tid; } - ts->thread_scratch[newtid] = scratch; + ts->thread_scratch[newtid] = scratch; + ts->scratch_mf_size[newtid] = memfrag_size(&memfrag_scratch); } - thread_invoke *thread_to_create = TYPED_ZALLOC(hid, thread_to_create); + platform_memfrag memfrag_thread_to_create = {0}; + thread_invoke *thread_to_create = TYPED_ZALLOC(hid, thread_to_create); if (thread_to_create == NULL) { ret = STATUS_NO_MEMORY; goto free_scratch; @@ -276,6 +280,7 @@ task_create_thread_with_hooks(platform_thread *thread, thread_to_create->heap_id = hid; thread_to_create->ts = ts; thread_to_create->tid = newtid; + thread_to_create->mf_size = memfrag_size(&memfrag_thread_to_create); ret = platform_thread_create( thread, detached, task_invoke_with_hooks, thread_to_create, hid); @@ -286,9 +291,9 @@ task_create_thread_with_hooks(platform_thread *thread, return ret; free_thread: - platform_free(hid, thread_to_create); + platform_free(&memfrag_thread_to_create); free_scratch: - platform_free(ts->heap_id, ts->thread_scratch[newtid]); + platform_free(&memfrag_scratch); dealloc_tid: task_deallocate_threadid(ts, newtid); return ret; @@ -372,13 +377,15 @@ task_register_thread(task_system *ts, "Scratch space should not yet exist for tid %lu.", thread_tid); + platform_memfrag memfrag_scratch = {0}; if (0 < scratch_size) { - char *scratch = TYPED_MANUAL_ZALLOC(ts->heap_id, scratch, scratch_size); + char *scratch = TYPED_ARRAY_ZALLOC(ts->heap_id, scratch, scratch_size); if (scratch == NULL) { task_deallocate_threadid(ts, thread_tid); return STATUS_NO_MEMORY; } - ts->thread_scratch[thread_tid] = scratch; + ts->thread_scratch[thread_tid] = scratch; + ts->scratch_mf_size[thread_tid] = memfrag_size(&memfrag_scratch); } platform_set_tid(thread_tid); @@ -414,8 +421,9 @@ task_deregister_thread(task_system *ts, // scratch space. So, check before trying to free memory. void *scratch = ts->thread_scratch[tid]; if (scratch != NULL) { - platform_free(ts->heap_id, scratch); - ts->thread_scratch[tid] = NULL; + platform_free_mem(ts->heap_id, scratch, ts->scratch_mf_size[tid]); + ts->thread_scratch[tid] = NULL; + ts->scratch_mf_size[tid] = 0; } task_system_io_deregister_thread(ts); @@ -464,7 +472,6 @@ task_group_get_next_task(task_group *group) platform_assert((outstanding_tasks == 1), "outstanding_tasks=%lu\n", outstanding_tasks); - ; } return assigned_task; @@ -520,16 +527,18 @@ task_worker_thread(void *arg) while (group->bg.stop != TRUE) { /* Invariant: we hold the lock */ - task *task_to_run = NULL; - task_to_run = task_group_get_next_task(group); + task *task_to_run = task_group_get_next_task(group); if (task_to_run != NULL) { __sync_fetch_and_add(&group->current_executing_tasks, 1); task_group_unlock(group); const threadid tid = platform_get_tid(); group->stats[tid].total_bg_task_executions++; + task_group_run_task(group, task_to_run); - platform_free(group->ts->heap_id, task_to_run); + platform_free_mem( + group->ts->heap_id, task_to_run, task_to_run->mf_size); + rc = task_group_lock(group); platform_assert(SUCCESS(rc)); __sync_fetch_and_sub(&group->current_executing_tasks, 1); @@ -627,13 +636,15 @@ task_enqueue(task_system *ts, void *arg, bool32 at_head) { - task *new_task = TYPED_ZALLOC(ts->heap_id, new_task); + platform_memfrag memfrag_new_task = {0}; + task *new_task = TYPED_ZALLOC(ts->heap_id, new_task); if (new_task == NULL) { return STATUS_NO_MEMORY; } - new_task->func = func; - new_task->arg = arg; - new_task->ts = ts; + new_task->func = func; + new_task->arg = arg; + new_task->ts = ts; + new_task->mf_size = memfrag_size(&memfrag_new_task); task_group *group = &ts->group[type]; task_queue *tq = &group->tq; @@ -641,7 +652,7 @@ task_enqueue(task_system *ts, rc = task_group_lock(group); if (!SUCCESS(rc)) { - platform_free(ts->heap_id, new_task); + platform_free(&memfrag_new_task); return rc; } @@ -686,8 +697,7 @@ task_group_perform_one(task_group *group, uint64 queue_scale_percent) platform_status rc; task *assigned_task = NULL; - /* We do the queue size comparison in this round-about way to avoid - integer overflow. */ + /* Round-about queue-size comparison done to avoid integer overflow */ if (queue_scale_percent && 100 * group->current_waiting_tasks / queue_scale_percent < group->bg.num_threads) @@ -721,7 +731,8 @@ task_group_perform_one(task_group *group, uint64 queue_scale_percent) group->stats[tid].total_fg_task_executions++; task_group_run_task(group, assigned_task); __sync_fetch_and_sub(&group->current_executing_tasks, 1); - platform_free(group->ts->heap_id, assigned_task); + platform_free_mem( + group->ts->heap_id, assigned_task, assigned_task->mf_size); } else { rc = STATUS_TIMEDOUT; } @@ -871,7 +882,8 @@ task_system_create(platform_heap_id hid, return rc; } - task_system *ts = TYPED_ZALLOC(hid, ts); + platform_memfrag memfrag_ts = {0}; + task_system *ts = TYPED_ZALLOC(hid, ts); if (ts == NULL) { *system = NULL; return STATUS_NO_MEMORY; @@ -879,6 +891,7 @@ task_system_create(platform_heap_id hid, ts->cfg = cfg; ts->ioh = ioh; ts->heap_id = hid; + ts->mf_size = memfrag_size(&memfrag_ts); task_init_tid_bitmask(&ts->tid_bitmask); // task initialization @@ -941,7 +954,7 @@ task_system_destroy(platform_heap_id hid, task_system **ts_in) tid, ts->tid_bitmask); } - platform_free(hid, ts); + platform_free_mem(hid, ts, ts->mf_size); *ts_in = (task_system *)NULL; } diff --git a/src/task.h b/src/task.h index 1b3bea17..a57dc067 100644 --- a/src/task.h +++ b/src/task.h @@ -17,6 +17,7 @@ typedef struct task { void *arg; task_system *ts; timestamp enqueue_time; + size_t mf_size; } task; /* @@ -121,11 +122,14 @@ struct task_system { // max thread id so far. threadid max_tid; void *thread_scratch[MAX_THREADS]; + size_t scratch_mf_size[MAX_THREADS]; + ; // task groups task_group group[NUM_TASK_TYPES]; int hook_init_done; int num_hooks; + size_t mf_size; task_hook hooks[TASK_MAX_HOOKS]; }; diff --git a/src/trunk.c b/src/trunk.c index 4bd0c415..a9f7c792 100644 --- a/src/trunk.c +++ b/src/trunk.c @@ -696,13 +696,15 @@ typedef enum trunk_compaction_type { /* *----------------------------------------------------------------------------- - * Compact Bundle Requests + * Compact Bundle Requests: Arguments to a compact_bundle job. * * Arguments and scratch space to compact bundle number in the * node[s] at height spanning the range [start_key, end_key). * is the address of the node where the bundle was initially created, and is * used to revisit the node to create the iterators which become inputs to the * compaction. + * Memory for the fingerprint array will be allocated, when needed, + * and hung off of breq_fingerprint. *----------------------------------------------------------------------------- */ struct trunk_compact_bundle_req { @@ -725,7 +727,12 @@ struct trunk_compact_bundle_req { uint64 output_pivot_kv_byte_count[TRUNK_MAX_PIVOTS]; uint64 tuples_reclaimed; uint64 kv_bytes_reclaimed; - uint32 *fp_arr; + uint32 *fp_arr_unused; + + fp_hdr breq_fingerprint; + uint64 num_tuples; + uint64 enq_line; // Where task was enqueued + size_t mf_size; }; // an iterator which skips masked pivots @@ -3449,15 +3456,23 @@ trunk_memtable_compact_and_build_filter(trunk_handle *spl, greater_than_or_equal, FALSE, FALSE); - btree_pack_req req; - btree_pack_req_init(&req, - spl->cc, - &spl->cfg.btree_cfg, - itor, - spl->cfg.max_tuples_per_node, - spl->cfg.filter_cfg.hash, - spl->cfg.filter_cfg.seed, - spl->heap_id); + btree_pack_req req; + platform_status rc = btree_pack_req_init(&req, + spl->cc, + &spl->cfg.btree_cfg, + itor, + spl->cfg.max_tuples_per_node, + spl->cfg.filter_cfg.hash, + spl->cfg.filter_cfg.seed, + spl->heap_id); + if (!SUCCESS(rc)) { + platform_error_log("[%d] btree_pack_req_init failed: %s\n", + __LINE__, + platform_status_to_string(rc)); + trunk_memtable_iterator_deinit(spl, &btree_itor, FALSE, FALSE); + return NULL; + } + req.line = __LINE__; uint64 pack_start; if (spl->cfg.use_stats) { spl->stats[tid].root_compactions++; @@ -3488,24 +3503,36 @@ trunk_memtable_compact_and_build_filter(trunk_handle *spl, filter_build_start = platform_get_timestamp(); } - cmt->req = TYPED_ZALLOC(spl->heap_id, cmt->req); - cmt->req->spl = spl; - cmt->req->fp_arr = req.fingerprint_arr; - cmt->req->type = TRUNK_COMPACTION_TYPE_MEMTABLE; - uint32 *dup_fp_arr = - TYPED_ARRAY_MALLOC(spl->heap_id, dup_fp_arr, req.num_tuples); - memmove(dup_fp_arr, cmt->req->fp_arr, req.num_tuples * sizeof(uint32)); - routing_filter empty_filter = {0}; + platform_memfrag memfrag_req = {0}; + cmt->req = TYPED_ZALLOC_MF(&memfrag_req, spl->heap_id, cmt->req); + cmt->req->spl = spl; + cmt->req->type = TRUNK_COMPACTION_TYPE_MEMTABLE; + cmt->req->mf_size = memfrag_size(&memfrag_req); - platform_status rc = routing_filter_add(spl->cc, - &spl->cfg.filter_cfg, - &empty_filter, - &cmt->filter, - cmt->req->fp_arr, - req.num_tuples, - 0); + // Alias to the BTree-pack's fingerprint, for use further below. + fingerprint_alias(&cmt->req->breq_fingerprint, &req.fingerprint); + // Save off the fingerprint, before building the routing filter, below + fp_hdr dup_fp_arr; + if (!fingerprint_init(&dup_fp_arr, spl->heap_id, req.num_tuples)) { + rc = STATUS_NO_MEMORY; + platform_assert(SUCCESS(rc), + "Init of duplicate fingerprint array failed" + ", likely due to insufficient memory."); + } + fingerprint_copy(&dup_fp_arr, &cmt->req->breq_fingerprint); + + routing_filter empty_filter = {0}; + + rc = routing_filter_add(spl->cc, + &spl->cfg.filter_cfg, + &empty_filter, + &cmt->filter, + fingerprint_start(&cmt->req->breq_fingerprint), + req.num_tuples, + 0); platform_assert(SUCCESS(rc)); + if (spl->cfg.use_stats) { spl->stats[tid].root_filter_time_ns += platform_timestamp_elapsed(filter_build_start); @@ -3513,8 +3540,15 @@ trunk_memtable_compact_and_build_filter(trunk_handle *spl, spl->stats[tid].root_filter_tuples += req.num_tuples; } + // Will free memory allocated for BTree-pack's fingerprint object + fingerprint_unalias(&cmt->req->breq_fingerprint); btree_pack_req_deinit(&req, spl->heap_id); - cmt->req->fp_arr = dup_fp_arr; + + // Restore old-copy of fingerprint, and free old-copy's memory + // As dup_fp_arr is on-stack and its fingerprint has been moved over, + // there is no further need to deinit(dup_fp_arr); + fingerprint_move(&cmt->req->breq_fingerprint, &dup_fp_arr); + if (spl->cfg.use_stats) { uint64 comp_time = platform_timestamp_elapsed(comp_start); spl->stats[tid].root_compaction_time_ns += comp_time; @@ -3743,6 +3777,8 @@ trunk_memtable_incorporate_and_flush(trunk_handle *spl, req->height, req->bundle_no); trunk_close_log_stream_if_enabled(spl, &stream); + + req->enq_line = __LINE__; task_enqueue( spl->ts, TASK_TYPE_NORMAL, trunk_bundle_build_filters, req, TRUE); @@ -3915,6 +3951,10 @@ trunk_dec_filter(trunk_handle *spl, routing_filter *filter) /* * Scratch space used for filter building. + * Memory for the fingerprint array is previously allocated, elsewhere. + * As part of preparing for the job of building a filter, filter_fingerprint + * will be initialized to point to this allocated memory. + * See trunk_filter_req_init(). */ typedef struct trunk_filter_scratch { key_buffer start_key; @@ -3924,16 +3964,40 @@ typedef struct trunk_filter_scratch { routing_filter old_filter[TRUNK_MAX_PIVOTS]; uint16 value[TRUNK_MAX_PIVOTS]; routing_filter filter[TRUNK_MAX_PIVOTS]; - uint32 *fp_arr; + fp_hdr filter_fingerprint; + uint64 num_tuples; } trunk_filter_scratch; -static inline void +/* + * Initialize fingerprint object in input 'filter_req'. + * Here, we use the fingerprint object [already] initialized in the + * compact_req, and alias it (point to it) from filter_req. + * + * Returns: TRUE - if aliasing was successful; FALSE - otherwise. + */ +static inline bool trunk_filter_scratch_init(trunk_compact_bundle_req *compact_req, - trunk_filter_scratch *filter_scratch) + trunk_filter_scratch *filter_req) +{ + debug_assert(fingerprint_is_empty(&filter_req->filter_fingerprint), + "addr=%p, size=%lu, init'ed at line=%u", + fingerprint_start(&filter_req->filter_fingerprint), + fingerprint_size(&filter_req->filter_fingerprint), + fingerprint_line(&filter_req->filter_fingerprint)); + + ZERO_CONTENTS(filter_req); + // Returns start of aliased fingerprint + uint32 *fp_start = fingerprint_alias(&filter_req->filter_fingerprint, + &compact_req->breq_fingerprint); + return (fp_start != NULL); +} + +static inline void +trunk_filter_req_fp_deinit(trunk_filter_scratch *filter_req) { - ZERO_CONTENTS(filter_scratch); - filter_scratch->fp_arr = compact_req->fp_arr; + fingerprint_unalias(&filter_req->filter_fingerprint); } + static inline bool32 trunk_compact_bundle_node_has_split(trunk_handle *spl, trunk_compact_bundle_req *req, @@ -4035,7 +4099,7 @@ trunk_build_filter_should_reenqueue(trunk_compact_bundle_req *req, return FALSE; } -static inline void +static inline bool trunk_prepare_build_filter(trunk_handle *spl, trunk_compact_bundle_req *compact_req, trunk_filter_scratch *filter_scratch, @@ -4045,7 +4109,7 @@ trunk_prepare_build_filter(trunk_handle *spl, platform_assert(compact_req->height == height); platform_assert(compact_req->bundle_no == trunk_start_bundle(spl, node)); - trunk_filter_scratch_init(compact_req, filter_scratch); + bool fp_aliased = trunk_filter_scratch_init(compact_req, filter_scratch); uint16 num_children = trunk_num_children(spl, node); for (uint16 pivot_no = 0; pivot_no < num_children; pivot_no++) { @@ -4070,6 +4134,8 @@ trunk_prepare_build_filter(trunk_handle *spl, key_buffer_init_from_key( &filter_scratch->end_key, spl->heap_id, trunk_max_key(spl, node)); filter_scratch->height = height; + + return fp_aliased; } static inline void @@ -4112,10 +4178,11 @@ trunk_build_filters(trunk_handle *spl, routing_filter old_filter = filter_scratch->old_filter[pos]; uint32 fp_start, fp_end; uint64 generation = compact_req->pivot_generation[pos]; + trunk_process_generation_to_fp_bounds( spl, compact_req, generation, &fp_start, &fp_end); - uint32 *fp_arr = filter_scratch->fp_arr + fp_start; - uint32 num_fingerprints = fp_end - fp_start; + + uint32 num_fingerprints = fp_end - fp_start; if (num_fingerprints == 0) { if (old_filter.addr != 0) { trunk_inc_filter(spl, &old_filter); @@ -4123,6 +4190,21 @@ trunk_build_filters(trunk_handle *spl, filter_scratch->filter[pos] = old_filter; continue; } + + // Early-check; otherwise, assert trips in fingerprint_nth() below. + debug_assert( + (fp_start < fingerprint_ntuples(&filter_scratch->filter_fingerprint)), + "Requested fp_start=%u should be < " + "fingerprint for %lu tuples." + " Compact bundle req type=%d, enqueued at line=%lu", + fp_start, + fingerprint_ntuples(&filter_scratch->filter_fingerprint), + compact_req->type, + compact_req->enq_line); + + uint32 *fp_arr = + fingerprint_nth(&filter_scratch->filter_fingerprint, fp_start); + routing_filter new_filter; routing_config *filter_cfg = &spl->cfg.filter_cfg; uint16 value = filter_scratch->value[pos]; @@ -4236,7 +4318,11 @@ trunk_bundle_build_filters(void *arg, void *scratch) trunk_compact_bundle_req *compact_req = (trunk_compact_bundle_req *)arg; trunk_handle *spl = compact_req->spl; + // Referenced in goto label; so declare at the top. + trunk_filter_scratch filter_scratch = {0}; + bool32 should_continue_build_filters = TRUE; + bool filter_req_inited = FALSE; while (should_continue_build_filters) { trunk_node node; platform_status rc = @@ -4256,6 +4342,7 @@ trunk_bundle_build_filters(void *arg, void *scratch) compact_req->height, compact_req->bundle_no); trunk_log_node_if_enabled(&stream, spl, &node); + if (trunk_build_filter_should_abort(compact_req, &node)) { trunk_log_stream_if_enabled(spl, &stream, "leaf split, aborting\n"); trunk_node_unget(spl->cc, &node); @@ -4268,6 +4355,7 @@ trunk_bundle_build_filters(void *arg, void *scratch) } if (trunk_build_filter_should_reenqueue(compact_req, &node)) { + compact_req->enq_line = __LINE__; task_enqueue(spl->ts, TASK_TYPE_NORMAL, trunk_bundle_build_filters, @@ -4281,8 +4369,11 @@ trunk_bundle_build_filters(void *arg, void *scratch) } debug_assert(trunk_verify_node(spl, &node)); - trunk_filter_scratch filter_scratch = {0}; - trunk_prepare_build_filter(spl, compact_req, &filter_scratch, &node); + + // prepare below will setup the fingerprint in this filter_scratch + // aliased to the fingerprint tracked by compact_req. + filter_req_inited = + trunk_prepare_build_filter(spl, compact_req, &filter_scratch, &node); trunk_node_unget(spl->cc, &node); trunk_build_filters(spl, compact_req, &filter_scratch); @@ -4373,6 +4464,10 @@ trunk_bundle_build_filters(void *arg, void *scratch) // cleanup filter_scratch key_buffer_deinit(&filter_scratch.start_key); key_buffer_deinit(&filter_scratch.end_key); + if (filter_req_inited) { + trunk_filter_req_fp_deinit(&filter_scratch); + filter_req_inited = FALSE; + } next_node: debug_assert(trunk_verify_node(spl, &node)); @@ -4401,10 +4496,14 @@ trunk_bundle_build_filters(void *arg, void *scratch) ; out: - platform_free(spl->heap_id, compact_req->fp_arr); + // Deallocate memory + if (filter_req_inited) { + trunk_filter_req_fp_deinit(&filter_scratch); + } + fingerprint_deinit(spl->heap_id, &compact_req->breq_fingerprint); key_buffer_deinit(&compact_req->start_key); key_buffer_deinit(&compact_req->end_key); - platform_free(spl->heap_id, compact_req); + platform_free_mem(spl->heap_id, compact_req, compact_req->mf_size); trunk_maybe_reclaim_space(spl); return; } @@ -4613,6 +4712,7 @@ trunk_compact_bundle_enqueue(trunk_handle *spl, key start_key = key_buffer_key(&req->start_key); key end_key = key_buffer_key(&req->end_key); platform_assert(trunk_key_compare(spl, start_key, end_key) < 0); + req->enq_line = __LINE__; return task_enqueue( spl->ts, TASK_TYPE_NORMAL, trunk_compact_bundle, req, FALSE); } @@ -4671,19 +4771,30 @@ trunk_flush(trunk_handle *spl, } // flush the branch references into a new bundle in the child - trunk_compact_bundle_req *req = TYPED_ZALLOC(spl->heap_id, req); - trunk_bundle *bundle = + platform_memfrag memfrag_req = {0}; + trunk_compact_bundle_req *req = TYPED_ZALLOC(spl->heap_id, req); + if (!req) { + platform_error_log( + "Failed to allocate memory for trunk_compact_bundle_req{}."); + return STATUS_NO_MEMORY; + } + req->mf_size = memfrag_size(&memfrag_req); + + trunk_bundle *bundle = trunk_flush_into_bundle(spl, parent, &new_child, pdata, req); + trunk_tuples_in_bundle(spl, &new_child, bundle, req->input_pivot_tuple_count, req->input_pivot_kv_byte_count); + trunk_pivot_add_bundle_tuple_counts(spl, &new_child, bundle, req->input_pivot_tuple_count, req->input_pivot_kv_byte_count); + trunk_bundle_inc_pivot_rc(spl, &new_child, bundle); debug_assert(allocator_page_valid(spl->al, req->addr)); req->type = is_space_rec ? TRUNK_COMPACTION_TYPE_FLUSH @@ -4692,7 +4803,7 @@ trunk_flush(trunk_handle *spl, // split child if necessary if (trunk_needs_split(spl, &new_child)) { if (trunk_node_is_leaf(&new_child)) { - platform_free(spl->heap_id, req); + platform_free_mem(spl->heap_id, req, req->mf_size); uint16 child_idx = trunk_pdata_to_pivot_index(spl, parent, pdata); trunk_split_leaf(spl, parent, &new_child, child_idx); return STATUS_OK; @@ -5107,6 +5218,21 @@ trunk_compact_bundle(void *arg, void *scratch_buf) trunk_handle *spl = req->spl; threadid tid; + // We may be enqueueing tasks of this type from several call sites. + // Fingerprint mgmt is done inside here, so we claim that + // the queued task's handle did not have any memory allocated for the + // fingerprint array. (Otherwise, this might lead to memory leaks.) + platform_assert(fingerprint_is_empty(&req->breq_fingerprint), + "Fingerprint array is expected to be empty for this task" + ", enqueued at line=%lu, addr=%lu, height=%u" + ", compaction type=%d." + " Fingerprint object init'ed on line %d.", + req->enq_line, + req->addr, + req->height, + req->type, + fingerprint_line(&req->breq_fingerprint)); + /* * 1. Acquire node read lock */ @@ -5150,7 +5276,9 @@ trunk_compact_bundle(void *arg, void *scratch_buf) key_string(trunk_data_config(spl), key_buffer_key(&req->end_key)), req->height, req->bundle_no); - platform_free(spl->heap_id, req); + + platform_free_mem(spl->heap_id, req, req->mf_size); + if (spl->cfg.use_stats) { spl->stats[tid].compactions_aborted_flushed[height]++; spl->stats[tid].compaction_time_wasted_ns[height] += @@ -5241,14 +5369,21 @@ trunk_compact_bundle(void *arg, void *scratch_buf) trunk_compact_bundle_cleanup_iterators( spl, &merge_itor, num_branches, skip_itor_arr); - platform_free(spl->heap_id, req); + + // Ensure this. Otherwise we may be exiting w/o releasing memory. + debug_assert(fingerprint_is_empty(&req->breq_fingerprint), + "addr=%p, size=%lu, init'ed at line=%u", + fingerprint_start(&req->breq_fingerprint), + fingerprint_size(&req->breq_fingerprint), + fingerprint_line(&req->breq_fingerprint)); + + platform_free_mem(spl->heap_id, req, req->mf_size); goto out; } - req->fp_arr = pack_req.fingerprint_arr; if (spl->cfg.use_stats) { pack_start = platform_get_timestamp(); } - + pack_req.line = __LINE__; platform_status pack_status = btree_pack(&pack_req); if (!SUCCESS(pack_status)) { platform_default_log("btree_pack failed: %s\n", @@ -5256,7 +5391,7 @@ trunk_compact_bundle(void *arg, void *scratch_buf) trunk_compact_bundle_cleanup_iterators( spl, &merge_itor, num_branches, skip_itor_arr); btree_pack_req_deinit(&pack_req, spl->heap_id); - platform_free(spl->heap_id, req); + platform_free_mem(spl->heap_id, req, req->mf_size); goto out; } @@ -5266,10 +5401,12 @@ trunk_compact_bundle(void *arg, void *scratch_buf) } trunk_branch new_branch; - new_branch.root_addr = pack_req.root_addr; - uint64 num_tuples = pack_req.num_tuples; - req->fp_arr = pack_req.fingerprint_arr; - pack_req.fingerprint_arr = NULL; + new_branch.root_addr = pack_req.root_addr; + uint64 num_tuples = pack_req.num_tuples; + + // BTree pack is successful. Prepare to deinit pack request struct. + // But, retain the fingerprint generated by pack for further processing. + fingerprint_move(&req->breq_fingerprint, &pack_req.fingerprint); btree_pack_req_deinit(&pack_req, spl->heap_id); trunk_log_stream_if_enabled( @@ -5336,8 +5473,9 @@ trunk_compact_bundle(void *arg, void *scratch_buf) if (num_tuples != 0) { trunk_dec_ref(spl, &new_branch, FALSE); } - platform_free(spl->heap_id, req->fp_arr); - platform_free(spl->heap_id, req); + // Free fingerprint and req struct memory + fingerprint_deinit(spl->heap_id, &req->breq_fingerprint); + platform_free_mem(spl->heap_id, req, req->mf_size); goto out; } @@ -5416,8 +5554,9 @@ trunk_compact_bundle(void *arg, void *scratch_buf) spl->stats[tid].compaction_time_wasted_ns[height] += platform_timestamp_elapsed(compaction_start); } - platform_free(spl->heap_id, req->fp_arr); - platform_free(spl->heap_id, req); + // Free fingerprint and req struct memory + fingerprint_deinit(spl->heap_id, &req->breq_fingerprint); + platform_free_mem(spl->heap_id, req, req->mf_size); } else { if (spl->cfg.use_stats) { compaction_start = platform_timestamp_elapsed(compaction_start); @@ -5435,6 +5574,7 @@ trunk_compact_bundle(void *arg, void *scratch_buf) key_string(trunk_data_config(spl), key_buffer_key(&req->end_key)), req->height, req->bundle_no); + req->enq_line = __LINE__; task_enqueue( spl->ts, TASK_TYPE_NORMAL, trunk_bundle_build_filters, req, TRUE); } @@ -5550,7 +5690,10 @@ trunk_split_index(trunk_handle *spl, trunk_close_log_stream_if_enabled(spl, &stream); if (req != NULL) { + platform_memfrag memfrag_next_req = {0}; trunk_compact_bundle_req *next_req = TYPED_MALLOC(spl->heap_id, next_req); + next_req->mf_size = memfrag_size(&memfrag_next_req); + memmove(next_req, req, sizeof(trunk_compact_bundle_req)); next_req->addr = right_node.addr; key_buffer_init_from_key( @@ -5943,11 +6086,13 @@ trunk_split_leaf(trunk_handle *spl, /* * 6. Issue compact_bundle for leaf and release */ + platform_memfrag memfrag_req = {0}; trunk_compact_bundle_req *req = TYPED_ZALLOC(spl->heap_id, req); req->spl = spl; req->addr = leaf->addr; req->type = comp_type; req->bundle_no = bundle_no; + req->mf_size = memfrag_size(&memfrag_req); req->max_pivot_generation = trunk_pivot_generation(spl, leaf); req->pivot_generation[0] = trunk_pivot_generation(spl, leaf) - 1; req->input_pivot_tuple_count[0] = trunk_pivot_num_tuples(spl, leaf, 0); @@ -5976,9 +6121,11 @@ trunk_split_leaf(trunk_handle *spl, } // set next_addr of leaf (from last iteration) - trunk_compact_bundle_req *req = TYPED_ZALLOC(spl->heap_id, req); - req->spl = spl; - req->addr = leaf->addr; + platform_memfrag memfrag_req = {0}; + trunk_compact_bundle_req *req = TYPED_ZALLOC(spl->heap_id, req); + req->spl = spl; + req->addr = leaf->addr; + req->mf_size = memfrag_size(&memfrag_req); // req->height already 0 req->bundle_no = bundle_no; req->max_pivot_generation = trunk_pivot_generation(spl, leaf); @@ -6540,9 +6687,11 @@ trunk_compact_leaf(trunk_handle *spl, trunk_node *leaf) trunk_leaf_rebundle_all_branches(spl, leaf, num_tuples, kv_bytes, TRUE); // Issue compact_bundle for leaf and release - trunk_compact_bundle_req *req = TYPED_ZALLOC(spl->heap_id, req); - req->spl = spl; - req->addr = leaf->addr; + platform_memfrag memfrag_req = {0}; + trunk_compact_bundle_req *req = TYPED_ZALLOC(spl->heap_id, req); + req->spl = spl; + req->addr = leaf->addr; + req->mf_size = memfrag_size(&memfrag_req); // req->height already 0 req->bundle_no = bundle_no; req->max_pivot_generation = trunk_pivot_generation(spl, leaf); @@ -7543,6 +7692,7 @@ trunk_range(trunk_handle *spl, tuple_function func, void *arg) { + platform_memfrag memfrag_range_itor; trunk_range_iterator *range_itor = TYPED_MALLOC(PROCESS_PRIVATE_HEAP_ID, range_itor); platform_status rc = trunk_range_iterator_init(spl, @@ -7570,27 +7720,80 @@ trunk_range(trunk_handle *spl, destroy_range_itor: trunk_range_iterator_deinit(range_itor); - platform_free(PROCESS_PRIVATE_HEAP_ID, range_itor); + platform_free_heap(PROCESS_PRIVATE_HEAP_ID, range_itor); return rc; } +/* + * Allocate memory for stats-related structures off the trunk. + */ +static trunk_stats * +trunk_stats_init(trunk_handle *spl) +{ + platform_memfrag memfrag; + spl->stats = + TYPED_ARRAY_ZALLOC_MF(&memfrag, spl->heap_id, spl->stats, MAX_THREADS); + platform_assert(spl->stats); + + // Remember this; it's needed for free + spl->stats_mf_size = memfrag_size(&memfrag); + + for (uint64 i = 0; i < MAX_THREADS; i++) { + platform_status rc; + rc = platform_histo_create(spl->heap_id, + LATENCYHISTO_SIZE + 1, + latency_histo_buckets, + &spl->stats[i].insert_latency_histo); + platform_assert_status_ok(rc); + rc = platform_histo_create(spl->heap_id, + LATENCYHISTO_SIZE + 1, + latency_histo_buckets, + &spl->stats[i].update_latency_histo); + platform_assert_status_ok(rc); + rc = platform_histo_create(spl->heap_id, + LATENCYHISTO_SIZE + 1, + latency_histo_buckets, + &spl->stats[i].delete_latency_histo); + platform_assert_status_ok(rc); + } + return spl->stats; +} /* - *----------------------------------------------------------------------------- - * Create/destroy - * XXX Fix this api to return platform_status - *----------------------------------------------------------------------------- + * Deallocate memory for stats-related structures hanging off the trunk. */ -trunk_handle * -trunk_create(trunk_config *cfg, - allocator *al, - cache *cc, - task_system *ts, - allocator_root_id id, - platform_heap_id hid) +static void +trunk_stats_deinit(trunk_handle *spl) { - trunk_handle *spl = TYPED_FLEXIBLE_STRUCT_ZALLOC( + debug_assert(spl->cfg.use_stats); + for (uint64 i = 0; i < MAX_THREADS; i++) { + platform_histo_destroy(spl->heap_id, &spl->stats[i].insert_latency_histo); + platform_histo_destroy(spl->heap_id, &spl->stats[i].update_latency_histo); + platform_histo_destroy(spl->heap_id, &spl->stats[i].delete_latency_histo); + } + platform_free_mem(spl->heap_id, spl->stats, spl->stats_mf_size); +} + +/* + * Allocate memory for trunk handle and do initialization that is common between + * 'create' and 'mount' operations. + */ +static trunk_handle * +trunk_handle_init(trunk_config *cfg, + allocator *al, + cache *cc, + task_system *ts, + allocator_root_id id, + platform_heap_id hid) +{ + platform_memfrag memfrag_spl; + trunk_handle *spl = TYPED_FLEXIBLE_STRUCT_ZALLOC( hid, spl, compacted_memtable, TRUNK_NUM_MEMTABLES); + platform_assert(spl != NULL); + + // Remember this; it's needed for free + spl->mf_size = memfrag_size(&memfrag_spl); + memmove(&spl->cfg, cfg, sizeof(*cfg)); // Validate configured key-size is within limits. @@ -7601,10 +7804,27 @@ trunk_create(trunk_config *cfg, spl->heap_id = hid; spl->ts = ts; - platform_batch_rwlock_init(&spl->trunk_root_lock); - srq_init(&spl->srq, platform_get_module_id(), hid); + return spl; +} + +/* + *----------------------------------------------------------------------------- + * Create/destroy + * XXX Fix this api to return platform_status + *----------------------------------------------------------------------------- + */ +trunk_handle * +trunk_create(trunk_config *cfg, + allocator *al, + cache *cc, + task_system *ts, + allocator_root_id id, + platform_heap_id hid) +{ + trunk_handle *spl = trunk_handle_init(cfg, al, cc, ts, id, hid); + // get a free node for the root // we don't use the mini allocator for this, since the root doesn't // maintain constant height @@ -7666,26 +7886,7 @@ trunk_create(trunk_config *cfg, trunk_node_unget(spl->cc, &root); if (spl->cfg.use_stats) { - spl->stats = TYPED_ARRAY_ZALLOC(spl->heap_id, spl->stats, MAX_THREADS); - platform_assert(spl->stats); - for (uint64 i = 0; i < MAX_THREADS; i++) { - platform_status rc; - rc = platform_histo_create(spl->heap_id, - LATENCYHISTO_SIZE + 1, - latency_histo_buckets, - &spl->stats[i].insert_latency_histo); - platform_assert_status_ok(rc); - rc = platform_histo_create(spl->heap_id, - LATENCYHISTO_SIZE + 1, - latency_histo_buckets, - &spl->stats[i].update_latency_histo); - platform_assert_status_ok(rc); - rc = platform_histo_create(spl->heap_id, - LATENCYHISTO_SIZE + 1, - latency_histo_buckets, - &spl->stats[i].delete_latency_histo); - platform_assert_status_ok(rc); - } + spl->stats = trunk_stats_init(spl); } return spl; @@ -7702,18 +7903,7 @@ trunk_mount(trunk_config *cfg, allocator_root_id id, platform_heap_id hid) { - trunk_handle *spl = TYPED_FLEXIBLE_STRUCT_ZALLOC( - hid, spl, compacted_memtable, TRUNK_NUM_MEMTABLES); - memmove(&spl->cfg, cfg, sizeof(*cfg)); - - spl->al = al; - spl->cc = cc; - debug_assert(id != INVALID_ALLOCATOR_ROOT_ID); - spl->id = id; - spl->heap_id = hid; - spl->ts = ts; - - srq_init(&spl->srq, platform_get_module_id(), hid); + trunk_handle *spl = trunk_handle_init(cfg, al, cc, ts, id, hid); platform_batch_rwlock_init(&spl->trunk_root_lock); @@ -7740,7 +7930,7 @@ trunk_mount(trunk_config *cfg, super, meta_tail, latest_timestamp); - platform_free(hid, spl); + platform_free_mem(hid, spl, spl->mf_size); return (trunk_handle *)NULL; } uint64 meta_head = spl->root_addr + trunk_page_size(&spl->cfg); @@ -7765,26 +7955,7 @@ trunk_mount(trunk_config *cfg, trunk_set_super_block(spl, FALSE, FALSE, FALSE); if (spl->cfg.use_stats) { - spl->stats = TYPED_ARRAY_ZALLOC(spl->heap_id, spl->stats, MAX_THREADS); - platform_assert(spl->stats); - for (uint64 i = 0; i < MAX_THREADS; i++) { - platform_status rc; - rc = platform_histo_create(spl->heap_id, - LATENCYHISTO_SIZE + 1, - latency_histo_buckets, - &spl->stats[i].insert_latency_histo); - platform_assert_status_ok(rc); - rc = platform_histo_create(spl->heap_id, - LATENCYHISTO_SIZE + 1, - latency_histo_buckets, - &spl->stats[i].update_latency_histo); - platform_assert_status_ok(rc); - rc = platform_histo_create(spl->heap_id, - LATENCYHISTO_SIZE + 1, - latency_histo_buckets, - &spl->stats[i].delete_latency_histo); - platform_assert_status_ok(rc); - } + spl->stats = trunk_stats_init(spl); } return spl; } @@ -7818,7 +7989,8 @@ trunk_prepare_for_shutdown(trunk_handle *spl) // release the log if (spl->cfg.use_log) { - platform_free(spl->heap_id, spl->log); + platform_free_mem( + spl->heap_id, spl->log, ((shard_log *)spl->log)->mf_size); } // release the trunk mini allocator @@ -7880,17 +8052,9 @@ trunk_destroy(trunk_handle *spl) allocator_remove_super_addr(spl->al, spl->id); if (spl->cfg.use_stats) { - for (uint64 i = 0; i < MAX_THREADS; i++) { - platform_histo_destroy(spl->heap_id, - &spl->stats[i].insert_latency_histo); - platform_histo_destroy(spl->heap_id, - &spl->stats[i].update_latency_histo); - platform_histo_destroy(spl->heap_id, - &spl->stats[i].delete_latency_histo); - } - platform_free(spl->heap_id, spl->stats); + trunk_stats_deinit(spl); } - platform_free(spl->heap_id, spl); + platform_free_mem(spl->heap_id, spl, spl->mf_size); } /* @@ -7905,17 +8069,9 @@ trunk_unmount(trunk_handle **spl_in) trunk_prepare_for_shutdown(spl); trunk_set_super_block(spl, FALSE, TRUE, FALSE); if (spl->cfg.use_stats) { - for (uint64 i = 0; i < MAX_THREADS; i++) { - platform_histo_destroy(spl->heap_id, - &spl->stats[i].insert_latency_histo); - platform_histo_destroy(spl->heap_id, - &spl->stats[i].update_latency_histo); - platform_histo_destroy(spl->heap_id, - &spl->stats[i].delete_latency_histo); - } - platform_free(spl->heap_id, spl->stats); + trunk_stats_deinit(spl); } - platform_free(spl->heap_id, spl); + platform_free_mem(spl->heap_id, spl, spl->mf_size); *spl_in = (trunk_handle *)NULL; } @@ -8796,6 +8952,7 @@ trunk_print_insertion_stats(platform_log_handle *log_handle, trunk_handle *spl) trunk_stats *global; + platform_memfrag memfrag_global; global = TYPED_ZALLOC(spl->heap_id, global); if (global == NULL) { platform_error_log("Out of memory for statistics"); @@ -9091,7 +9248,7 @@ trunk_print_insertion_stats(platform_log_handle *log_handle, trunk_handle *spl) platform_log(log_handle, "------------------------------------------------------------------------------------\n"); cache_print_stats(log_handle, spl->cc); platform_log(log_handle, "\n"); - platform_free(spl->heap_id, global); + platform_free(&memfrag_global); } void @@ -9113,6 +9270,7 @@ trunk_print_lookup_stats(platform_log_handle *log_handle, trunk_handle *spl) trunk_stats *global; + platform_memfrag memfrag_global; global = TYPED_ZALLOC(spl->heap_id, global); if (global == NULL) { platform_error_log("Out of memory for stats\n"); @@ -9178,7 +9336,7 @@ trunk_print_lookup_stats(platform_log_handle *log_handle, trunk_handle *spl) } platform_log(log_handle, "------------------------------------------------------------------------------------|\n"); platform_log(log_handle, "\n"); - platform_free(spl->heap_id, global); + platform_free(&memfrag_global); platform_log(log_handle, "------------------------------------------------------------------------------------\n"); cache_print_stats(log_handle, spl->cc); platform_log(log_handle, "\n"); diff --git a/src/trunk.h b/src/trunk.h index 15b6ad3a..78385b35 100644 --- a/src/trunk.h +++ b/src/trunk.h @@ -202,6 +202,7 @@ struct trunk_handle { // stats trunk_stats *stats; + size_t stats_mf_size; // Of allocated memory fragment. // Link inside the splinter list List_Links links; @@ -218,6 +219,7 @@ struct trunk_handle { // space rec queue srq srq; + size_t mf_size; // of memory fragment allocated to init trunk_handle{} trunk_compacted_memtable compacted_memtable[/*cfg.mt_cfg.max_memtables*/]; }; @@ -229,6 +231,7 @@ typedef struct trunk_range_iterator { uint64 num_memtable_branches; uint64 memtable_start_gen; uint64 memtable_end_gen; + size_t mf_size; bool32 compacted[TRUNK_RANGE_ITOR_MAX_BRANCHES]; merge_iterator *merge_itor; bool32 can_prev; diff --git a/src/util.c b/src/util.c index 99c13ab7..f8fcadd9 100644 --- a/src/util.c +++ b/src/util.c @@ -18,22 +18,26 @@ writable_buffer_ensure_space(writable_buffer *wb, uint64 minspace) minspace = 2 * wb->buffer_capacity; } - void *newdata = NULL; - if (wb->can_free) { - newdata = platform_realloc( - wb->heap_id, wb->buffer_capacity, wb->buffer, minspace); - } else { - char *newbuf = TYPED_MANUAL_MALLOC(wb->heap_id, newbuf, minspace); - if (newbuf && writable_buffer_data(wb)) { - memcpy(newbuf, wb->buffer, wb->length); - } - newdata = (void *)newbuf; - } + void *oldptr = wb->can_free ? wb->buffer : NULL; + + platform_memfrag mf = {0}; + memfrag_init(&mf, wb->heap_id, oldptr, wb->buffer_capacity); + + // NOTE: realloc() may adjust-up 'minspace' for alignment + // ALSO: We must supply correct size of fragment being freed, which + // will be the buffer capacity, but not just the oldspace in-use. + // (Otherwise, free-fragment management will run into memory leaks.) + void *newdata = platform_realloc(&mf, minspace); if (newdata == NULL) { return STATUS_NO_MEMORY; } - wb->buffer_capacity = minspace; + if (oldptr == NULL && wb->length != WRITABLE_BUFFER_NULL_LENGTH) { + memcpy(newdata, wb->buffer, wb->length); + } + + // Record allocated buffer capacities + wb->buffer_capacity = mf.size; // May have gone up due to realloc wb->buffer = newdata; wb->can_free = TRUE; return STATUS_OK; diff --git a/src/util.h b/src/util.h index 0eaf72fe..fec35da5 100644 --- a/src/util.h +++ b/src/util.h @@ -141,6 +141,12 @@ writable_buffer_length(const writable_buffer *wb) return wb->length; } +static inline uint64 +writable_buffer_capacity(const writable_buffer *wb) +{ + return wb->buffer_capacity; +} + /* May allocate memory */ platform_status writable_buffer_resize(writable_buffer *wb, uint64 newlength); @@ -212,10 +218,11 @@ static inline void writable_buffer_deinit(writable_buffer *wb) { if (wb->can_free) { - platform_free(wb->heap_id, wb->buffer); + platform_free_mem(wb->heap_id, wb->buffer, wb->buffer_capacity); } - wb->buffer = NULL; - wb->can_free = FALSE; + wb->buffer = NULL; + wb->buffer_capacity = 0; + wb->can_free = FALSE; } static inline void @@ -278,6 +285,278 @@ writable_buffer_append(writable_buffer *wb, uint64 length, const void *newdata) writable_buffer_copy_slice(&dst##wb, src); \ slice dst = writable_buffer_to_slice(&dst##wb); +/* + * ---------------------------------------------------------------------- + * Fingerprint Header: Fingerprints are managed while building routing filters. + * This structure encapsulates a handle to such an allocated array. + * Different modules and functions deal with such arrays. In order to + * free memory fragments allocated for these arrays from shared-segments + * we need to track the size of the memory fragment allocated. + * ---------------------------------------------------------------------- + */ +typedef struct fp_hdr { + platform_memfrag mf; + size_t ntuples; // # of tuples for which fingerprint was created. + uint16 init_line; // Where init()/ deinit() was called from + uint16 copy_line; // Where copy() was called from + uint16 alias_line; // Where alias() / unalias() was called from + uint16 move_line; // Where move() was called from + debug_code(struct fp_hdr *srcfp); + debug_code(uint16 num_aliases); +} fp_hdr; + +/* Consistent message format to identify fp_hdr{} when asserts fail. */ +#define FP_FMT \ + "from line=%d, fp_hdr=%p, fp=%p, init_line=%u, copy_line=%u" \ + ", alias_line=%u, move_line=%u, ntuples=%lu" + +#define FP_FIELDS(p) \ + p, fingerprint_start(p), p->init_line, p->copy_line, p->alias_line, \ + p->move_line, p->ntuples + +/* Return the start of the fingerprint array. */ +static inline uint32 * +fingerprint_start(const fp_hdr *fp) +{ + return (uint32 *)memfrag_start(&fp->mf); +} + +/* + * ---------------------------------------------------------------------------- + * uint32 * = fingerprint_init(fp_hdr *fp, platform_heap_id hid, + * size_t num_tuples) + * + * Initialize a fingerprint object, allocating memory for fingerprint array. + * We know to 'init' an array of uint32 items, which is what fingerprint uses. + * + * Returns: Start of allocated fingerprint. NULL, if we ran out of memory. + * ---------------------------------------------------------------------------- + */ +#define fingerprint_init(fp, hid, num_tuples) \ + fingerprint_do_init((fp), (hid), (num_tuples), __LINE__) + +static inline uint32 * +fingerprint_do_init(fp_hdr *fp, + platform_heap_id hid, + size_t num_tuples, + uint32 line) +{ + ZERO_CONTENTS(fp); + platform_memfrag memfrag_fp_arr; + uint32 *fp_arr = TYPED_ARRAY_ZALLOC(hid, fp_arr, num_tuples); + // Upon successful allocation, save memory fragment info. It will be + // needed when we have to free fingerprint's memory. + if (fp_arr != NULL) { + fp->mf = memfrag_fp_arr; + fp->ntuples = num_tuples; + } + fp->init_line = line; + return fp_arr; +} + +/* Validate that fingerprint object is currently empty; i.e. uninit'ed. */ +static inline bool +fingerprint_is_empty(const fp_hdr *fp) +{ + return memfrag_is_empty(&fp->mf) debug_code(&&(fp->num_aliases == 0)); +} + +/* + * ---------------------------------------------------------------------------- + * void = fingerprint_deinit(platform_heap_id hid, fp_hdr *fp) + * + * Releases the memory allocated for the fingerprint array. + * ---------------------------------------------------------------------------- + */ +#define fingerprint_deinit(hid, fp) fingerprint_do_deinit((hid), (fp), __LINE__) + +static inline void +fingerprint_do_deinit(platform_heap_id hid, fp_hdr *fp, uint32 line) +{ + // Should only be called on a fingerprint that has gone thru init() + debug_assert(!fingerprint_is_empty(fp), FP_FMT, line, FP_FIELDS(fp)); + + debug_assert((fp->num_aliases == 0), + "%u references exist to fingerprint at %p, init'ed at line=%d" + ", alias'ed at line=%d, which may potentially cause illegal" + " memory access after this deinit operation, from line=%d.", + fp->num_aliases, + memfrag_start(&fp->mf), + fp->init_line, + fp->alias_line, + line); + + platform_free(&fp->mf); + fp->ntuples = -1; // Indicates that fingerprint went thru deinit() + fp->init_line = line; +} + +/* Return the size of the fingerprint array, in # of bytes allocated. */ +static inline size_t +fingerprint_size(fp_hdr *fp) +{ + return memfrag_size(&fp->mf); +} + +/* Return the # of tuples for which fingerprint was created */ +static inline size_t +fingerprint_ntuples(fp_hdr *fp) +{ + return fp->ntuples; +} + +/* Return the line # where _init()/deinit() was called on this fingerprint */ +static inline uint32 +fingerprint_line(fp_hdr *fp) +{ + return fp->init_line; +} + +/* Return the start of the n'th piece (tuple) in the fingerprint array. */ +#define fingerprint_nth(dst, src) \ + fingerprint_do_nth((dst), (src), __FILE__, __LINE__) + +static inline uint32 * +fingerprint_do_nth(fp_hdr *fp, + uint32 nth_tuple, + const char *file, + const int line) +{ + // Cannot ask for a location beyond size of fingerprint array + platform_assert((nth_tuple < fingerprint_ntuples(fp)), + "[%s] nth_tuple=%u, ntuples=%lu, " FP_FMT, + file, + nth_tuple, + fingerprint_ntuples(fp), + line, + FP_FIELDS(fp)); + + return ((uint32 *)memfrag_start(&fp->mf) + nth_tuple); +} + +/* + * ---------------------------------------------------------------------------- + * Deep-Copy the contents of 'src' fingerprint object into 'dst' fingerprint. + * 'dst' fingerprint is expected to have been init'ed which would have allocated + * sufficient memory required to copy-over the 'src' fingerprint array. + */ +#define fingerprint_copy(dst, src) fingerprint_do_copy((dst), (src), __LINE__) + +static inline void +fingerprint_do_copy(fp_hdr *dst, fp_hdr *src, uint32 line) +{ + debug_assert(!fingerprint_is_empty(dst)); + memmove( + memfrag_start(&dst->mf), memfrag_start(&src->mf), memfrag_size(&dst->mf)); + dst->copy_line = line; +} + +/* + * ---------------------------------------------------------------------------- + * For some temporary manipulation of fingerprints, point the fingerprint array + * of 'dst' to the one managed by 'src' fingerprint. Memory allocated for the + * fingerprint array will now be pointed to by two fingerprint objects. + * + * Aliasing is a substitute for init'ing of the 'dst', where we don't allocate + * any new memory but "take-over" the fingerprint managed by the 'src' object. + * + * For proper memory management, alias should be followed by an unalias before + * the 'src' fingerprint is deinit'ed. + * + * Returns the start of 'cloned' start of fingerprint. + */ +#define fingerprint_alias(dst, src) fingerprint_do_alias((dst), (src), __LINE__) + +static inline uint32 * +fingerprint_do_alias(fp_hdr *dst, const fp_hdr *src, uint32 line) +{ + debug_assert(fingerprint_is_empty(dst), FP_FMT, line, FP_FIELDS(dst)); + debug_assert(!fingerprint_is_empty(src), FP_FMT, line, FP_FIELDS(src)); + + memfrag_start(&dst->mf) = memfrag_start(&src->mf); + memfrag_size(&dst->mf) = memfrag_size(&src->mf); + dst->ntuples = src->ntuples; + // Remember where src memory was allocated + dst->init_line = src->init_line; + dst->alias_line = line; + + // Update alias refcounts + debug_code(dst->num_aliases++); + debug_code(dst->srcfp = (fp_hdr *)src;); + debug_code(dst->srcfp->num_aliases++); + debug_code(dst->srcfp->alias_line = line); + + return (uint32 *)memfrag_start(&dst->mf); +} + +/* + * ---------------------------------------------------------------------------- + * After a fingerprint has been aliased to point to some other fingerprint, and + * its use is done, we restore the 'src' fingerprint to its un-aliased (empty) + * state. (Memory deallocation of fingerprint will be done elsewhere by the + * object that owns the fingerprint.) + */ +#define fingerprint_unalias(dst) fingerprint_do_unalias((dst), __LINE__) + +static inline uint32 * +fingerprint_do_unalias(fp_hdr *dst, uint32 line) +{ + debug_assert(!fingerprint_is_empty(dst), FP_FMT, line, FP_FIELDS(dst)); + + memfrag_set_empty((platform_memfrag *)&dst->mf); + dst->ntuples = 0; + + // (init_line != alias_line) => 'unalias' was done + dst->init_line = 0; + dst->alias_line = line; + + // Update alias refcounts + debug_code(dst->num_aliases--); + debug_code(dst->srcfp->num_aliases--); + debug_code(dst->srcfp->alias_line = line); + debug_code(dst->srcfp = ((dst->num_aliases == 0) ? NULL : dst->srcfp)); + + return (uint32 *)memfrag_start(&dst->mf); +} + +/* + * ---------------------------------------------------------------------------- + * For some future manipulation of fingerprints, move the fingerprint array + * owned by 'src' to the 'dst' fingerprint. Memory allocated for the 'src' + * fingerprint array will now be owned by 'dst' fingerprint object, and + * will need to be freed off of that. + * 'src' no longer holds fingerprint array after this call. + * + * Returns the start of the 'moved' fingerprint. + */ +#define fingerprint_move(dst, src) fingerprint_do_move((dst), (src), __LINE__) + +static inline uint32 * +fingerprint_do_move(fp_hdr *dst, fp_hdr *src, uint32 line) +{ + debug_assert(fingerprint_is_empty(dst), FP_FMT, line, FP_FIELDS(dst)); + debug_assert(!fingerprint_is_empty(src), FP_FMT, line, FP_FIELDS(src)); + + // We don't want any references to src to be carried over to dst. + debug_assert((src->num_aliases == 0), + "Source fingerprint has %d references. Moving it to" + " another fingerprint will leak memory references, potentially" + " causing bugs. " FP_FMT, + src->num_aliases, + line, + FP_FIELDS(src)); + + // Just move the memory fragment itself (not src's tracking data) + memfrag_move(&dst->mf, &src->mf); + dst->ntuples = src->ntuples; + dst->init_line = src->init_line; // Remember where src memory was allocated + dst->move_line = line; + + src->ntuples = 0; // Reflects that memory fragment has been moved over + src->move_line = line; + return (uint32 *)memfrag_start(&dst->mf); +} + /* * try_string_to_(u)int64 * diff --git a/test.sh b/test.sh index 4015372e..3b374528 100755 --- a/test.sh +++ b/test.sh @@ -71,7 +71,8 @@ Examples:" echo " INCLUDE_SLOW_TESTS=true ./test.sh nightly_cache_perf_tests" echo " INCLUDE_SLOW_TESTS=true ./test.sh run_splinter_functionality_tests" echo " INCLUDE_SLOW_TESTS=true ./test.sh run_splinter_functionality_tests --use-shmem" - echo " INCLUDE_SLOW_TESTS=true ./test.sh run_tests_with_shared_memory" + echo " INCLUDE_SLOW_TESTS=true ./test.sh run_fast_shared_memory_tests" + echo " INCLUDE_SLOW_TESTS=true ./test.sh run_all_shared_memory_tests" } # ################################################################## @@ -237,11 +238,21 @@ function nightly_functionality_stress_tests() { # ############################################################################# function nightly_unit_stress_tests() { local n_mills=10 + + # -------------------------------------------------------------------------- + # RESOLVE: Some of the test cases under this stress test tickle bugs that + # will cause the test to fail. Turn this OFF temporarily so that the rest of + # test execution can continue. Revert this (and 'set -e' below) once the + # tests / code is stabilized. + # -------------------------------------------------------------------------- + set +e + + local n_mills=20 local num_rows=$((n_mills * 1000 * 1000)) local nrows_h="${n_mills} mil" # ---- - local n_threads=32 + local n_threads=8 local test_descr="${nrows_h} rows, ${n_threads} threads" local test_name=large_inserts_stress_test @@ -250,7 +261,7 @@ function nightly_unit_stress_tests() { # with this configuration. The config-params listed below -should- work but # this combination has never been exercised successfully due to lack of hw. echo "$Me: Run ${test_name} with ${n_mills} million rows, ${n_threads} threads" - # RESOLVE: Revert: shellcheck disable=SC2086 + # RESOLVE: Revert: #shellcheck disable=SC2086 # run_with_timing "Large Inserts Stress test ${test_descr}" \ # "$BINDIR"/unit/${test_name} \ # $Use_shmem \ @@ -259,6 +270,29 @@ function nightly_unit_stress_tests() { # --num-threads ${n_threads} \ # --num-memtable-bg-threads 8 \ # --num-normal-bg-threads 20 + + key_size=42 + data_size=200 + msg="Large inserts stress test, ${n_mills}M rows, key=${key_size}, data=${data_size} ${use_msg}" + + # shellcheck disable=SC2086 + run_with_timing "${msg}" \ + "$BINDIR"/unit/large_inserts_stress_test ${Use_shmem} \ + --num-inserts ${num_rows} \ + --key-size ${key_size} --data-size ${data_size} + + n_mills=20 + num_rows=$((n_mills * 1000 * 1000)) + msg="Large inserts stress test trunk_build_filters bug, ${n_mills}M rows ${use_msg}" + # shellcheck disable=SC2086 + run_with_timing "${msg}" \ + "$BINDIR"/unit/large_inserts_stress_test ${Use_shmem} \ + --num-inserts ${num_rows} \ + test_fp_num_tuples_out_of_bounds_bug_trunk_build_filters + + # -------------------------------------------------------------------------- + # RESOLVE: Delete this line once above test execution is stabilized. + set -e } # ############################################################################# @@ -651,22 +685,26 @@ function run_slower_unit_tests() { run_with_timing "${msg}" \ "$BINDIR"/unit/splinter_test ${Use_shmem} test_splinter_print_diags + # -------------------------------------------------------------------------- + # RESOLVE: Some of the test cases under this stress test tickle bugs that + # will cause the test to fail. Turn this OFF temporarily so that the rest of + # test execution can continue. Revert this (and 'set -e' below) once the + # tests / code is stabilized. + # -------------------------------------------------------------------------- + set +e + + # -------------------------------------------------------------------------- # Test runs w/ default of 1M rows for --num-inserts n_mills=1 + local n_threads=8 num_rows=$((n_mills * 1000 * 1000)) msg="Large inserts stress test, ${n_mills}M rows, ${use_msg}" - # -------------------------------------------------------------------------- - # FIXME: Disable script failing upon an error. Re-enable when following is fixed: - # Asserts tripping: - # 813 TEST 7/12 large_inserts_bugs_stress:test_seq_key_fully_packed_value_inserts_threaded_same_start_keyid OS-pid=373371, OS-tid=373385, Thread-ID=6, Assertion failed at src/platform_linux/platform.c:286:platform_batch_rwlock_lock(): "lock->write_lock[lock_idx].claim". - # -------------------------------------------------------------------------- - - set +e - # shellcheck disable=SC2086 run_with_timing "${msg}" \ - "$BINDIR"/unit/large_inserts_stress_test ${Use_shmem} --num-inserts ${num_rows} + "$BINDIR"/unit/large_inserts_stress_test ${Use_shmem} \ + --num-inserts ${num_rows} \ + --num-threads ${n_threads} # Test runs w/ more inserts and enable bg-threads n_mills=2 @@ -677,8 +715,12 @@ function run_slower_unit_tests() { run_with_timing "${msg}" \ "$BINDIR"/unit/large_inserts_stress_test ${Use_shmem} \ --num-inserts ${num_rows} \ + --num-threads ${n_threads} \ --num-normal-bg-threads 4 \ --num-memtable-bg-threads 3 + + # -------------------------------------------------------------------------- + # RESOLVE: Delete this line once above test execution is stabilized. set -e } @@ -702,7 +744,7 @@ function run_slower_forked_process_tests() { # # main pr-clang job also failed with this error: # splinterdb_forked_child:test_multiple_forked_process_doing_IOs OS-pid=1182, OS-tid=1182, Thread-ID=3, Assertion failed at src/trunk.c:5363:trunk_compact_bundle(): "height != 0". - # So -- this test scenario is unearthing some existing bugs. Comment out for now. + # As this test scenario is unearthing some existing bugs, comment it out for now. # -------------------------------------------------------------------------- # # num_forked_procs=4 @@ -719,7 +761,7 @@ function run_slower_forked_process_tests() { --use-shmem \ --fork-child \ --num-inserts ${num_rows} \ - test_seq_key_seq_values_inserts_forked + test_Seq_key_be32_Seq_values_inserts_forked } # ################################################################## @@ -851,6 +893,7 @@ function run_other_driver_tests() { if [ "$Use_shmem" != "" ]; then use_msg=", using shared memory" fi + # shellcheck disable=SC2086 run_with_timing "Cache test${use_msg}" \ "$BINDIR"/driver_test cache_test --seed "$SEED" $Use_shmem @@ -862,23 +905,31 @@ function run_other_driver_tests() { # shellcheck disable=SC2086 run_with_timing "Filter test${use_msg}" \ "$BINDIR"/driver_test filter_test --seed "$SEED" $Use_shmem + + # ----------------------------------------------------------------------- + # If supplied, --perf needs to be the 1st arg as it's parsed-away first. + # The perf-run of filter-test exercises fingerprint handling. Memory mgmt + # for fingerprint arrays was overhauled as part of this commit, so it's + # relevant to exercise this test-sub-case with shared memory enabled. + # Morover, each run of filter_test --perf takes ~22m for debug-builds in + # CI jobs. To keep overall CI runs within timeout limits, only run this + # when shared-memory is configured. + # ----------------------------------------------------------------------- + # shellcheck disable=SC2086 + if [ "$Use_shmem" != "" ]; then + run_with_timing "Filter perf tests${use_msg}" \ + "$BINDIR"/driver_test filter_test --perf \ + --seed "$SEED" $Use_shmem + fi } # ####################################################################### -# Re-run a collection of tests with shared-memory support enabled. -# We strive to run all the tests that are run in a test execution cycle -# with shared memory enabled. However, certain test execution configurations -# may not still be runnable in this mode. So, we will incrementally online -# remaining tests when they can run successfully in this mode. +# Re-run a collection of few tests with shared-memory support enabled, +# that are known to run reasonably fast even on Docker image running on +# a Mac. This test-function is provided so we can get a quick turnaround +# of test-stabilization for any changes affecting shared-memory support. # ####################################################################### -function run_tests_with_shared_memory() { - { - echo " " - echo "-- Tests with shared memory configured --" >> "${test_exec_log_file}" - echo " " - } >> "${test_exec_log_file}" - - shmem_tests_run_start=$SECONDS +function run_fast_shared_memory_tests() { # Run all the unit-tests first, to get basic coverage of shared-memory support. run_with_timing "Fast unit tests using shared memory" "$BINDIR"/unit_test "--use-shmem" @@ -889,12 +940,8 @@ function run_tests_with_shared_memory() { "$BINDIR"/driver_test io_apis_test \ --use-shmem --fork-child - Use_shmem="--use-shmem" run_slower_unit_tests - if [ -f "${UNIT_TESTS_DB_DEV}" ]; then rm "${UNIT_TESTS_DB_DEV}"; fi - Use_shmem="--use-shmem" run_splinter_functionality_tests - run_splinter_perf_tests run_btree_tests run_other_driver_tests @@ -902,6 +949,33 @@ function run_tests_with_shared_memory() { # not needed when invoking them. These tests will fork one or more child # processes. run_slower_forked_process_tests +} + +# ####################################################################### +# Re-run a collection of -ALL- tests with shared-memory support enabled. +# We strive to run all the tests that are run in a test execution cycle +# with shared memory enabled. However, certain test execution configurations +# may not still be runnable in this mode. So, we will incrementally online +# remaining tests when they can run successfully in this mode. +# ####################################################################### +function run_all_shared_memory_tests() { + { + echo " " + echo "-- Tests with shared memory configured --" >> "${test_exec_log_file}" + echo " " + } >> "${test_exec_log_file}" + + shmem_tests_run_start=$SECONDS + + # Run all the unit-tests first, to get basic coverage of shared-memory support. + run_fast_shared_memory_tests + + # Will run splinter_test, large_inserts_stress_test for diff workloads. + Use_shmem="--use-shmem" run_slower_unit_tests + if [ -f "${UNIT_TESTS_DB_DEV}" ]; then rm "${UNIT_TESTS_DB_DEV}"; fi + + # Perf tests take time, so run them in here + run_splinter_perf_tests record_elapsed_time ${shmem_tests_run_start} "Tests with shared memory configured" } @@ -1021,7 +1095,7 @@ UNIT_TESTS_DB_DEV="unit_tests_db" # INCLUDE_SLOW_TESTS=true ./test.sh nightly_unit_stress_tests --use-shmem # # Run collection of tests designed to exercise shared memory support: -# INCLUDE_SLOW_TESTS=true ./test.sh run_tests_with_shared_memory +# INCLUDE_SLOW_TESTS=true ./test.sh run_fast_shared_memory_tests # ------------------------------------------------------------------------ if [ $# -ge 1 ]; then @@ -1060,9 +1134,10 @@ run_btree_tests run_other_driver_tests record_elapsed_time ${testRunStartSeconds} "Tests without shared memory configured" + # ------------------------------------------------------------------------ -# Re-run a collection of tests using shared-memory. -Use_shmem="--use-shmem" run_tests_with_shared_memory +# Re-run a collection of -ALL- tests using shared-memory. +Use_shmem="--use-shmem" run_all_shared_memory_tests record_elapsed_time ${testRunStartSeconds} "All Tests" echo ALL PASSED diff --git a/tests/config.c b/tests/config.c index 447927d9..dbbf448e 100644 --- a/tests/config.c +++ b/tests/config.c @@ -28,9 +28,6 @@ #define TEST_CONFIG_DEFAULT_MAX_BRANCHES_PER_NODE 24 // Deal with reasonable key / message sizes for tests -// There are open issues in some tests for smaller key-sizes. -// For now, restrict tests to use this minimum key-size. -#define TEST_CONFIG_MIN_KEY_SIZE ((int)sizeof(uint64)) #define TEST_CONFIG_DEFAULT_KEY_SIZE 24 #define TEST_CONFIG_DEFAULT_MESSAGE_SIZE 100 @@ -58,9 +55,6 @@ * ******************* EXPERIMENTAL FEATURES ******************** * - use_shmem: Support for shared memory segments. * This functionality is solely meant for internal development uses. - * We don't support free(), so your test / usage will likely run into - * shared-memory OOMs errors. - * * --------------------------------------------------------------------------- */ void @@ -394,6 +388,7 @@ config_parse(master_config *cfg, const uint8 num_config, int argc, char *argv[]) // Test-execution configuration parameters config_set_uint64("seed", cfg, seed) {} config_set_uint64("num-inserts", cfg, num_inserts) {} + config_set_uint64("num-threads", cfg, num_threads) {} config_set_uint64("num-processes", cfg, num_processes) {} config_set_else diff --git a/tests/config.h b/tests/config.h index 90258d92..9137e592 100644 --- a/tests/config.h +++ b/tests/config.h @@ -29,6 +29,10 @@ _Static_assert(TEST_CONFIG_DEFAULT_PAGES_PER_EXTENT <= MAX_PAGES_PER_EXTENT, #define TEST_CONFIG_DEFAULT_EXTENT_SIZE \ (TEST_CONFIG_DEFAULT_PAGES_PER_EXTENT * TEST_CONFIG_DEFAULT_PAGE_SIZE) +// There are open issues in some tests for smaller key-sizes. +// For now, restrict tests to use this minimum key-size. +#define TEST_CONFIG_MIN_KEY_SIZE ((int)sizeof(uint64)) + /* * -------------------------------------------------------------------------- * Convenience structure to hold configuration options for all sub-systems. @@ -102,6 +106,7 @@ typedef struct master_config { // Test-execution configuration parameters uint64 seed; uint64 num_inserts; + uint64 num_threads; uint64 num_processes; // # of [forked] processes bool wait_for_gdb; // To debug child processes. } master_config; diff --git a/tests/functional/btree_test.c b/tests/functional/btree_test.c index 14a626f1..ecf2c7ac 100644 --- a/tests/functional/btree_test.c +++ b/tests/functional/btree_test.c @@ -38,6 +38,7 @@ typedef struct test_memtable_context { platform_heap_id heap_id; memtable_context *mt_ctxt; uint64 max_generation; + size_t mf_size; } test_memtable_context; btree_config * @@ -58,11 +59,13 @@ test_memtable_context_create(cache *cc, uint64 num_mt, platform_heap_id hid) { + platform_memfrag memfrag_ctxt; test_memtable_context *ctxt = TYPED_ZALLOC(hid, ctxt); platform_assert(ctxt); ctxt->cc = cc; ctxt->cfg = cfg; ctxt->heap_id = hid; + ctxt->mf_size = memfrag_size(&memfrag_ctxt); ctxt->mt_ctxt = memtable_context_create( hid, cc, cfg->mt_cfg, test_btree_process_noop, NULL); ctxt->max_generation = num_mt; @@ -73,7 +76,7 @@ void test_memtable_context_destroy(test_memtable_context *ctxt, platform_heap_id hid) { memtable_context_destroy(hid, ctxt->mt_ctxt); - platform_free(hid, ctxt); + platform_free_mem(hid, ctxt, ctxt->mf_size); } platform_status @@ -227,6 +230,7 @@ test_btree_perf(cache *cc, uint64 num_inserts_per_thread = num_inserts / num_threads; platform_status ret = STATUS_OK; + platform_memfrag memfrag_params = {0}; test_btree_thread_params *params = TYPED_ARRAY_ZALLOC(hid, params, num_threads); platform_assert(params); @@ -299,7 +303,7 @@ test_btree_perf(cache *cc, } platform_default_log("\n"); - platform_free(hid, params); + platform_free(&memfrag_params); return ret; } @@ -547,7 +551,8 @@ test_btree_basic(cache *cc, test_memtable_context *ctxt = test_memtable_context_create(cc, cfg, 1, hid); memtable *mt = &ctxt->mt_ctxt->mt[0]; - data_config *data_cfg = mt->cfg->data_cfg; + data_config *data_cfg = mt->cfg->data_cfg; + platform_memfrag memfrag_async_lookup; btree_test_async_lookup *async_lookup = TYPED_MALLOC(hid, async_lookup); platform_assert(async_lookup); @@ -798,7 +803,7 @@ test_btree_basic(cache *cc, platform_default_log("btree_test: btree basic test failed\n"); platform_default_log("\n"); test_memtable_context_destroy(ctxt, hid); - platform_free(hid, async_lookup); + platform_free(&memfrag_async_lookup); merge_accumulator_deinit(&expected_data); return rc; } @@ -1018,22 +1023,26 @@ test_btree_merge_basic(cache *cc, btree_config *btree_cfg = cfg->mt_cfg->btree_cfg; - uint64 *root_addr = TYPED_ARRAY_MALLOC(hid, root_addr, arity); + platform_memfrag memfrag_root_addr = {0}; + uint64 *root_addr = TYPED_ARRAY_MALLOC(hid, root_addr, arity); platform_assert(root_addr); test_btree_create_packed_trees(cc, cfg, hid, arity, root_addr); - uint64 *output_addr = TYPED_ARRAY_MALLOC(hid, output_addr, arity); + platform_memfrag memfrag_output_addr = {0}; + uint64 *output_addr = TYPED_ARRAY_MALLOC(hid, output_addr, arity); platform_assert(output_addr); platform_status rc; uint64 max_key = (uint64)-1; - uint64 *pivot = TYPED_ARRAY_MALLOC(hid, pivot, arity); + platform_memfrag memfrag_pivot = {0}; + uint64 *pivot = TYPED_ARRAY_MALLOC(hid, pivot, arity); platform_assert(pivot); - key_buffer *pivot_key = TYPED_ARRAY_MALLOC(hid, pivot_key, arity + 1); + platform_memfrag memfrag_pivot_key = {0}; + key_buffer *pivot_key = TYPED_ARRAY_MALLOC(hid, pivot_key, arity + 1); platform_assert(pivot_key); for (uint64 pivot_no = 0; pivot_no < arity; pivot_no++) { @@ -1045,11 +1054,13 @@ test_btree_merge_basic(cache *cc, } key_buffer_init_from_key(&pivot_key[arity], hid, POSITIVE_INFINITY_KEY); - btree_iterator *btree_itor_arr = + platform_memfrag memfrag_btree_itor_arr; + btree_iterator *btree_itor_arr = TYPED_ARRAY_MALLOC(hid, btree_itor_arr, arity); platform_assert(btree_itor_arr); - iterator **itor_arr = TYPED_ARRAY_MALLOC(hid, itor_arr, arity); + platform_memfrag memfrag_itor_arr; + iterator **itor_arr = TYPED_ARRAY_MALLOC(hid, itor_arr, arity); platform_assert(itor_arr); for (uint64 pivot_no = 0; pivot_no < arity; pivot_no++) { @@ -1151,15 +1162,17 @@ test_btree_merge_basic(cache *cc, } platform_default_log("\n"); - platform_free(hid, root_addr); - platform_free(hid, output_addr); - platform_free(hid, pivot); + platform_free(&memfrag_root_addr); + platform_free(&memfrag_output_addr); + platform_free(&memfrag_pivot); + for (uint64 pivot_no = 0; pivot_no < arity + 1; pivot_no++) { key_buffer_deinit(&pivot_key[pivot_no]); } - platform_free(hid, pivot_key); - platform_free(hid, btree_itor_arr); - platform_free(hid, itor_arr); + + platform_free(&memfrag_pivot_key); + platform_free(&memfrag_btree_itor_arr); + platform_free(&memfrag_itor_arr); return rc; } @@ -1174,8 +1187,11 @@ test_btree_count_in_range(cache *cc, uint64 root_addr; test_btree_create_packed_trees(cc, cfg, hid, 1, &root_addr); btree_config *btree_cfg = cfg->mt_cfg->btree_cfg; - key_buffer *bound_key = TYPED_ARRAY_MALLOC(hid, bound_key, 2); + + platform_memfrag memfrag_bound_key; + key_buffer *bound_key = TYPED_ARRAY_MALLOC(hid, bound_key, 2); platform_assert(bound_key); + key_buffer_init(&bound_key[0], hid); key_buffer_init(&bound_key[1], hid); @@ -1239,7 +1255,8 @@ test_btree_count_in_range(cache *cc, key_buffer_deinit(&bound_key[0]); key_buffer_deinit(&bound_key[1]); - platform_free(hid, bound_key); + + platform_free(&memfrag_bound_key); if (SUCCESS(rc)) platform_default_log("btree_test: btree_count_in_range test succeeded\n"); else @@ -1256,7 +1273,8 @@ test_btree_rough_iterator(cache *cc, { platform_default_log("btree_test: btree rough iterator test started\n"); - uint64 *root_addr = TYPED_ARRAY_MALLOC(hid, root_addr, num_trees); + platform_memfrag memfrag_root_addr; + uint64 *root_addr = TYPED_ARRAY_MALLOC(hid, root_addr, num_trees); platform_assert(root_addr); test_btree_create_packed_trees(cc, cfg, hid, num_trees, root_addr); @@ -1266,14 +1284,17 @@ test_btree_rough_iterator(cache *cc, uint64 num_pivots = 2 * num_trees; - key_buffer *pivot = TYPED_ARRAY_MALLOC(hid, pivot, num_pivots + 1); + platform_memfrag memfrag_pivot; + key_buffer *pivot = TYPED_ARRAY_MALLOC(hid, pivot, num_pivots + 1); platform_assert(pivot); - btree_iterator *rough_btree_itor = + platform_memfrag memfrag_rough_btree_itor; + btree_iterator *rough_btree_itor = TYPED_ARRAY_MALLOC(hid, rough_btree_itor, num_trees); platform_assert(rough_btree_itor); - iterator **rough_itor = TYPED_ARRAY_MALLOC(hid, rough_itor, num_trees); + platform_memfrag memfrag_rough_itor; + iterator **rough_itor = TYPED_ARRAY_MALLOC(hid, rough_itor, num_trees); platform_assert(rough_itor); for (uint64 tree_no = 0; tree_no < num_trees; tree_no++) { @@ -1366,13 +1387,15 @@ test_btree_rough_iterator(cache *cc, // for (uint64 tree_no = 0; tree_no < num_trees; tree_no++) { // btree_zap(cc, btree_cfg, root_addr[tree_no], PAGE_TYPE_BRANCH); //} - platform_free(hid, root_addr); + + platform_free(&memfrag_root_addr); + for (int i = 0; i < pivot_no; i++) { key_buffer_deinit(&pivot[i]); } - platform_free(hid, pivot); - platform_free(hid, rough_btree_itor); - platform_free(hid, rough_itor); + platform_free(&memfrag_pivot); + platform_free(&memfrag_rough_btree_itor); + platform_free(&memfrag_rough_itor); if (SUCCESS(rc)) { platform_default_log("btree_test: btree rough iterator test succeeded\n"); @@ -1393,13 +1416,15 @@ test_btree_merge_perf(cache *cc, btree_config *btree_cfg = cfg->mt_cfg->btree_cfg; - uint64 num_trees = arity * num_merges; - uint64 *root_addr = TYPED_ARRAY_MALLOC(hid, root_addr, num_trees); + platform_memfrag memfrag_root_addr; + uint64 num_trees = arity * num_merges; + uint64 *root_addr = TYPED_ARRAY_MALLOC(hid, root_addr, num_trees); platform_assert(root_addr); uint64 total_tuples = test_btree_create_packed_trees(cc, cfg, hid, num_trees, root_addr); + platform_memfrag memfrag_output_addr; uint64 *output_addr = TYPED_ARRAY_MALLOC(hid, output_addr, num_trees); platform_assert(output_addr); @@ -1407,10 +1432,12 @@ test_btree_merge_perf(cache *cc, uint64 max_key = (uint64)-1; - uint64 *pivot = TYPED_ARRAY_MALLOC(hid, pivot, arity); + platform_memfrag memfrag_pivot; + uint64 *pivot = TYPED_ARRAY_MALLOC(hid, pivot, arity); platform_assert(pivot); - key_buffer *pivot_key = TYPED_ARRAY_MALLOC(hid, pivot_key, arity + 1); + platform_memfrag memfrag_pivot_key; + key_buffer *pivot_key = TYPED_ARRAY_MALLOC(hid, pivot_key, arity + 1); platform_assert(pivot_key); uint64 start_time = platform_get_timestamp(); @@ -1424,11 +1451,13 @@ test_btree_merge_perf(cache *cc, } key_buffer_init_from_key(&pivot_key[arity], hid, POSITIVE_INFINITY_KEY); - btree_iterator *btree_itor_arr = + platform_memfrag memfrag_btree_itor_arr; + btree_iterator *btree_itor_arr = TYPED_ARRAY_MALLOC(hid, btree_itor_arr, arity); platform_assert(btree_itor_arr); - iterator **itor_arr = TYPED_ARRAY_MALLOC(hid, itor_arr, arity); + platform_memfrag memfrag_itor_arr; + iterator **itor_arr = TYPED_ARRAY_MALLOC(hid, itor_arr, arity); platform_assert(itor_arr); for (uint64 merge_no = 0; merge_no < num_merges; merge_no++) { @@ -1494,15 +1523,16 @@ test_btree_merge_perf(cache *cc, } platform_default_log("\n"); - platform_free(hid, root_addr); - platform_free(hid, output_addr); + platform_free(&memfrag_root_addr); + platform_free(&memfrag_output_addr); for (uint64 pivot_no = 0; pivot_no < arity + 1; pivot_no++) { key_buffer_deinit(&pivot_key[pivot_no]); } - platform_free(hid, pivot); - platform_free(hid, pivot_key); - platform_free(hid, btree_itor_arr); - platform_free(hid, itor_arr); + + platform_free(&memfrag_pivot); + platform_free(&memfrag_pivot_key); + platform_free(&memfrag_btree_itor_arr); + platform_free(&memfrag_itor_arr); return rc; } @@ -1567,8 +1597,9 @@ btree_test(int argc, char *argv[]) uint64 num_bg_threads[NUM_TASK_TYPES] = {0}; // no bg threads - data_config *data_cfg; - trunk_config *cfg = TYPED_MALLOC(hid, cfg); + data_config *data_cfg; + platform_memfrag memfrag_cfg; + trunk_config *cfg = TYPED_MALLOC(hid, cfg); rc = test_parse_args(cfg, &data_cfg, @@ -1616,6 +1647,7 @@ btree_test(int argc, char *argv[]) } } + platform_memfrag memfrag_io; platform_io_handle *io = TYPED_MALLOC(hid, io); platform_assert(io != NULL); rc = io_handle_init(io, &io_cfg, hid); @@ -1634,8 +1666,9 @@ btree_test(int argc, char *argv[]) rc_allocator_init( &al, &al_cfg, (io_handle *)io, hid, platform_get_module_id()); - clockcache *cc = TYPED_MALLOC(hid, cc); - rc = clockcache_init(cc, + platform_memfrag memfrag_cc; + clockcache *cc = TYPED_MALLOC(hid, cc); + rc = clockcache_init(cc, &cache_cfg, (io_handle *)io, (allocator *)&al, @@ -1680,16 +1713,16 @@ btree_test(int argc, char *argv[]) } clockcache_deinit(cc); - platform_free(hid, cc); + platform_free(&memfrag_cc); rc_allocator_deinit(&al); test_deinit_task_system(hid, &ts); rc = STATUS_OK; deinit_iohandle: io_handle_deinit(io); free_iohandle: - platform_free(hid, io); + platform_free(&memfrag_io); cleanup: - platform_free(hid, cfg); + platform_free(&memfrag_cfg); platform_heap_destroy(&hid); return SUCCESS(rc) ? 0 : -1; diff --git a/tests/functional/cache_test.c b/tests/functional/cache_test.c index d5b7e4d7..b97218e0 100644 --- a/tests/functional/cache_test.c +++ b/tests/functional/cache_test.c @@ -107,8 +107,11 @@ test_cache_basic(cache *cc, clockcache_config *cfg, platform_heap_id hid) uint32 extent_capacity = cfg->page_capacity / pages_per_extent; uint32 extents_to_allocate = 2 * extent_capacity; uint64 pages_to_allocate = extents_to_allocate * pages_per_extent; + + platform_memfrag memfrag_addr_arr; addr_arr = TYPED_ARRAY_MALLOC(hid, addr_arr, pages_to_allocate); - rc = cache_test_alloc_extents(cc, cfg, addr_arr, extents_to_allocate); + + rc = cache_test_alloc_extents(cc, cfg, addr_arr, extents_to_allocate); if (!SUCCESS(rc)) { /* no need to set status because we got here from an error status */ goto exit; @@ -126,7 +129,8 @@ test_cache_basic(cache *cc, clockcache_config *cfg, platform_heap_id hid) * Get all entries for read, verify ref counts, and release. Verify * that there are no dirty entries afterwards. */ - uint32 pages_allocated = extents_to_allocate * pages_per_extent; + uint32 pages_allocated = extents_to_allocate * pages_per_extent; + platform_memfrag memfrag_page_arr; page_arr = TYPED_ARRAY_MALLOC(hid, page_arr, cfg->page_capacity); if (page_arr == NULL) { rc = STATUS_NO_MEMORY; @@ -278,11 +282,11 @@ test_cache_basic(cache *cc, clockcache_config *cfg, platform_heap_id hid) exit: if (addr_arr) { - platform_free(hid, addr_arr); + platform_free(&memfrag_addr_arr); } if (page_arr) { - platform_free(hid, page_arr); + platform_free(&memfrag_page_arr); } if (SUCCESS(rc)) { @@ -482,6 +486,7 @@ test_cache_flush(cache *cc, uint64 pages_to_allocate = extents_to_allocate * pages_per_extent; platform_default_log("Allocate %d extents ... ", extents_to_allocate); + platform_memfrag memfrag_addr_arr; addr_arr = TYPED_ARRAY_MALLOC(hid, addr_arr, pages_to_allocate); t_start = platform_get_timestamp(); rc = cache_test_alloc_extents(cc, cfg, addr_arr, extents_to_allocate); @@ -557,7 +562,7 @@ test_cache_flush(cache *cc, exit: if (addr_arr) { - platform_free(hid, addr_arr); + platform_free(&memfrag_addr_arr); } if (SUCCESS(rc)) { @@ -591,6 +596,7 @@ typedef struct { page_handle **handle_arr; // page handles test_async_ctxt ctxt[READER_BATCH_SIZE]; // async_get() contexts platform_semaphore batch_sema; // batch semaphore + size_t handle_arr_size; // of memory allocated } test_params; void @@ -834,7 +840,9 @@ test_cache_async(cache *cc, { platform_status rc; uint32 total_threads = num_reader_threads + num_writer_threads; - test_params *params = + + platform_memfrag memfrag_params; + test_params *params = TYPED_ARRAY_ZALLOC(hid, params, num_reader_threads + num_writer_threads); uint32 i; uint64 *addr_arr = NULL; @@ -860,6 +868,8 @@ test_cache_async(cache *cc, num_reader_threads, num_writer_threads, working_set_percent); + + platform_memfrag memfrag_addr_arr; addr_arr = TYPED_ARRAY_MALLOC(hid, addr_arr, pages_to_allocate); rc = cache_test_alloc_extents(cc, cfg, addr_arr, extents_to_allocate); if (!SUCCESS(rc)) { @@ -870,6 +880,7 @@ test_cache_async(cache *cc, cache_flush(cc); cache_evict(cc, TRUE); cache_reset_stats(cc); + platform_memfrag memfrag; for (i = 0; i < total_threads; i++) { const bool32 is_reader = i < num_reader_threads ? TRUE : FALSE; @@ -888,11 +899,14 @@ test_cache_async(cache *cc, } else { params[i].sync_probability = 10; } - params[i].handle_arr = - TYPED_ARRAY_ZALLOC(hid, params[i].handle_arr, params[i].num_pages); + params[i].handle_arr = TYPED_ARRAY_ZALLOC_MF( + &memfrag, hid, params[i].handle_arr, params[i].num_pages); + params[i].handle_arr_size = memfrag_size(&memfrag); + params[i].ts = ts; params[i].hid = hid; params[i].logger = (i == 0) ? TRUE : FALSE; + /* * With multiple threads doing async_get() to the same page, it's * possible that async_get() returns retry. Not so with single @@ -925,8 +939,10 @@ test_cache_async(cache *cc, for (i = 0; i < total_threads; i++) { platform_thread_join(params[i].thread); } + for (i = 0; i < total_threads; i++) { - platform_free(hid, params[i].handle_arr); + platform_free_mem(hid, params[i].handle_arr, params[i].handle_arr_size); + params[i].handle_arr = NULL; } // Deallocate all the entries. for (uint32 i = 0; i < extents_to_allocate; i++) { @@ -938,8 +954,8 @@ test_cache_async(cache *cc, ref = allocator_dec_ref(al, addr, PAGE_TYPE_MISC); platform_assert(ref == AL_FREE); } - platform_free(hid, addr_arr); - platform_free(hid, params); + platform_free(&memfrag_addr_arr); + platform_free(&memfrag_params); cache_print_stats(Platform_default_log_handle, cc); platform_default_log("\n"); @@ -997,8 +1013,9 @@ cache_test(int argc, char *argv[]) platform_heap_create(platform_get_module_id(), 1 * GiB, use_shmem, &hid); platform_assert_status_ok(rc); - uint64 num_bg_threads[NUM_TASK_TYPES] = {0}; // no bg threads - trunk_config *splinter_cfg = TYPED_MALLOC(hid, splinter_cfg); + uint64 num_bg_threads[NUM_TASK_TYPES] = {0}; // no bg threads + platform_memfrag memfrag_splinter_cfg; + trunk_config *splinter_cfg = TYPED_MALLOC(hid, splinter_cfg); rc = test_parse_args(splinter_cfg, &data_cfg, @@ -1033,6 +1050,7 @@ cache_test(int argc, char *argv[]) goto cleanup; } + platform_memfrag memfrag_io; platform_io_handle *io = TYPED_MALLOC(hid, io); platform_assert(io != NULL); rc = io_handle_init(io, &io_cfg, hid); @@ -1051,8 +1069,9 @@ cache_test(int argc, char *argv[]) rc_allocator_init( &al, &al_cfg, (io_handle *)io, hid, platform_get_module_id()); - clockcache *cc = TYPED_MALLOC(hid, cc); - rc = clockcache_init(cc, + platform_memfrag memfrag_cc; + clockcache *cc = TYPED_MALLOC(hid, cc); + rc = clockcache_init(cc, &cache_cfg, (io_handle *)io, (allocator *)&al, @@ -1125,16 +1144,16 @@ cache_test(int argc, char *argv[]) platform_assert_status_ok(rc); clockcache_deinit(cc); - platform_free(hid, cc); + platform_free(&memfrag_cc); rc_allocator_deinit(&al); test_deinit_task_system(hid, &ts); rc = STATUS_OK; deinit_iohandle: io_handle_deinit(io); free_iohandle: - platform_free(hid, io); + platform_free(&memfrag_io); cleanup: - platform_free(hid, splinter_cfg); + platform_free(&memfrag_splinter_cfg); platform_heap_destroy(&hid); return SUCCESS(rc) ? 0 : -1; diff --git a/tests/functional/filter_test.c b/tests/functional/filter_test.c index 30476004..b6eb0e31 100644 --- a/tests/functional/filter_test.c +++ b/tests/functional/filter_test.c @@ -36,14 +36,26 @@ test_filter_basic(cache *cc, return STATUS_BAD_PARAM; } - uint32 **fp_arr = TYPED_ARRAY_MALLOC(hid, fp_arr, num_values); + platform_memfrag memfrag_fp_arr; + uint32 **fp_arr = TYPED_ARRAY_MALLOC(hid, fp_arr, num_values); + + // Technically, each fp_arr[i] might come from a differently sized + // memory fragment. So we should really track num_values fragments. + // The likelihood of this happening is low, so we skate a bit and + // only save-off one typical memory fragment representing the entire + // array. + platform_memfrag memfrag_fp_arr_i = {0}; + for (uint64 i = 0; i < num_values; i++) { - fp_arr[i] = TYPED_ARRAY_MALLOC(hid, fp_arr[i], num_fingerprints); + fp_arr[i] = TYPED_ARRAY_MALLOC_MF( + &memfrag_fp_arr_i, hid, fp_arr[i], num_fingerprints); } - bool32 *used_keys = + platform_memfrag memfrag_used_keys; + bool *used_keys = TYPED_ARRAY_ZALLOC(hid, used_keys, (num_values + 1) * num_fingerprints); + platform_memfrag memfrag_num_input_keys; uint32 *num_input_keys = TYPED_ARRAY_ZALLOC(hid, num_input_keys, num_values); DECLARE_AUTO_WRITABLE_BUFFER(keywb, hid); @@ -65,7 +77,7 @@ test_filter_basic(cache *cc, } } - platform_free(hid, used_keys); + platform_free(&memfrag_used_keys); routing_filter filter[MAX_FILTERS] = {{0}}; for (uint64 i = 0; i < num_values; i++) { @@ -89,7 +101,7 @@ test_filter_basic(cache *cc, num_input_keys[num_values - 1], num_unique); - platform_free(hid, num_input_keys); + platform_free(&memfrag_num_input_keys); for (uint64 i = 0; i < num_values; i++) { for (uint64 j = 0; j < num_fingerprints; j++) { @@ -133,11 +145,14 @@ test_filter_basic(cache *cc, out: if (fp_arr) { + // All fingerprints are expected to be of the same size. + size_t fp_size = memfrag_size(&memfrag_fp_arr_i); for (uint64 i = 0; i < num_values; i++) { - platform_free(hid, fp_arr[i]); + platform_free_mem(hid, fp_arr[i], fp_size); + fp_arr[i] = NULL; } } - platform_free(hid, fp_arr); + platform_free(&memfrag_fp_arr); return rc; } @@ -158,7 +173,8 @@ test_filter_perf(cache *cc, return STATUS_BAD_PARAM; } - uint32 *fp_arr = TYPED_ARRAY_MALLOC( + platform_memfrag memfrag_fp_arr; + uint32 *fp_arr = TYPED_ARRAY_MALLOC( hid, fp_arr, num_trees * num_values * num_fingerprints); if (fp_arr == NULL) { return STATUS_NO_MEMORY; @@ -176,8 +192,10 @@ test_filter_perf(cache *cc, } } - uint64 start_time = platform_get_timestamp(); - routing_filter *filter = TYPED_ARRAY_ZALLOC(hid, filter, num_trees); + uint64 start_time = platform_get_timestamp(); + platform_memfrag memfrag_filter; + routing_filter *filter = TYPED_ARRAY_ZALLOC(hid, filter, num_trees); + for (uint64 k = 0; k < num_trees; k++) { for (uint64 i = 0; i < num_values; i++) { routing_filter new_filter = {0}; @@ -259,10 +277,8 @@ test_filter_perf(cache *cc, for (uint64 i = 0; i < num_trees; i++) { routing_filter_zap(cc, &filter[i]); } - if (fp_arr) { - platform_free(hid, fp_arr); - } - platform_free(hid, filter); + platform_free(&memfrag_fp_arr); + platform_free(&memfrag_filter); return rc; } @@ -296,28 +312,36 @@ filter_test(int argc, char *argv[]) uint64 seed; test_message_generator gen; - if (argc > 1 && strncmp(argv[1], "--perf", sizeof("--perf")) == 0) { + // Move past the 1st arg which will be the driving tag, 'filter_test'. + argc--; + argv++; + + if (argc && strncmp(argv[0], "--perf", sizeof("--perf")) == 0) { run_perf_test = TRUE; - config_argc = argc - 2; - config_argv = argv + 2; - } else { - run_perf_test = FALSE; config_argc = argc - 1; config_argv = argv + 1; + } else { + run_perf_test = FALSE; + config_argc = argc; + config_argv = argv; } bool use_shmem = config_parse_use_shmem(config_argc, config_argv); // Create a heap for io, allocator, cache and splinter - platform_heap_id hid = NULL; - rc = - platform_heap_create(platform_get_module_id(), 1 * GiB, use_shmem, &hid); + platform_heap_id hid = NULL; + size_t heap_size = ((use_shmem ? 3 : 1) * GiB); + + rc = platform_heap_create( + platform_get_module_id(), heap_size, use_shmem, &hid); + platform_assert_status_ok(rc); uint64 num_memtable_bg_threads_unused = 0; uint64 num_normal_bg_threads_unused = 0; - trunk_config *cfg = TYPED_MALLOC(hid, cfg); + platform_memfrag memfrag_cfg; + trunk_config *cfg = TYPED_MALLOC(hid, cfg); rc = test_parse_args(cfg, &data_cfg, @@ -343,6 +367,7 @@ filter_test(int argc, char *argv[]) goto cleanup; } + platform_memfrag memfrag_io; platform_io_handle *io = TYPED_MALLOC(hid, io); platform_assert(io != NULL); rc = io_handle_init(io, &io_cfg, hid); @@ -358,6 +383,7 @@ filter_test(int argc, char *argv[]) &al, &allocator_cfg, (io_handle *)io, hid, platform_get_module_id()); platform_assert_status_ok(rc); + platform_memfrag memfrag_cc; cc = TYPED_MALLOC(hid, cc); platform_assert(cc); rc = clockcache_init(cc, @@ -404,15 +430,15 @@ filter_test(int argc, char *argv[]) } clockcache_deinit(cc); - platform_free(hid, cc); + platform_free(&memfrag_cc); rc_allocator_deinit(&al); task_system_destroy(hid, &ts); io_handle_deinit(io); free_iohandle: - platform_free(hid, io); + platform_free(&memfrag_io); r = 0; cleanup: - platform_free(hid, cfg); + platform_free(&memfrag_cfg); platform_heap_destroy(&hid); return r; diff --git a/tests/functional/io_apis_test.c b/tests/functional/io_apis_test.c index 3742f08e..ba69c602 100644 --- a/tests/functional/io_apis_test.c +++ b/tests/functional/io_apis_test.c @@ -244,6 +244,7 @@ splinter_io_apis_test(int argc, char *argv[]) // For this test, we allocate this structure. In a running Splinter // instance, this struct is nested inside the splinterdb{} handle. + platform_memfrag memfrag_io_hdl; platform_io_handle *io_hdl = TYPED_ZALLOC(hid, io_hdl); if (!io_hdl) { goto heap_destroy; @@ -431,7 +432,7 @@ splinter_io_apis_test(int argc, char *argv[]) io_free: if (pid > 0) { - platform_free(hid, io_hdl); + platform_free(&memfrag_io_hdl); } heap_destroy: if (pid > 0) { @@ -476,6 +477,7 @@ test_sync_writes(platform_heap_id hid, int page_size = (int)io_cfgp->page_size; + platform_memfrag memfrag_buf; // Allocate a buffer to do page I/O char *buf = TYPED_ARRAY_ZALLOC(hid, buf, page_size); if (!buf) { @@ -515,7 +517,7 @@ test_sync_writes(platform_heap_id hid, } free_buf: - platform_free(hid, buf); + platform_free(&memfrag_buf); out: return rc; } @@ -580,8 +582,12 @@ test_sync_reads(platform_heap_id hid, int page_size = (int)io_cfgp->page_size; // Allocate a buffer to do page I/O, and an expected results buffer - char *buf = TYPED_ARRAY_ZALLOC(hid, buf, page_size); - char *exp = TYPED_ARRAY_ZALLOC(hid, exp, page_size); + platform_memfrag memfrag_buf; + char *buf = TYPED_ARRAY_ZALLOC(hid, buf, page_size); + + platform_memfrag memfrag_exp; + char *exp = TYPED_ARRAY_ZALLOC(hid, exp, page_size); + memset(exp, stamp_char, page_size); platform_status rc = STATUS_OK; @@ -626,8 +632,8 @@ test_sync_reads(platform_heap_id hid, } free_buf: - platform_free(hid, buf); - platform_free(hid, exp); + platform_free(&memfrag_buf); + platform_free(&memfrag_exp); return rc; } @@ -820,13 +826,14 @@ test_async_reads(platform_heap_id hid, int page_size = (int)io_cfgp->page_size; // Allocate a buffer to do page I/O, and an expected results buffer - uint64 nbytes = (page_size * NUM_PAGES_RW_ASYNC_PER_THREAD); - char *buf = TYPED_ARRAY_ZALLOC(hid, buf, nbytes); + uint64 nbytes = (page_size * NUM_PAGES_RW_ASYNC_PER_THREAD); + platform_memfrag memfrag_buf; + char *buf = TYPED_ARRAY_ZALLOC(hid, buf, nbytes); if (!buf) { goto out; } - - char *exp = TYPED_ARRAY_ZALLOC(hid, exp, page_size); + platform_memfrag memfrag_exp; + char *exp = TYPED_ARRAY_ZALLOC(hid, exp, page_size); if (!exp) { goto free_buf; } @@ -871,9 +878,9 @@ test_async_reads(platform_heap_id hid, io_cleanup(ioh, NUM_PAGES_RW_ASYNC_PER_THREAD); - platform_free(hid, exp); + platform_free(&memfrag_exp); free_buf: - platform_free(hid, buf); + platform_free(&memfrag_buf); out: return rc; } diff --git a/tests/functional/log_test.c b/tests/functional/log_test.c index a30f9250..224fd064 100644 --- a/tests/functional/log_test.c +++ b/tests/functional/log_test.c @@ -163,6 +163,7 @@ test_log_perf(cache *cc, platform_heap_id hid) { + platform_memfrag memfrag_params; test_log_thread_params *params = TYPED_ARRAY_MALLOC(hid, params, num_threads); platform_assert(params); @@ -205,7 +206,7 @@ test_log_perf(cache *cc, / platform_timestamp_elapsed(start_time)); cleanup: - platform_free(hid, params); + platform_free(&memfrag_params); return ret; } @@ -272,8 +273,9 @@ log_test(int argc, char *argv[]) platform_heap_create(platform_get_module_id(), 1 * GiB, use_shmem, &hid); platform_assert_status_ok(status); - trunk_config *cfg = TYPED_MALLOC(hid, cfg); - uint64 num_bg_threads[NUM_TASK_TYPES] = {0}; // no bg threads + platform_memfrag memfrag_cfg; + trunk_config *cfg = TYPED_MALLOC(hid, cfg); + uint64 num_bg_threads[NUM_TASK_TYPES] = {0}; // no bg threads status = test_parse_args(cfg, &data_cfg, @@ -300,6 +302,7 @@ log_test(int argc, char *argv[]) goto cleanup; } + platform_memfrag memfrag_io; platform_io_handle *io = TYPED_MALLOC(hid, io); platform_assert(io != NULL); status = io_handle_init(io, &io_cfg, hid); @@ -320,7 +323,8 @@ log_test(int argc, char *argv[]) &al, &al_cfg, (io_handle *)io, hid, platform_get_module_id()); platform_assert_status_ok(status); - clockcache *cc = TYPED_MALLOC(hid, cc); + platform_memfrag memfrag_cc; + clockcache *cc = TYPED_MALLOC(hid, cc); platform_assert(cc != NULL); status = clockcache_init(cc, &cache_cfg, @@ -331,7 +335,8 @@ log_test(int argc, char *argv[]) platform_get_module_id()); platform_assert_status_ok(status); - shard_log *log = TYPED_MALLOC(hid, log); + platform_memfrag memfrag_log; + shard_log *log = TYPED_MALLOC(hid, log); platform_assert(log != NULL); if (run_perf_test) { ret = test_log_perf( @@ -367,16 +372,16 @@ log_test(int argc, char *argv[]) } clockcache_deinit(cc); - platform_free(hid, log); - platform_free(hid, cc); + platform_free(&memfrag_log); + platform_free(&memfrag_cc); rc_allocator_deinit(&al); test_deinit_task_system(hid, &ts); deinit_iohandle: io_handle_deinit(io); free_iohandle: - platform_free(hid, io); + platform_free(&memfrag_io); cleanup: - platform_free(hid, cfg); + platform_free(&memfrag_cfg); platform_heap_destroy(&hid); return rc == 0 ? 0 : -1; diff --git a/tests/functional/random.h b/tests/functional/random.h index 9d8e2f03..13c23ac5 100644 --- a/tests/functional/random.h +++ b/tests/functional/random.h @@ -70,6 +70,17 @@ random_next_uint32(random_state *rs) // IN/OUT return (uint32)random_next_uint64(rs); } +static inline int32 +random_next_int(random_state *rs, /* IN/OUT */ uint32 min, uint32 max) +{ + debug_assert(min > 0); + debug_assert(max > 0); + debug_assert((min < max), "min=%u, max=%u", min, max); + uint32 range = (max - min); + + return (min + (random_next_uint32(rs) % range)); +} + static inline void random_bytes(random_state *rs, char *v, size_t n) { diff --git a/tests/functional/splinter_test.c b/tests/functional/splinter_test.c index c0523ee7..1b058a2f 100644 --- a/tests/functional/splinter_test.c +++ b/tests/functional/splinter_test.c @@ -127,8 +127,11 @@ test_trunk_insert_thread(void *arg) uint8 num_tables = params->num_tables; platform_heap_id heap_id = platform_get_heap_id(); platform_assert(num_tables <= 8); + + platform_memfrag memfrag_insert_base; uint64 *insert_base = TYPED_ARRAY_ZALLOC(heap_id, insert_base, num_tables); - uint8 done = 0; + + uint8 done = 0; uint64 num_inserts = 0; timestamp next_check_time = platform_get_timestamp(); @@ -212,7 +215,8 @@ test_trunk_insert_thread(void *arg) out: merge_accumulator_deinit(&msg); params->rc = STATUS_OK; - platform_free(platform_get_heap_id(), insert_base); + + platform_free(&memfrag_insert_base); for (uint64 i = 0; i < num_tables; i++) { trunk_handle *spl = spl_tables[i]; trunk_perform_tasks(spl); @@ -240,8 +244,11 @@ test_trunk_lookup_thread(void *arg) platform_heap_id heap_id = platform_get_heap_id(); platform_assert(num_tables <= 8); + + platform_memfrag memfrag_lookup_base; uint64 *lookup_base = TYPED_ARRAY_ZALLOC(heap_id, lookup_base, num_tables); - uint8 done = 0; + + uint8 done = 0; merge_accumulator data; merge_accumulator_init(&data, heap_id); @@ -336,7 +343,7 @@ test_trunk_lookup_thread(void *arg) out: merge_accumulator_deinit(&data); params->rc = STATUS_OK; - platform_free(platform_get_heap_id(), lookup_base); + platform_free(&memfrag_lookup_base); } static void @@ -363,8 +370,11 @@ test_trunk_range_thread(void *arg) platform_heap_id heap_id = platform_get_heap_id(); platform_assert(num_tables <= 8); + + platform_memfrag memfrag_range_base; uint64 *range_base = TYPED_ARRAY_ZALLOC(heap_id, range_base, num_tables); - uint8 done = 0; + + uint8 done = 0; bool32 verbose_progress = test_show_verbose_progress(test_cfg->test_exec_cfg); @@ -448,7 +458,7 @@ test_trunk_range_thread(void *arg) } out: params->rc = STATUS_OK; - platform_free(platform_get_heap_id(), range_base); + platform_free(&memfrag_range_base); } /* @@ -688,9 +698,11 @@ test_trunk_insert_lookup_thread(void *arg) random_init(&rs, seed, 0); + platform_memfrag memfrag_bases[NUM_OP_TYPES]; + for (uint8 i = 0; i < NUM_OP_TYPES; i++) { - bases[i] = - TYPED_ARRAY_ZALLOC(platform_get_heap_id(), bases[i], num_tables); + bases[i] = TYPED_ARRAY_ZALLOC_MF( + &memfrag_bases[i], platform_get_heap_id(), bases[i], num_tables); granularities[i] = params->num_ops_per_thread[i]; offsets[i] = 0; @@ -759,11 +771,18 @@ test_trunk_insert_lookup_thread(void *arg) params->rc = STATUS_OK; for (uint8 i = 0; i < NUM_OP_TYPES; i++) { - platform_free(platform_get_heap_id(), bases[i]); + platform_memfrag *mf = &memfrag_bases[i]; + platform_free(mf); } } - +/* + * Multiple memory allocations are performed here. + * + * Returns: + * - platform_status - to indicate success / failure. + * - size of memory fragment allocated for top-level array of trunk_handles. + */ static platform_status test_trunk_create_tables(trunk_handle ***spl_handles, trunk_config *cfg, @@ -772,12 +791,15 @@ test_trunk_create_tables(trunk_handle ***spl_handles, task_system *ts, platform_heap_id hid, uint8 num_tables, - uint8 num_caches) + uint8 num_caches, + size_t *spl_tables_mf_size) // Out { + platform_memfrag memfrag_spl_tables; trunk_handle **spl_tables = TYPED_ARRAY_ZALLOC(hid, spl_tables, num_tables); if (spl_tables == NULL) { return STATUS_NO_MEMORY; } + *spl_tables_mf_size = memfrag_size(&memfrag_spl_tables); for (uint8 spl_idx = 0; spl_idx < num_tables; spl_idx++) { cache *cache_to_use = num_caches > 1 ? cc[spl_idx] : *cc; @@ -791,7 +813,7 @@ test_trunk_create_tables(trunk_handle ***spl_handles, for (uint8 del_idx = 0; del_idx < spl_idx; del_idx++) { trunk_destroy(spl_tables[del_idx]); } - platform_free(hid, spl_tables); + platform_free(&memfrag_spl_tables); return STATUS_NO_MEMORY; } } @@ -802,12 +824,15 @@ test_trunk_create_tables(trunk_handle ***spl_handles, static void test_trunk_destroy_tables(trunk_handle **spl_tables, platform_heap_id hid, - uint8 num_tables) + uint8 num_tables, + size_t spl_tables_mf_size) { for (uint8 spl_idx = 0; spl_idx < num_tables; spl_idx++) { trunk_destroy(spl_tables[spl_idx]); } - platform_free(hid, spl_tables); + // clang-format off + platform_free_mem(hid, spl_tables, spl_tables_mf_size); + // clang-format on } /* @@ -1350,23 +1375,37 @@ test_splinter_perf(trunk_config *cfg, trunk_handle **spl_tables; platform_status rc; - rc = test_trunk_create_tables( - &spl_tables, cfg, al, cc, ts, hid, num_tables, num_caches); + size_t spl_tables_mf_size; + rc = test_trunk_create_tables(&spl_tables, + cfg, + al, + cc, + ts, + hid, + num_tables, + num_caches, + &spl_tables_mf_size); if (!SUCCESS(rc)) { platform_error_log("Failed to create splinter table(s): %s\n", platform_status_to_string(rc)); return rc; } - uint64 *per_table_inserts = + platform_memfrag memfrag_per_table_inserts; + uint64 *per_table_inserts = TYPED_ARRAY_MALLOC(hid, per_table_inserts, num_tables); - uint64 *per_table_ranges = + + platform_memfrag memfrag_per_table_ranges; + uint64 *per_table_ranges = TYPED_ARRAY_MALLOC(hid, per_table_ranges, num_tables); - uint64 *curr_op = TYPED_ARRAY_ZALLOC(hid, curr_op, num_tables); + + platform_memfrag memfrag_curr_op; + uint64 *curr_op = TYPED_ARRAY_ZALLOC(hid, curr_op, num_tables); uint64 num_threads = MAX(num_insert_threads, num_lookup_threads); num_threads = MAX(num_threads, num_range_threads); + platform_memfrag memfrag_params; test_splinter_thread_params *params = TYPED_ARRAY_ZALLOC(hid, params, num_threads); @@ -1441,15 +1480,15 @@ test_splinter_perf(trunk_config *cfg, } destroy_splinter: - test_trunk_destroy_tables(spl_tables, hid, num_tables); + test_trunk_destroy_tables(spl_tables, hid, num_tables, spl_tables_mf_size); platform_default_log("After destroy:\n"); for (uint8 idx = 0; idx < num_caches; idx++) { cache_print_stats(Platform_default_log_handle, cc[idx]); } - platform_free(hid, params); - platform_free(hid, curr_op); - platform_free(hid, per_table_ranges); - platform_free(hid, per_table_inserts); + platform_free(&memfrag_params); + platform_free(&memfrag_curr_op); + platform_free(&memfrag_per_table_ranges); + platform_free(&memfrag_per_table_inserts); return rc; } @@ -1475,21 +1514,34 @@ test_splinter_periodic(trunk_config *cfg, trunk_handle **spl_tables; platform_status rc; - rc = test_trunk_create_tables( - &spl_tables, cfg, al, cc, ts, hid, num_tables, num_caches); + size_t spl_tables_mf_size; + rc = test_trunk_create_tables(&spl_tables, + cfg, + al, + cc, + ts, + hid, + num_tables, + num_caches, + &spl_tables_mf_size); if (!SUCCESS(rc)) { platform_error_log("Failed to create splinter table(s): %s\n", platform_status_to_string(rc)); return rc; } - uint64 tuple_size, num_inserts; - uint64 *per_table_inserts = + uint64 tuple_size, num_inserts; + platform_memfrag memfrag_per_table_inserts; + uint64 *per_table_inserts = TYPED_ARRAY_MALLOC(hid, per_table_inserts, num_tables); - uint64 *per_table_ranges = + + platform_memfrag memfrag_per_table_ranges; + uint64 *per_table_ranges = TYPED_ARRAY_MALLOC(hid, per_table_ranges, num_tables); - uint64 *curr_op = TYPED_ARRAY_ZALLOC(hid, curr_op, num_tables); - uint64 total_inserts = 0; + + platform_memfrag memfrag_curr_op; + uint64 *curr_op = TYPED_ARRAY_ZALLOC(hid, curr_op, num_tables); + uint64 total_inserts = 0; for (uint8 i = 0; i < num_tables; i++) { tuple_size = cfg[i].data_cfg->max_key_size @@ -1512,6 +1564,7 @@ test_splinter_periodic(trunk_config *cfg, num_threads = num_range_threads; } + platform_memfrag memfrag_params; test_splinter_thread_params *params = TYPED_ARRAY_ZALLOC(hid, params, num_threads); for (uint64 i = 0; i < num_threads; i++) { @@ -1916,15 +1969,15 @@ test_splinter_periodic(trunk_config *cfg, } destroy_splinter: - test_trunk_destroy_tables(spl_tables, hid, num_tables); + test_trunk_destroy_tables(spl_tables, hid, num_tables, spl_tables_mf_size); platform_default_log("After destroy:\n"); for (uint8 idx = 0; idx < num_caches; idx++) { cache_print_stats(Platform_default_log_handle, cc[idx]); } - platform_free(hid, params); - platform_free(hid, curr_op); - platform_free(hid, per_table_ranges); - platform_free(hid, per_table_inserts); + platform_free(&memfrag_params); + platform_free(&memfrag_curr_op); + platform_free(&memfrag_per_table_ranges); + platform_free(&memfrag_per_table_inserts); return rc; } @@ -1966,16 +2019,27 @@ test_splinter_parallel_perf(trunk_config *cfg, platform_assert(num_inserts_per_thread <= num_lookups_per_thread); - rc = test_trunk_create_tables( - &spl_tables, cfg, al, cc, ts, hid, num_tables, num_caches); + size_t spl_tables_mf_size; + rc = test_trunk_create_tables(&spl_tables, + cfg, + al, + cc, + ts, + hid, + num_tables, + num_caches, + &spl_tables_mf_size); if (!SUCCESS(rc)) { platform_error_log("Failed to create splinter table(s): %s\n", platform_status_to_string(rc)); return rc; } - uint64 *per_table_inserts = + platform_memfrag memfrag_per_table_inserts; + uint64 *per_table_inserts = TYPED_ARRAY_MALLOC(hid, per_table_inserts, num_tables); + + platform_memfrag memfrag_curr_insert_op; uint64 *curr_insert_op = TYPED_ARRAY_ZALLOC(hid, curr_insert_op, num_tables); // This bit here onwards is very similar to splinter_perf_inserts(), but we @@ -1986,6 +2050,7 @@ test_splinter_parallel_perf(trunk_config *cfg, total_inserts = compute_per_table_inserts(per_table_inserts, cfg, test_cfg, num_tables); + platform_memfrag memfrag_params; test_splinter_thread_params *params = TYPED_ARRAY_ZALLOC(hid, params, num_threads); @@ -2126,14 +2191,14 @@ test_splinter_parallel_perf(trunk_config *cfg, } destroy_splinter: - test_trunk_destroy_tables(spl_tables, hid, num_tables); + test_trunk_destroy_tables(spl_tables, hid, num_tables, spl_tables_mf_size); platform_default_log("After destroy:\n"); for (uint8 idx = 0; idx < num_caches; idx++) { cache_print_stats(Platform_default_log_handle, cc[idx]); } - platform_free(hid, params); - platform_free(hid, curr_insert_op); - platform_free(hid, per_table_inserts); + platform_free(&memfrag_params); + platform_free(&memfrag_curr_insert_op); + platform_free(&memfrag_per_table_inserts); return rc; } @@ -2157,19 +2222,31 @@ test_splinter_delete(trunk_config *cfg, trunk_handle **spl_tables; platform_status rc; - rc = test_trunk_create_tables( - &spl_tables, cfg, al, cc, ts, hid, num_tables, num_caches); + size_t spl_tables_mf_size; + rc = test_trunk_create_tables(&spl_tables, + cfg, + al, + cc, + ts, + hid, + num_tables, + num_caches, + &spl_tables_mf_size); if (!SUCCESS(rc)) { platform_error_log("Failed to initialize splinter table(s): %s\n", platform_status_to_string(rc)); return rc; } - uint64 tuple_size, num_inserts; - uint64 *per_table_inserts = + uint64 tuple_size, num_inserts; + + platform_memfrag memfrag_per_table_inserts; + uint64 *per_table_inserts = TYPED_ARRAY_MALLOC(hid, per_table_inserts, num_tables); - uint64 *curr_op = TYPED_ARRAY_ZALLOC(hid, curr_op, num_tables); - uint64 total_inserts = 0; + + platform_memfrag memfrag_curr_op; + uint64 *curr_op = TYPED_ARRAY_ZALLOC(hid, curr_op, num_tables); + uint64 total_inserts = 0; for (uint8 i = 0; i < num_tables; i++) { tuple_size = cfg[i].data_cfg->max_key_size @@ -2183,6 +2260,7 @@ test_splinter_delete(trunk_config *cfg, if (num_lookup_threads > num_threads) { num_threads = num_lookup_threads; } + platform_memfrag memfrag_params; test_splinter_thread_params *params = TYPED_ARRAY_MALLOC(hid, params, num_threads); platform_assert(params); @@ -2347,14 +2425,14 @@ test_splinter_delete(trunk_config *cfg, } destroy_splinter: - test_trunk_destroy_tables(spl_tables, hid, num_tables); + test_trunk_destroy_tables(spl_tables, hid, num_tables, spl_tables_mf_size); platform_default_log("After destroy:\n"); for (uint8 idx = 0; idx < num_caches; idx++) { cache_print_stats(Platform_default_log_handle, cc[idx]); } - platform_free(hid, params); - platform_free(hid, curr_op); - platform_free(hid, per_table_inserts); + platform_free(&memfrag_params); + platform_free(&memfrag_curr_op); + platform_free(&memfrag_per_table_inserts); return rc; } @@ -2680,8 +2758,9 @@ splinter_test(int argc, char *argv[]) /* * 2. Parse test_config options, see test_config_usage() */ + platform_memfrag memfrag_test_cfg; + test_config *test_cfg = TYPED_ARRAY_MALLOC(hid, test_cfg, num_tables); - test_config *test_cfg = TYPED_ARRAY_MALLOC(hid, test_cfg, num_tables); for (uint8 i = 0; i < num_tables; i++) { test_config_set_defaults(test, &test_cfg[i]); @@ -2702,9 +2781,12 @@ splinter_test(int argc, char *argv[]) /* * 3. Parse trunk_config options, see config_usage() */ - trunk_config *splinter_cfg = + platform_memfrag memfrag_splinter_cfg; + trunk_config *splinter_cfg = TYPED_ARRAY_MALLOC(hid, splinter_cfg, num_tables); + data_config *data_cfg; + platform_memfrag memfrag_cache_cfg; clockcache_config *cache_cfg = TYPED_ARRAY_MALLOC(hid, cache_cfg, num_tables); @@ -2764,6 +2846,7 @@ splinter_test(int argc, char *argv[]) io_cfg.kernel_queue_size); } + platform_memfrag memfrag_io; platform_io_handle *io = TYPED_MALLOC(hid, io); platform_assert(io != NULL); rc = io_handle_init(io, &io_cfg, hid); @@ -2783,8 +2866,10 @@ splinter_test(int argc, char *argv[]) &al, &al_cfg, (io_handle *)io, hid, platform_get_module_id()); platform_error_log("Running splinter_test with %d caches\n", num_caches); - clockcache *cc = TYPED_ARRAY_MALLOC(hid, cc, num_caches); + platform_memfrag memfrag_cc; + clockcache *cc = TYPED_ARRAY_MALLOC(hid, cc, num_caches); platform_assert(cc != NULL); + for (uint8 idx = 0; idx < num_caches; idx++) { rc = clockcache_init(&cc[idx], &cache_cfg[idx], @@ -2798,7 +2883,8 @@ splinter_test(int argc, char *argv[]) allocator *alp = (allocator *)&al; // Allocate an array of cache pointers to pass around. - cache **caches = TYPED_ARRAY_MALLOC(hid, caches, num_caches); + platform_memfrag memfrag_caches; + cache **caches = TYPED_ARRAY_MALLOC(hid, caches, num_caches); platform_assert(caches != NULL); for (uint8 i = 0; i < num_caches; i++) { caches[i] = (cache *)&cc[i]; @@ -2940,19 +3026,22 @@ splinter_test(int argc, char *argv[]) for (uint8 idx = 0; idx < num_caches; idx++) { clockcache_deinit(&cc[idx]); } - platform_free(hid, caches); - platform_free(hid, cc); + + platform_free(&memfrag_caches); + + platform_free(&memfrag_cc); + allocator_assert_noleaks(alp); rc_allocator_deinit(&al); test_deinit_task_system(hid, &ts); handle_deinit: io_handle_deinit(io); io_free: - platform_free(hid, io); + platform_free(&memfrag_io); cfg_free: - platform_free(hid, cache_cfg); - platform_free(hid, splinter_cfg); - platform_free(hid, test_cfg); + platform_free(&memfrag_cache_cfg); + platform_free(&memfrag_splinter_cfg); + platform_free(&memfrag_test_cfg); heap_destroy: platform_heap_destroy(&hid); diff --git a/tests/functional/splinter_test.h b/tests/functional/splinter_test.h index 5511a54d..f857e360 100644 --- a/tests/functional/splinter_test.h +++ b/tests/functional/splinter_test.h @@ -84,8 +84,10 @@ test_config_parse(test_config *cfg, char key_type[MAX_STRING_LENGTH]; } temp_config; - temp_config *temp_cfg = + platform_memfrag memfrag_temp_cfg; + temp_config *temp_cfg = TYPED_ARRAY_MALLOC(platform_get_heap_id(), temp_cfg, num_config); + for (i = 0; i < argc; i++) { // Don't be mislead; this is not dead-code. See the config macro expansion if (0) { @@ -129,7 +131,7 @@ test_config_parse(test_config *cfg, } } out: - platform_free(platform_get_heap_id(), temp_cfg); + platform_free(&memfrag_temp_cfg); return rc; } diff --git a/tests/functional/test.h b/tests/functional/test.h index 9cd04542..db0be22d 100644 --- a/tests/functional/test.h +++ b/tests/functional/test.h @@ -315,7 +315,8 @@ test_parse_args_n(trunk_config *splinter_cfg, // OUT uint8 i; // Allocate memory and setup default configs for up to n-instances - master_config *master_cfg = + platform_memfrag memfrag_master_cfg = {0}; + master_config *master_cfg = TYPED_ARRAY_MALLOC(platform_get_heap_id(), master_cfg, num_config); for (i = 0; i < num_config; i++) { config_set_defaults(&master_cfg[i]); @@ -351,7 +352,7 @@ test_parse_args_n(trunk_config *splinter_cfg, // OUT } out: - platform_free(platform_get_heap_id(), master_cfg); + platform_free(&memfrag_master_cfg); return rc; } diff --git a/tests/functional/test_async.c b/tests/functional/test_async.c index 7d9b1723..d9e9310f 100644 --- a/tests/functional/test_async.c +++ b/tests/functional/test_async.c @@ -72,9 +72,11 @@ async_ctxt_init(platform_heap_id hid, // IN // max_async_inflight can be zero platform_assert(max_async_inflight <= TEST_MAX_ASYNC_INFLIGHT); + platform_memfrag memfrag_async_lookup; async_lookup = TYPED_FLEXIBLE_STRUCT_MALLOC(hid, async_lookup, ctxt, max_async_inflight); platform_assert(async_lookup); + async_lookup->mf_size = memfrag_size(&memfrag_async_lookup); async_lookup->max_async_inflight = max_async_inflight; async_lookup->avail_q = pcq_alloc(hid, max_async_inflight); platform_assert(async_lookup->avail_q); @@ -97,14 +99,14 @@ void async_ctxt_deinit(platform_heap_id hid, test_async_lookup *async_lookup) { platform_assert(pcq_is_full(async_lookup->avail_q)); - pcq_free(hid, async_lookup->avail_q); + pcq_free(hid, &async_lookup->avail_q); platform_assert(pcq_is_empty(async_lookup->ready_q)); - pcq_free(hid, async_lookup->ready_q); + pcq_free(hid, &async_lookup->ready_q); for (uint64 i = 0; i < async_lookup->max_async_inflight; i++) { key_buffer_deinit(&async_lookup->ctxt[i].key); merge_accumulator_deinit(&async_lookup->ctxt[i].data); } - platform_free(hid, async_lookup); + platform_free_mem(hid, async_lookup, async_lookup->mf_size); } diff --git a/tests/functional/test_async.h b/tests/functional/test_async.h index 1c268b2c..c29a7c99 100644 --- a/tests/functional/test_async.h +++ b/tests/functional/test_async.h @@ -38,6 +38,7 @@ typedef struct { uint32 max_async_inflight; pcq *ready_q; pcq *avail_q; + size_t mf_size; // of memory fragment allocated for this struct test_async_ctxt ctxt[]; } test_async_lookup; diff --git a/tests/functional/test_functionality.c b/tests/functional/test_functionality.c index bfd95fa6..10391ab8 100644 --- a/tests/functional/test_functionality.c +++ b/tests/functional/test_functionality.c @@ -236,6 +236,7 @@ verify_range_against_shadow(trunk_handle *spl, platform_assert(start_index <= sharr->nkeys); platform_assert(end_index <= sharr->nkeys); + platform_memfrag memfrag_range_itor; trunk_range_iterator *range_itor = TYPED_MALLOC(hid, range_itor); platform_assert(range_itor != NULL); status = trunk_range_iterator_init(spl, @@ -315,7 +316,7 @@ verify_range_against_shadow(trunk_handle *spl, trunk_range_iterator_deinit(range_itor); out: - platform_free(hid, range_itor); + platform_free(&memfrag_range_itor); return status; } @@ -499,7 +500,7 @@ validate_tree_against_shadow(trunk_handle *spl, rc = verify_against_shadow(spl, &sharr, async_lookup); if (!SUCCESS(rc)) { - platform_free(hid, async_lookup); + platform_free_mem(hid, async_lookup, async_lookup->mf_size); platform_error_log("Failed to verify inserted items in Splinter: %s\n", platform_status_to_string(rc)); goto cleanup; @@ -648,14 +649,16 @@ test_functionality(allocator *al, platform_error_log("Functional test started with %d tables\n", num_tables); platform_assert(cc != NULL); + platform_memfrag memfrag_spl_tables; trunk_handle **spl_tables = TYPED_ARRAY_ZALLOC(hid, spl_tables, num_tables); platform_assert(spl_tables != NULL); + platform_memfrag memfrag_shadows; test_splinter_shadow_tree **shadows = TYPED_ARRAY_ZALLOC(hid, shadows, num_tables); - platform_assert(shadows != NULL); + platform_memfrag memfrag_splinters; allocator_root_id *splinters = TYPED_ARRAY_ZALLOC(hid, splinters, num_tables); @@ -864,8 +867,8 @@ test_functionality(allocator *al, if (async_lookup) { async_ctxt_deinit(hid, async_lookup); } - platform_free(hid, spl_tables); - platform_free(hid, splinters); - platform_free(hid, shadows); + platform_free(&memfrag_spl_tables); + platform_free(&memfrag_splinters); + platform_free(&memfrag_shadows); return status; } diff --git a/tests/functional/test_splinter_shadow.c b/tests/functional/test_splinter_shadow.c index 228cec1e..a6301b5b 100644 --- a/tests/functional/test_splinter_shadow.c +++ b/tests/functional/test_splinter_shadow.c @@ -98,11 +98,13 @@ test_splinter_shadow_create(test_splinter_shadow_tree **tree, { platform_status rc = STATUS_NO_MEMORY; + platform_memfrag memfrag_shadow; test_splinter_shadow_tree *shadow = TYPED_ZALLOC(hid, shadow); if (shadow == NULL) { platform_default_log("Failed to allocate memory for shadow init"); return rc; } + shadow->mf_size = memfrag_size(&memfrag_shadow); /* * XXX : We are allocating for the worst case here. In the future, if need @@ -115,7 +117,7 @@ test_splinter_shadow_create(test_splinter_shadow_tree **tree, sizeof(test_splinter_shadow_node) * max_operations); if (!SUCCESS(rc)) { platform_default_log("Failed to pre allocate nodes for shadow tree\n"); - platform_free(hid, shadow); + platform_free(&memfrag_shadow); return rc; } shadow->nodes = platform_buffer_getaddr(&shadow->nodes_buffer); @@ -264,7 +266,7 @@ test_splinter_shadow_destroy(platform_heap_id hid, { platform_buffer_deinit(&tree->nodes_buffer); tree->numKeys = 0; - platform_free(hid, tree); + platform_free_mem(hid, tree, tree->mf_size); } /* diff --git a/tests/functional/test_splinter_shadow.h b/tests/functional/test_splinter_shadow.h index 4dcc2da6..88de8c66 100644 --- a/tests/functional/test_splinter_shadow.h +++ b/tests/functional/test_splinter_shadow.h @@ -27,6 +27,7 @@ typedef struct test_splinter_shadow_tree { uint64 currentAllocIdx; buffer_handle nodes_buffer; test_splinter_shadow_node *nodes; + size_t mf_size; } test_splinter_shadow_tree; diff --git a/tests/functional/ycsb_test.c b/tests/functional/ycsb_test.c index 01f87f0f..59fa8fe9 100644 --- a/tests/functional/ycsb_test.c +++ b/tests/functional/ycsb_test.c @@ -248,6 +248,7 @@ typedef struct ycsb_op { uint64 range_len; uint64 start_time; uint64 end_time; + size_t mf_size; bool32 found; } ycsb_op; @@ -293,6 +294,7 @@ typedef struct ycsb_log_params { latency_tables tables; task_system *ts; + size_t mf_size; // of memory fragment allocated. } ycsb_log_params; typedef struct ycsb_phase { @@ -303,6 +305,7 @@ typedef struct ycsb_phase { running_times times; latency_tables tables; char *measurement_command; + size_t mf_size; // of memory fragment allocated. } ycsb_phase; static void @@ -429,6 +432,7 @@ run_ycsb_phase(trunk_handle *spl, for (i = 0; i < phase->nlogs; i++) nthreads += phase->params[i].nthreads; + platform_memfrag memfrag_threads; threads = TYPED_ARRAY_MALLOC(hid, threads, nthreads); if (threads == NULL) return -1; @@ -470,11 +474,12 @@ run_ycsb_phase(trunk_handle *spl, } nthreads--; } - platform_free(hid, threads); + platform_free(&memfrag_threads); if (phase->measurement_command) { - const size_t bufsize = 1024; - char *filename = TYPED_ARRAY_MALLOC(hid, filename, bufsize); + const size_t bufsize = 1024; + platform_memfrag memfrag_filename; + char *filename = TYPED_ARRAY_MALLOC(hid, filename, bufsize); platform_assert(filename); snprintf(filename, bufsize, "%s.measurement", phase->name); FILE *measurement_output = fopen(filename, "wb"); @@ -482,24 +487,26 @@ run_ycsb_phase(trunk_handle *spl, FILE *measurement_cmd = popen(phase->measurement_command, "r"); platform_assert(measurement_cmd != NULL); - char *buffer = TYPED_ARRAY_MALLOC(hid, buffer, bufsize); - size_t num_read; - size_t num_written; + platform_memfrag memfrag_buffer; + char *buffer = TYPED_ARRAY_MALLOC(hid, buffer, bufsize); + size_t num_read; + size_t num_written; do { num_read = fread(buffer, 1, bufsize, measurement_cmd); num_written = fwrite(buffer, 1, num_read, measurement_output); if (num_written != num_read) { platform_error_log( "Could not write to measurement output file %s\n", filename); - platform_free(hid, filename); - platform_free(hid, buffer); + platform_free(&memfrag_filename); + platform_free(&memfrag_buffer); exit(1); } } while (!feof(measurement_cmd)); fclose(measurement_output); pclose(measurement_cmd); - platform_free(hid, filename); - platform_free(hid, buffer); + + platform_free(&memfrag_filename); + platform_free(&memfrag_buffer); } return success; @@ -532,6 +539,7 @@ typedef struct parse_ycsb_log_req { bool32 lock; uint64 start_line; uint64 end_line; + size_t mf_size; uint64 *num_ops; ycsb_op **ycsb_ops; uint64 *max_range_len; @@ -564,14 +572,16 @@ parse_ycsb_log_file(void *arg) } uint64 num_lines = req->end_line - req->start_line; - ycsb_op *result = TYPED_ARRAY_MALLOC(hid, result, num_lines); + platform_memfrag memfrag_result; + ycsb_op *result = TYPED_ARRAY_MALLOC(hid, result, num_lines); if (result == NULL) { platform_error_log("Failed to allocate memory for log\n"); goto close_file; } + result->mf_size = memfrag_size(&memfrag_result); if (lock && mlock(result, num_lines * sizeof(ycsb_op))) { platform_error_log("Failed to lock log into RAM.\n"); - platform_free(hid, result); + platform_free(&memfrag_result); goto close_file; } @@ -612,19 +622,21 @@ parse_ycsb_log_file(void *arg) close_file: if (buffer) { - platform_free(hid, buffer); + platform_free_heap(PROCESS_PRIVATE_HEAP_ID, buffer); } fclose(fp); platform_assert(result != NULL); *req->ycsb_ops = result; - platform_free(hid, req); + platform_free_mem(hid, req, req->mf_size); } +// RESOLVE: Fn is never called. Should it be called at end of +// parse_ycsb_log_file()? void unload_ycsb_log(ycsb_op *log, uint64 num_ops) { munlock(log, num_ops * sizeof(*log)); - platform_free(platform_get_heap_id(), log); + platform_free_mem(platform_get_heap_id(), log, log->mf_size); } static void @@ -711,14 +723,29 @@ load_ycsb_logs(int argc, // platform_assert(rc == 0); // platform_free(hid, resize_cgroup_command); - ycsb_phase *phases = TYPED_ARRAY_MALLOC(hid, phases, _nphases); - log_size_bytes += _nphases * sizeof(ycsb_phase); + platform_memfrag memfrag_phases; + ycsb_phase *phases = TYPED_ARRAY_MALLOC(hid, phases, _nphases); + platform_assert(phases); + + // Ensure that memset() is not clobbering memory ... + size_t nbytes = (_nphases * sizeof(*phases)); + platform_assert(memfrag_size(&memfrag_phases) >= nbytes); + phases->mf_size = memfrag_size(&memfrag_phases); + memset(phases, 0, nbytes); + + log_size_bytes += nbytes; + + platform_memfrag memfrag_params; ycsb_log_params *params = TYPED_ARRAY_MALLOC(hid, params, num_threads); - log_size_bytes += num_threads * sizeof(ycsb_log_params); - platform_assert(phases && params); + platform_assert(params); + + // Ensure that memset() is not clobbering memory ... + nbytes = (num_threads * sizeof(*params)); + platform_assert(memfrag_size(&memfrag_params) >= nbytes); + params->mf_size = memfrag_size(&memfrag_params); + memset(params, 0, nbytes); - memset(phases, 0, _nphases * sizeof(ycsb_phase)); - memset(params, 0, num_threads * sizeof(ycsb_log_params)); + log_size_bytes += nbytes; phases[0].params = params; @@ -735,21 +762,26 @@ load_ycsb_logs(int argc, params[lognum].nthreads = 1; params[lognum].batch_size = batch_size; params[lognum].filename = trace_filename; - parse_ycsb_log_req *req = TYPED_MALLOC(hid, req); - req->filename = trace_filename; - req->lock = mlock_log; - req->num_ops = ¶ms[lognum].total_ops; - req->ycsb_ops = ¶ms[lognum].ycsb_ops; - req->start_line = start_line; - req->end_line = start_line + num_lines / num_threads; - req->max_range_len = &max_range_len; + // Freed in parse_ycsb_log_file() + platform_memfrag memfrag_req; + parse_ycsb_log_req *req = TYPED_MALLOC(hid, req); + req->mf_size = memfrag_size(&memfrag_req); + req->filename = trace_filename; + req->lock = mlock_log; + req->num_ops = ¶ms[lognum].total_ops; + req->ycsb_ops = ¶ms[lognum].ycsb_ops; + req->start_line = start_line; + req->end_line = start_line + num_lines / num_threads; + req->max_range_len = &max_range_len; if (lognum < num_lines % num_threads) { req->end_line++; } uint64 num_lines = req->end_line - req->start_line; log_size_bytes += num_lines * sizeof(ycsb_op); start_line = req->end_line; - ret = platform_thread_create( + + // Each thread frees up memory allocated, above, for req struct. + ret = platform_thread_create( ¶ms[lognum].thread, FALSE, parse_ycsb_log_file, req, hid); platform_assert_status_ok(ret); phases[0].nlogs++; @@ -772,24 +804,11 @@ load_ycsb_logs(int argc, return STATUS_OK; bad_params: - platform_free(hid, phases); - platform_free(hid, params); + platform_free(&memfrag_phases); + platform_free(&memfrag_params); return STATUS_BAD_PARAM; } -void -unload_ycsb_logs(ycsb_phase *phases, uint64 nphases) -{ - int i, j; - - for (i = 0; i < nphases; i++) - for (j = 0; j < phases[i].nlogs; j++) - unload_ycsb_log(phases[i].params[j].ycsb_ops, - phases[i].params[j].total_ops); - platform_free(platform_get_heap_id(), phases[0].params); - platform_free(platform_get_heap_id(), phases); -} - void compute_log_latency_tables(ycsb_log_params *params) { @@ -1186,9 +1205,10 @@ ycsb_test(int argc, char *argv[]) rc = platform_heap_create(platform_get_module_id(), 1 * GiB, FALSE, &hid); platform_assert_status_ok(rc); - data_config *data_cfg; - trunk_config *splinter_cfg = TYPED_MALLOC(hid, splinter_cfg); - uint64 num_bg_threads[NUM_TASK_TYPES] = {0}; // no bg threads + data_config *data_cfg; + platform_memfrag memfrag_splinter_cfg; + trunk_config *splinter_cfg = TYPED_MALLOC(hid, splinter_cfg); + uint64 num_bg_threads[NUM_TASK_TYPES] = {0}; // no bg threads rc = test_parse_args(splinter_cfg, &data_cfg, @@ -1268,6 +1288,7 @@ ycsb_test(int argc, char *argv[]) // platform_assert(sys_rc == 0); // platform_free(hid, resize_hugetlb_command); + platform_memfrag memfrag_io; platform_io_handle *io = TYPED_MALLOC(hid, io); platform_assert(io != NULL); if (!SUCCESS(rc)) { @@ -1285,9 +1306,10 @@ ycsb_test(int argc, char *argv[]) goto deinit_iohandle; } - rc_allocator al; - clockcache *cc = TYPED_MALLOC(hid, cc); - trunk_handle *spl; + rc_allocator al; + platform_memfrag memfrag_cc; + clockcache *cc = TYPED_MALLOC(hid, cc); + trunk_handle *spl; if (use_existing) { rc_allocator_mount( @@ -1331,7 +1353,7 @@ ycsb_test(int argc, char *argv[]) trunk_unmount(&spl); clockcache_deinit(cc); - platform_free(hid, cc); + platform_free(&memfrag_cc); rc_allocator_unmount(&al); test_deinit_task_system(hid, &ts); rc = STATUS_OK; @@ -1346,20 +1368,24 @@ ycsb_test(int argc, char *argv[]) compute_all_report_data(phases, nphases); write_all_reports(phases, nphases); + // RESOLVE: Fix all refs to PROCESS_PRIVATE_HEAP_ID to use hid + // and fix call to platform_free(). Help! This area needs attn. + // What should be memfrag's size to be supplied to platform_free_mem()? for (uint64 i = 0; i < nphases; i++) { for (uint64 j = 0; j < phases[i].nlogs; j++) { - platform_free(hid, phases[i].params[j].ycsb_ops); + platform_free_heap(PROCESS_PRIVATE_HEAP_ID, + phases[i].params[j].ycsb_ops); } - platform_free(hid, phases[i].params); + platform_free_heap(PROCESS_PRIVATE_HEAP_ID, phases[i].params); } - platform_free(hid, phases); + platform_free_mem(hid, phases, phases->mf_size); deinit_iohandle: io_handle_deinit(io); free_iohandle: - platform_free(hid, io); + platform_free(&memfrag_io); cleanup: - platform_free(hid, splinter_cfg); + platform_free(&memfrag_splinter_cfg); platform_heap_destroy(&hid); return SUCCESS(rc) ? 0 : -1; diff --git a/tests/unit/btree_stress_test.c b/tests/unit/btree_stress_test.c index bc2c619f..2ee1bd53 100644 --- a/tests/unit/btree_stress_test.c +++ b/tests/unit/btree_stress_test.c @@ -34,6 +34,7 @@ typedef struct insert_thread_params { uint64 root_addr; int start; int end; + size_t mf_size; // Size of memfrag allocated for scratch array } insert_thread_params; // Function Prototypes @@ -201,15 +202,19 @@ CTEST2(btree_stress, test_random_inserts_concurrent) uint64 root_addr = btree_create( (cache *)&data->cc, &data->dbtree_cfg, &mini, PAGE_TYPE_MEMTABLE); - platform_heap_id hid = data->hid; - insert_thread_params *params = TYPED_ARRAY_ZALLOC(hid, params, nthreads); + platform_heap_id hid = data->hid; + platform_memfrag memfrag_params; + insert_thread_params *params = TYPED_ARRAY_ZALLOC(hid, params, nthreads); + platform_memfrag memfrag_threads; platform_thread *threads = TYPED_ARRAY_ZALLOC(hid, threads, nthreads); + platform_memfrag mf = {0}; for (uint64 i = 0; i < nthreads; i++) { params[i].cc = (cache *)&data->cc; params[i].cfg = &data->dbtree_cfg; params[i].hid = data->hid; - params[i].scratch = TYPED_MALLOC(data->hid, params[i].scratch); + params[i].scratch = TYPED_MALLOC_MF(&mf, data->hid, params[i].scratch); + params[i].mf_size = memfrag_size(&mf); params[i].mini = &mini; params[i].root_addr = root_addr; params[i].start = i * (nkvs / nthreads); @@ -240,6 +245,7 @@ CTEST2(btree_stress, test_random_inserts_concurrent) root_addr, nkvs); ASSERT_NOT_EQUAL(0, rc, "Invalid tree\n"); + CTEST_LOG_INFO("BTree stress query_tests() succeeded.\n"); if (!iterator_tests((cache *)&data->cc, &data->dbtree_cfg, @@ -250,6 +256,9 @@ CTEST2(btree_stress, test_random_inserts_concurrent) { CTEST_ERR("invalid ranges in original tree, starting at front\n"); } + CTEST_LOG_INFO("BTree stress Forward scan iterator_tests() " + "succeeded.\n"); + if (!iterator_tests((cache *)&data->cc, &data->dbtree_cfg, root_addr, @@ -259,18 +268,22 @@ CTEST2(btree_stress, test_random_inserts_concurrent) { CTEST_ERR("invalid ranges in original tree, starting at back\n"); } + CTEST_LOG_INFO("BTree stress Backward scan iterator_tests() " + "succeeded.\n"); if (!iterator_seek_tests( (cache *)&data->cc, &data->dbtree_cfg, root_addr, nkvs, data->hid)) { CTEST_ERR("invalid ranges when seeking in original tree\n"); } + CTEST_LOG_INFO("BTree stress iterator_seek_tests() succeeded.\n"); uint64 packed_root_addr = pack_tests( (cache *)&data->cc, &data->dbtree_cfg, data->hid, root_addr, nkvs); if (0 < nkvs && !packed_root_addr) { ASSERT_TRUE(FALSE, "Pack failed.\n"); } + CTEST_LOG_INFO("BTree stress pack_tests() succeeded.\n"); rc = query_tests((cache *)&data->cc, &data->dbtree_cfg, @@ -279,6 +292,7 @@ CTEST2(btree_stress, test_random_inserts_concurrent) packed_root_addr, nkvs); ASSERT_NOT_EQUAL(0, rc, "Invalid tree\n"); + CTEST_LOG_INFO("BTree stress query_tests() after pack succeeded.\n"); rc = iterator_tests((cache *)&data->cc, &data->dbtree_cfg, @@ -287,6 +301,8 @@ CTEST2(btree_stress, test_random_inserts_concurrent) TRUE, data->hid); ASSERT_NOT_EQUAL(0, rc, "Invalid ranges in packed tree\n"); + CTEST_LOG_INFO("BTree stress Forward scan iterator_tests() after " + "pack succeeded.\n"); // Exercise print method to verify that it basically continues to work. set_log_streams_for_tests(MSG_LEVEL_DEBUG); @@ -301,10 +317,10 @@ CTEST2(btree_stress, test_random_inserts_concurrent) // Release memory allocated in this test case for (uint64 i = 0; i < nthreads; i++) { - platform_free(data->hid, params[i].scratch); + platform_free_mem(data->hid, params[i].scratch, params[i].mf_size); } - platform_free(hid, params); - platform_free(hid, threads); + platform_free(&memfrag_params); + platform_free(&memfrag_threads); } /* @@ -342,8 +358,14 @@ insert_tests(cache *cc, uint64 bt_page_size = btree_page_size(cfg); int keybuf_size = bt_page_size; int msgbuf_size = bt_page_size; - uint8 *keybuf = TYPED_MANUAL_MALLOC(hid, keybuf, keybuf_size); - uint8 *msgbuf = TYPED_MANUAL_MALLOC(hid, msgbuf, msgbuf_size); + + platform_memfrag memfrag_keybuf; + uint8 *keybuf = + TYPED_MANUAL_MALLOC(&memfrag_keybuf, hid, keybuf, keybuf_size); + + platform_memfrag memfrag_msgbuf; + uint8 *msgbuf = + TYPED_MANUAL_MALLOC(&memfrag_msgbuf, hid, msgbuf, msgbuf_size); for (uint64 i = start; i < end; i++) { if (!SUCCESS(btree_insert(cc, @@ -360,8 +382,8 @@ insert_tests(cache *cc, ASSERT_TRUE(FALSE, "Failed to insert 4-byte %ld\n", i); } } - platform_free(hid, keybuf); - platform_free(hid, msgbuf); + platform_free(&memfrag_keybuf); + platform_free(&memfrag_msgbuf); } static key @@ -409,10 +431,15 @@ query_tests(cache *cc, uint64 root_addr, int nkvs) { - uint64 bt_page_size = btree_page_size(cfg); - uint8 *keybuf = TYPED_MANUAL_MALLOC(hid, keybuf, bt_page_size); - uint8 *msgbuf = TYPED_MANUAL_MALLOC(hid, msgbuf, bt_page_size); - memset(msgbuf, 0, bt_page_size); + uint64 bt_page_size = btree_page_size(cfg); + platform_memfrag memfrag_keybuf; + uint8 *keybuf = + TYPED_MANUAL_MALLOC(&memfrag_keybuf, hid, keybuf, bt_page_size); + + platform_memfrag memfrag_msgbuf; + uint8 *msgbuf = + TYPED_MANUAL_MALLOC(&memfrag_msgbuf, hid, msgbuf, bt_page_size); + memset(msgbuf, 0, btree_page_size(cfg)); merge_accumulator result; merge_accumulator_init(&result, hid); @@ -433,8 +460,8 @@ query_tests(cache *cc, } merge_accumulator_deinit(&result); - platform_free(hid, keybuf); - platform_free(hid, msgbuf); + platform_free(&memfrag_keybuf); + platform_free(&memfrag_msgbuf); return 1; } @@ -447,10 +474,19 @@ iterator_test(platform_heap_id hid, { uint64 seen = 0; uint64 bt_page_size = btree_page_size(cfg); - uint8 *prevbuf = TYPED_MANUAL_MALLOC(hid, prevbuf, bt_page_size); key prev = NULL_KEY; - uint8 *keybuf = TYPED_MANUAL_MALLOC(hid, keybuf, bt_page_size); - uint8 *msgbuf = TYPED_MANUAL_MALLOC(hid, msgbuf, bt_page_size); + + platform_memfrag memfrag_prevbuf; + uint8 *prevbuf = + TYPED_MANUAL_MALLOC(&memfrag_prevbuf, hid, prevbuf, bt_page_size); + + platform_memfrag memfrag_keybuf; + uint8 *keybuf = + TYPED_MANUAL_MALLOC(&memfrag_keybuf, hid, keybuf, bt_page_size); + + platform_memfrag memfrag_msgbuf; + uint8 *msgbuf = + TYPED_MANUAL_MALLOC(&memfrag_msgbuf, hid, msgbuf, bt_page_size); while (iterator_can_curr(iter)) { key curr_key; @@ -491,9 +527,9 @@ iterator_test(platform_heap_id hid, } } - platform_free(hid, prevbuf); - platform_free(hid, keybuf); - platform_free(hid, msgbuf); + platform_free(&memfrag_prevbuf); + platform_free(&memfrag_keybuf); + platform_free(&memfrag_msgbuf); return seen; } @@ -556,8 +592,11 @@ iterator_seek_tests(cache *cc, { btree_iterator dbiter; - int keybuf_size = btree_page_size(cfg); - uint8 *keybuf = TYPED_MANUAL_MALLOC(hid, keybuf, keybuf_size); + int keybuf_size = btree_page_size(cfg); + + platform_memfrag memfrag_keybuf; + uint8 *keybuf = + TYPED_MANUAL_MALLOC(&memfrag_keybuf, hid, keybuf, keybuf_size); // start in the "middle" of the range key start_key = gen_key(cfg, nkvs / 2, keybuf, keybuf_size); @@ -591,6 +630,7 @@ iterator_seek_tests(cache *cc, btree_iterator_deinit(&dbiter); + platform_free(&memfrag_keybuf); return 1; } diff --git a/tests/unit/btree_test.c b/tests/unit/btree_test.c index 0c89834b..4eab349b 100644 --- a/tests/unit/btree_test.c +++ b/tests/unit/btree_test.c @@ -111,7 +111,8 @@ CTEST_SETUP(btree) // Optional teardown function for suite, called after every test in suite CTEST_TEARDOWN(btree) { - platform_heap_destroy(&data->hid); + platform_status rc = platform_heap_destroy(&data->hid); + ASSERT_TRUE(SUCCESS(rc)); } /* @@ -174,8 +175,10 @@ CTEST2(btree, test_leaf_split) static int leaf_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) { - char *leaf_buffer = - TYPED_MANUAL_MALLOC(hid, leaf_buffer, btree_page_size(cfg)); + size_t page_size = btree_page_size(cfg); + platform_memfrag memfrag_leaf_buffer; + char *leaf_buffer = TYPED_ARRAY_MALLOC(hid, leaf_buffer, page_size); + btree_hdr *hdr = (btree_hdr *)leaf_buffer; /* * The following number is empirically determined to be the most @@ -254,17 +257,18 @@ leaf_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) ASSERT_EQUAL(0, cmp_rv, "Bad 4-byte message %d\n", i); } - platform_free(hid, leaf_buffer); + platform_free(&memfrag_leaf_buffer); return 0; } static int leaf_hdr_search_tests(btree_config *cfg, platform_heap_id hid) { - char *leaf_buffer = - TYPED_MANUAL_MALLOC(hid, leaf_buffer, btree_page_size(cfg)); - btree_hdr *hdr = (btree_hdr *)leaf_buffer; - int nkvs = 256; + size_t page_size = btree_page_size(cfg); + platform_memfrag memfrag_leaf_buffer; + char *leaf_buffer = TYPED_ARRAY_MALLOC(hid, leaf_buffer, page_size); + btree_hdr *hdr = (btree_hdr *)leaf_buffer; + int nkvs = 256; btree_init_hdr(cfg, hdr); @@ -295,17 +299,18 @@ leaf_hdr_search_tests(btree_config *cfg, platform_heap_id hid) ASSERT_EQUAL(0, cmp_rv, "Bad 4-byte key %d\n", i); } - platform_free(hid, leaf_buffer); + platform_free(&memfrag_leaf_buffer); return 0; } static int index_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) { - char *index_buffer = - TYPED_MANUAL_MALLOC(hid, index_buffer, btree_page_size(cfg)); - btree_hdr *hdr = (btree_hdr *)index_buffer; - int nkvs = 100; + size_t page_size = btree_page_size(cfg); + platform_memfrag memfrag_index_buffer; + char *index_buffer = TYPED_ARRAY_MALLOC(hid, index_buffer, page_size); + btree_hdr *hdr = (btree_hdr *)index_buffer; + int nkvs = 100; bool32 rv = FALSE; int cmp_rv = 0; @@ -358,15 +363,17 @@ index_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) ASSERT_EQUAL(childaddr, i, "Bad childaddr %d\n", i); } - platform_free(hid, index_buffer); + platform_free(&memfrag_index_buffer); return 0; } static int index_hdr_search_tests(btree_config *cfg, platform_heap_id hid) { - char *leaf_buffer = - TYPED_MANUAL_MALLOC(hid, leaf_buffer, btree_page_size(cfg)); + size_t page_size = btree_page_size(cfg); + platform_memfrag memfrag_leaf_buffer; + char *leaf_buffer = TYPED_ARRAY_MALLOC(hid, leaf_buffer, page_size); + btree_hdr *hdr = (btree_hdr *)leaf_buffer; int nkvs = 256; btree_pivot_stats stats; @@ -394,7 +401,7 @@ index_hdr_search_tests(btree_config *cfg, platform_heap_id hid) (i / 2), idx, "Bad pivot search result idx=%ld for i=%d\n", idx, i); } - platform_free(hid, leaf_buffer); + platform_free(&memfrag_leaf_buffer); return 0; } @@ -404,9 +411,12 @@ leaf_split_tests(btree_config *cfg, int nkvs, platform_heap_id hid) { - uint64 bt_page_size = btree_page_size(cfg); - char *leaf_buffer = TYPED_MANUAL_MALLOC(hid, leaf_buffer, bt_page_size); - char *msg_buffer = TYPED_MANUAL_MALLOC(hid, msg_buffer, bt_page_size); + size_t bt_page_size = btree_page_size(cfg); + platform_memfrag memfrag_leaf_buffer; + char *leaf_buffer = TYPED_ARRAY_MALLOC(hid, leaf_buffer, bt_page_size); + + platform_memfrag memfrag_msg_buffer; + char *msg_buffer = TYPED_ARRAY_MALLOC(hid, msg_buffer, bt_page_size); memset(msg_buffer, 0, bt_page_size); @@ -467,7 +477,7 @@ leaf_split_tests(btree_config *cfg, destroy_leaf_incorporate_spec(&spec); } - platform_free(hid, leaf_buffer); - platform_free(hid, msg_buffer); + platform_free(&memfrag_leaf_buffer); + platform_free(&memfrag_msg_buffer); return 0; } diff --git a/tests/unit/config_parse_test.c b/tests/unit/config_parse_test.c index 7a102923..3820d3c3 100644 --- a/tests/unit/config_parse_test.c +++ b/tests/unit/config_parse_test.c @@ -75,8 +75,10 @@ CTEST2(config_parse, test_basic_parsing) int num_tables = 1; // Allocate memory for global config structures + platform_memfrag memfrag_splinter_cfg; splinter_cfg = TYPED_ARRAY_MALLOC(data->hid, splinter_cfg, num_tables); + platform_memfrag memfrag_cache_cfg; cache_cfg = TYPED_ARRAY_MALLOC(data->hid, cache_cfg, num_tables); platform_status rc; @@ -118,6 +120,6 @@ CTEST2(config_parse, test_basic_parsing) "Parameter '%s' expected. ", "--verbose-progress"); - platform_free(data->hid, cache_cfg); - platform_free(data->hid, splinter_cfg); + platform_free(&memfrag_cache_cfg); + platform_free(&memfrag_splinter_cfg); } diff --git a/tests/unit/large_inserts_stress_test.c b/tests/unit/large_inserts_stress_test.c index edafe4ba..3ef85b45 100644 --- a/tests/unit/large_inserts_stress_test.c +++ b/tests/unit/large_inserts_stress_test.c @@ -5,9 +5,42 @@ * ----------------------------------------------------------------------------- * large_inserts_stress_test.c -- * - * This test exercises simple very large #s of inserts which have found to - * trigger some bugs in some code paths. This is just a miscellaneous collection - * of test cases for different issues reported. + * This test exercises very large #s of inserts with different combinations + * of key-data and value-data layout to exercise core insertion logic of + * SplinterDB. During the development of shared-memory support, some of these + * cases were found to trigger bugs in either core SplinterDB or the new + * shared-memory support. + * + * Additionally, there is a small collection of miscellaneous test cases for + * different issues encountered during different stabilization rounds. + * + * Single-client test cases: + * Different strategies of loading key-data and value-data are defined as + * tokens in key_strategy and val_strategy enums. These tests exercise + * different pairs of these strategies for a single-client. The driving + * function is exec_worker_thread() which receives the test-case parameters + * via worker_config{} structure. + * + * Multiple-threads test cases: + * Similar to the single-client test cases, except that we now run through + * all combinations of key-data and value-data strategies across multiple + * threads. As this is intended to be a stress test, we do not distribute + * the --num-inserts across --num-threads threads. Each thread will insert + * the specified # of inserts, so we generate a high insert workload. + * + * Few variations of tests that start from the same start key-ID + * across all threads are added to exercise the logic of maintaining the + * BTrees across tons of duplicate key insertions. + * + * Test-case with forked process: test_Seq_key_be32_Seq_values_inserts_forked() + * Identical to test_Seq_key_be32_Seq_values_inserts() but the test is run in + * forked child process. Only one such scenario is exercised for forked + * processes. + * + * Regression fix test cases: + * test_issue_458_mini_destroy_unused_debug_assert + * test_fp_num_tuples_out_of_bounds_bug_trunk_build_filters + * * ----------------------------------------------------------------------------- */ #include @@ -21,61 +54,155 @@ #include "config.h" #include "unit_tests.h" #include "ctest.h" // This is required for all test-case files. +#include "splinterdb_tests_private.h" +#include "functional/random.h" // Nothing particularly significant about these constants. #define TEST_KEY_SIZE 30 -#define TEST_VALUE_SIZE 256 +#define TEST_VALUE_SIZE 32 /* - * Configuration for each worker thread. See the selection of 'fd'-semantics - * as implemented in exec_worker_thread(), to select diff types of key/value's - * data distribution during inserts. + * ---------------------------------------------------------------------------- + * Key-data test strategies: + * + * SEQ_KEY_BIG_ENDIAN_32 - Sequential int32 key-data in big-endian format. + * + * SEQ_KEY_HOST_ENDIAN_32 - Sequential int32 key-data in host-endian format. + * + * SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH - Sequential int32 key-data in + * host-endian format, packed-out with 'K' to the length of the key-data + * buffer. The sorted-ness exercises different tree management algorithms, + * while the padding bytes increases the key-size to trigger different tree + * management operations. + * + * RAND_KEY_RAND_LENGTH - Randomly generated random number of bytes of length + * within [1, key-data-buffer-size]. This is the most general use-case to + * exercise random key payloads of varying lengths. + * + * RAND_KEY_DATA_BUF_SIZE - Randomly generated key of length == key-data-buffer + * size. + * ---------------------------------------------------------------------------- */ -typedef struct { - splinterdb *kvsb; - master_config *master_cfg; - uint64 start_value; - uint64 num_inserts; - uint64 num_insert_threads; - int random_key_fd; // Options to choose the type of key inserted - int random_val_fd; // Options to choose the type of value inserted - bool is_thread; // Is main() or thread executing worker fn -} worker_config; +// clang-format off +typedef enum { // Test-case + SEQ_KEY_BIG_ENDIAN_32 = 1, // 1 + SEQ_KEY_HOST_ENDIAN_32, // 2 + SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH, // 3 + RAND_KEY_RAND_LENGTH, // 4 + RAND_KEY_DATA_BUF_SIZE, // 5 + SEQ_KEY_HOST_ENDIAN_32_SAME_START_ID, // 6 + SEQ_KEY_BIG_ENDIAN_32_SAME_START_ID, // 7 + NUM_KEY_DATA_STRATEGIES +} key_strategy; + +// Key-data strategy names, indexed by key_strategy enum values. +const char *Key_strategy_names[] = { + "Undefined key-data strategy" + , "Sequential key, 32-bit big-endian" + , "Sequential key, 32-bit host-endian" + , "Sequential key, fully-packed to key-data buffer, 32-bit host-endian" + , "Random key-data, random length" + , "Random key-data, fully-packed to key-data buffer" + , "Sequential key, 32-bit host-endian, same start-ID across all threads" + , "Sequential key, 32-bit big-endian, same start-ID across all threads" +}; -// Function Prototypes -static void * -exec_worker_thread(void *w); +// clang-format on -static void -do_inserts_n_threads(splinterdb *kvsb, - master_config *master_cfg, - platform_heap_id hid, - int random_key_fd, - int random_val_fd, - uint64 num_inserts, - uint64 num_insert_threads); +// Ensure that the strategy name-lookup array is adequately sized. +_Static_assert(ARRAY_SIZE(Key_strategy_names) == NUM_KEY_DATA_STRATEGIES, + "Lookup array Key_strategy_names[] is incorrectly sized for " + "NUM_KEY_DATA_STRATEGIES"); -// Run n-threads concurrently inserting many KV-pairs -#define NUM_THREADS 8 +#define Key_strategy_name(id) \ + ((((id) > 0) && ((id) < NUM_KEY_DATA_STRATEGIES)) \ + ? Key_strategy_names[(id)] \ + : Key_strategy_names[0]) /* - * Some test-cases can drive multiple threads to use either the same start - * value for all threads. Or, each thread will use its own start value so - * that all threads are inserting in non-intersecting bands of keys. - * These mnemonics control these behaviours. + * ---------------------------------------------------------------------------- + * Value-data test strategies: + * + * SEQ_VAL_SMALL - Generate sprintf("Row-%d")'ed small value, whose length will + * be few bytes + * + * SEQ_VAL_PADDED_LENGTH - Similarly sprintf()'ed value but padded-out to the + * length of the value-data buffer. This exercises large-values so we can + * fill-up pages more easily. + * + * RAND_VAL_RAND_LENGTH - Randomly generated random number of bytes of length + * within [1, value-data-buffer-size]. This is the most general use-case to + * exercise random message payloads of varying lengths. + * + * RAND_6BYTE_VAL - Randomly generated value 6-bytes length. (6 bytes is the + * length of the payload when integrating SplinterDB with Postgres.) + * ---------------------------------------------------------------------------- */ -#define TEST_INSERTS_SEQ_KEY_DIFF_START_KEYID_FD ((int)0) -#define TEST_INSERTS_SEQ_KEY_SAME_START_KEYID_FD ((int)-1) +// clang-format off +typedef enum { // Sub-case + SEQ_VAL_SMALL = 1, // (a) 'Row-%d' + SEQ_VAL_PADDED_LENGTH, // (b) 'Row-%d' padded to value data buffer size + RAND_VAL_RAND_LENGTH, // (c) Random value-bytes of random length + RAND_6BYTE_VAL, // (d) Random value-bytes, exactly 6-bytes long + NUM_VALUE_DATA_STRATEGIES +} val_strategy; + +// Value-data strategy names, indexed by val_strategy enum values. +const char *Val_strategy_names[] = { + "Undefined value-data strategy" + , "Small length sequential value" + , "Sequential value, fully-packed to value-data buffer" + , "Random value, of random-length" + , "Random value, 6-bytes length" +}; + +// clang-format on -/* Drive inserts to generate sequential short-length values */ -#define TEST_INSERT_SEQ_VALUES_FD ((int)0) +// Ensure that the strategy name-lookup array is adequately sized. +_Static_assert(ARRAY_SIZE(Val_strategy_names) == NUM_VALUE_DATA_STRATEGIES, + "Lookup array Key_strategy_names[] is incorrectly sized for " + "NUM_VALUE_DATA_STRATEGIES"); + +#define Val_strategy_name(id) \ + ((((id) > 0) && ((id) < NUM_VALUE_DATA_STRATEGIES)) \ + ? Val_strategy_names[(id)] \ + : Val_strategy_names[0]) /* - * Some test-cases drive inserts to choose a fully-packed value of size - * TEST_VALUE_SIZE bytes. This variation has been seen to trigger some - * assertions. + * ---------------------------------------------------------------------------- + * Configuration for each worker thread. See the handling of 'key_size' + * and 'val_size'-semantics as implemented in exec_worker_thread(). This + * selects diff types of key/value's data distribution during inserts. + * ---------------------------------------------------------------------------- */ -#define TEST_INSERT_FULLY_PACKED_CONSTANT_VALUE_FD (int)-1 +typedef struct worker_config { + platform_heap_id hid; + splinterdb *kvsb; + uint64 start_value; + uint64 num_inserts; + size_t key_size; // --key-size test execution argument + size_t val_size; // --data-size test execution argument + uint64 rand_seed; + key_strategy key_type; + val_strategy val_type; + bool is_thread; + bool fork_child; + bool verbose_progress; + bool show_strategy; +} worker_config; + +// Function Prototypes +static void * +exec_worker_thread(void *w); + +static void +do_inserts_n_threads(worker_config *data_work_cfg, + key_strategy key_type, + val_strategy val_type, + uint64 num_insert_threads); + +// Run n-threads concurrently inserting many KV-pairs +#define NUM_THREADS 8 /* * Global data declaration macro: @@ -88,69 +215,90 @@ CTEST_DATA(large_inserts_stress) splinterdb *kvsb; splinterdb_config cfg; data_config default_data_config; - master_config master_cfg; uint64 num_inserts; // per main() process or per thread uint64 num_insert_threads; + size_t key_size; // --key-size test execution argument + size_t val_size; // --data-size test execution argument int this_pid; + bool verbose_progress; bool am_parent; + worker_config work_cfg; }; // Optional setup function for suite, called before every test in suite CTEST_SETUP(large_inserts_stress) { + master_config master_cfg = {0}; // First, register that main() is being run as a parent process data->am_parent = TRUE; data->this_pid = platform_getpid(); platform_status rc; - uint64 heap_capacity = (64 * MiB); // small heap is sufficient. - - config_set_defaults(&data->master_cfg); + config_set_defaults(&master_cfg); // Expected args to parse --num-inserts, --use-shmem, --verbose-progress. - rc = config_parse(&data->master_cfg, 1, Ctest_argc, (char **)Ctest_argv); + rc = config_parse(&master_cfg, 1, Ctest_argc, (char **)Ctest_argv); ASSERT_TRUE(SUCCESS(rc)); - // Create a heap for allocating on-stack buffers for various arrays. - rc = platform_heap_create(platform_get_module_id(), - heap_capacity, - data->master_cfg.use_shmem, - &data->hid); - platform_assert_status_ok(rc); - - data->cfg = (splinterdb_config){.filename = TEST_DB_NAME, - .cache_size = 1 * Giga, - .disk_size = 40 * Giga, - .use_shmem = data->master_cfg.use_shmem, - .shmem_size = (4 * GiB), - .data_cfg = &data->default_data_config}; + data->cfg = + (splinterdb_config){.filename = "splinterdb_large_inserts_stress_test_db", + .cache_size = 4 * GiB, + .disk_size = 40 * GiB, + .use_shmem = master_cfg.use_shmem, + .shmem_size = (1 * GiB), + .data_cfg = &data->default_data_config}; data->num_inserts = - (data->master_cfg.num_inserts ? data->master_cfg.num_inserts - : (1 * MILLION)); - data->num_insert_threads = NUM_THREADS; + (master_cfg.num_inserts ? master_cfg.num_inserts : (2 * MILLION)); + + // If num_threads is unspecified, use default for this test. + data->num_insert_threads = + (master_cfg.num_threads ? master_cfg.num_threads : NUM_THREADS); if ((data->num_inserts % MILLION) != 0) { - platform_error_log("Test expects --num-inserts parameter to be an" - " integral multiple of a million.\n"); - ASSERT_EQUAL(0, (data->num_inserts % MILLION)); - return; + size_t num_million = (data->num_inserts / MILLION); + data->num_inserts = (num_million * MILLION); + CTEST_LOG_INFO("Test expects --num-inserts parameter to be an" + " integral multiple of a million." + " Reset --num-inserts to %lu million.\n", + num_million); } // Run with higher configured shared memory, if specified - if (data->master_cfg.shmem_size > data->cfg.shmem_size) { - data->cfg.shmem_size = data->master_cfg.shmem_size; + if (master_cfg.shmem_size > data->cfg.shmem_size) { + data->cfg.shmem_size = master_cfg.shmem_size; } // Setup Splinter's background thread config, if specified - data->cfg.num_memtable_bg_threads = data->master_cfg.num_memtable_bg_threads; - data->cfg.num_normal_bg_threads = data->master_cfg.num_normal_bg_threads; + data->cfg.num_memtable_bg_threads = master_cfg.num_memtable_bg_threads; + data->cfg.num_normal_bg_threads = master_cfg.num_normal_bg_threads; + data->cfg.use_stats = master_cfg.use_stats; + data->key_size = + (master_cfg.max_key_size ? master_cfg.max_key_size : TEST_KEY_SIZE); + data->val_size = + (master_cfg.message_size ? master_cfg.message_size : TEST_VALUE_SIZE); + default_data_config_init(data->key_size, data->cfg.data_cfg); - size_t max_key_size = TEST_KEY_SIZE; - default_data_config_init(max_key_size, data->cfg.data_cfg); + data->verbose_progress = master_cfg.verbose_progress; + + // platform_enable_tracing_large_frags(); int rv = splinterdb_create(&data->cfg, &data->kvsb); ASSERT_EQUAL(0, rv); + + CTEST_LOG_INFO("... with key-size=%lu, value-size=%lu bytes\n", + data->key_size, + data->val_size); + + // Setup default configuration for each worker-thread ... + ZERO_STRUCT(data->work_cfg); + data->work_cfg.hid = data->hid; + data->work_cfg.kvsb = data->kvsb; + data->work_cfg.num_inserts = data->num_inserts; + data->work_cfg.key_size = data->key_size; + data->work_cfg.val_size = data->val_size; + data->work_cfg.rand_seed = master_cfg.seed; + data->work_cfg.verbose_progress = master_cfg.verbose_progress; } // Optional teardown function for suite, called after every test in suite @@ -158,18 +306,34 @@ CTEST_TEARDOWN(large_inserts_stress) { // Only parent process should tear down Splinter. if (data->am_parent) { - splinterdb_close(&data->kvsb); - platform_heap_destroy(&data->hid); + int rv = splinterdb_close(&data->kvsb); + ASSERT_EQUAL(0, rv); + + platform_disable_tracing_large_frags(); + platform_status rc = platform_heap_destroy(&data->hid); + ASSERT_TRUE(SUCCESS(rc)); } } /* * Test case that inserts large # of KV-pairs, and goes into a code path - * reported by issue# 458, tripping a debug assert. + * reported by issue# 458, tripping a debug assert. This test case also + * triggered the failure(s) reported by issue # 545. + */ +// clang-format off +/* + * FIXME: In diff CI release-build runs, we see messages like: + * btree_pack(): req->num_tuples=6291456 exceeded output size limit, req->max_tuples=6291456 + * And in some debug-build runs, the test-runs fails with "timeout exceeded" + * The timeout issues _seems_like_ occurs mostly in debug-build runs. */ -CTEST2_SKIP(large_inserts_stress, - test_issue_458_mini_destroy_unused_debug_assert) +#if SPLINTER_DEBUG +CTEST2_SKIP(large_inserts_stress, test_issue_458_mini_destroy_unused_debug_assert) +#else // SPLINTER_DEBUG +CTEST2(large_inserts_stress, test_issue_458_mini_destroy_unused_debug_assert) +#endif // SPLINTER_DEBUG { + // clang-format on char key_data[TEST_KEY_SIZE]; char val_data[TEST_VALUE_SIZE]; @@ -194,19 +358,26 @@ CTEST2_SKIP(large_inserts_stress, uint64 elapsed_ns = platform_timestamp_elapsed(start_time); uint64 test_elapsed_ns = platform_timestamp_elapsed(test_start_time); - platform_default_log( - PLATFORM_CR + uint64 elapsed_s = NSEC_TO_SEC(elapsed_ns); + uint64 test_elapsed_s = NSEC_TO_SEC(test_elapsed_ns); + + elapsed_s = ((elapsed_s > 0) ? elapsed_s : 1); + test_elapsed_s = ((test_elapsed_s > 0) ? test_elapsed_s : 1); + + CTEST_LOG_INFO( + "\n" // PLATFORM_CR "Inserted %lu million KV-pairs" ", this batch: %lu s, %lu rows/s, cumulative: %lu s, %lu rows/s ...", (ictr + 1), - NSEC_TO_SEC(elapsed_ns), - (jctr / NSEC_TO_SEC(elapsed_ns)), - NSEC_TO_SEC(test_elapsed_ns), - (((ictr + 1) * jctr) / NSEC_TO_SEC(test_elapsed_ns))); + elapsed_s, + (jctr / elapsed_s), + test_elapsed_s, + (((ictr + 1) * jctr) / test_elapsed_s)); } } /* + * ---------------------------------------------------------------------------- * Test cases exercise the thread's worker-function, exec_worker_thread(), * from the main connection to splinter, for specified number of inserts. * @@ -215,174 +386,446 @@ CTEST2_SKIP(large_inserts_stress, * - random keys, sequential values * - sequential keys, random values * - random keys, random values + * ---------------------------------------------------------------------------- */ -CTEST2(large_inserts_stress, test_seq_key_seq_values_inserts) +// Case 1(a) - SEQ_KEY_BIG_ENDIAN_32 +CTEST2(large_inserts_stress, test_Seq_key_be32_Seq_values_inserts) { - worker_config wcfg; - ZERO_STRUCT(wcfg); + worker_config *wcfg = &data->work_cfg; // Load worker config params - wcfg.kvsb = data->kvsb; - wcfg.master_cfg = &data->master_cfg; - wcfg.num_inserts = data->num_inserts; + wcfg->key_type = SEQ_KEY_BIG_ENDIAN_32; + wcfg->val_type = SEQ_VAL_SMALL; - exec_worker_thread(&wcfg); + exec_worker_thread(wcfg); } -CTEST2(large_inserts_stress, test_random_key_seq_values_inserts) +// Case 1(b) - SEQ_KEY_BIG_ENDIAN_32 +CTEST2(large_inserts_stress, test_Seq_key_be32_Seq_values_packed_inserts) { - worker_config wcfg; - ZERO_STRUCT(wcfg); + worker_config *wcfg = &data->work_cfg; // Load worker config params - wcfg.kvsb = data->kvsb; - wcfg.master_cfg = &data->master_cfg; - wcfg.num_inserts = data->num_inserts; - wcfg.random_key_fd = open("/dev/urandom", O_RDONLY); - - exec_worker_thread(&wcfg); - - close(wcfg.random_key_fd); + wcfg->key_type = SEQ_KEY_BIG_ENDIAN_32; + wcfg->val_type = SEQ_VAL_PADDED_LENGTH; + exec_worker_thread(wcfg); } -CTEST2(large_inserts_stress, test_seq_key_random_values_inserts) +// Case 1(c) - SEQ_KEY_BIG_ENDIAN_32 +CTEST2(large_inserts_stress, test_Seq_key_be32_Rand_length_values_inserts) { - worker_config wcfg; - ZERO_STRUCT(wcfg); + worker_config *wcfg = &data->work_cfg; // Load worker config params - wcfg.kvsb = data->kvsb; - wcfg.master_cfg = &data->master_cfg; - wcfg.num_inserts = data->num_inserts; - wcfg.random_val_fd = open("/dev/urandom", O_RDONLY); + wcfg->key_type = SEQ_KEY_BIG_ENDIAN_32; + wcfg->val_type = RAND_VAL_RAND_LENGTH; + exec_worker_thread(wcfg); +} - exec_worker_thread(&wcfg); +/* + * Fails, sometimes, due to assertion failure as reported in issue #560. + */ +// Case 1(d) - SEQ_KEY_BIG_ENDIAN_32 +// clang-format off +CTEST2(large_inserts_stress, test_Seq_key_be32_Rand_6byte_values_inserts) +// clang-format on +{ + worker_config *wcfg = &data->work_cfg; - close(wcfg.random_val_fd); + // Load worker config params + wcfg->key_type = SEQ_KEY_BIG_ENDIAN_32; + wcfg->val_type = RAND_6BYTE_VAL; + exec_worker_thread(wcfg); } -CTEST2(large_inserts_stress, test_random_key_random_values_inserts) +// Case 2(a) - SEQ_KEY_HOST_ENDIAN_32 +CTEST2(large_inserts_stress, test_Seq_key_he32_Seq_values_inserts) { - worker_config wcfg; - ZERO_STRUCT(wcfg); + worker_config *wcfg = &data->work_cfg; // Load worker config params - wcfg.kvsb = data->kvsb; - wcfg.master_cfg = &data->master_cfg; - wcfg.num_inserts = data->num_inserts; - wcfg.random_key_fd = open("/dev/urandom", O_RDONLY); - wcfg.random_val_fd = open("/dev/urandom", O_RDONLY); + wcfg->key_type = SEQ_KEY_HOST_ENDIAN_32; + wcfg->val_type = SEQ_VAL_SMALL; + exec_worker_thread(wcfg); +} - exec_worker_thread(&wcfg); +// Case 2(b) - SEQ_KEY_HOST_ENDIAN_32 +CTEST2(large_inserts_stress, test_Seq_key_he32_Seq_values_packed_inserts) +{ + worker_config *wcfg = &data->work_cfg; - close(wcfg.random_key_fd); - close(wcfg.random_val_fd); + // Load worker config params + wcfg->key_type = SEQ_KEY_HOST_ENDIAN_32; + wcfg->val_type = SEQ_VAL_PADDED_LENGTH; + exec_worker_thread(wcfg); } -static void -safe_wait() +// Case 2(c) - SEQ_KEY_HOST_ENDIAN_32 +CTEST2(large_inserts_stress, test_Seq_key_he32_Rand_length_values_inserts) { - int wstatus; - int wr = wait(&wstatus); - platform_assert(wr != -1, "wait failure: %s", strerror(errno)); - platform_assert(WIFEXITED(wstatus), - "Child terminated abnormally: SIGNAL=%d", - WIFSIGNALED(wstatus) ? WTERMSIG(wstatus) : 0); - platform_assert(WEXITSTATUS(wstatus) == 0); + worker_config *wcfg = &data->work_cfg; + + // Load worker config params + wcfg->key_type = SEQ_KEY_HOST_ENDIAN_32; + wcfg->val_type = RAND_VAL_RAND_LENGTH; + exec_worker_thread(wcfg); } /* - * ---------------------------------------------------------------------------- - * test_seq_key_seq_values_inserts_forked() -- - * - * Test case is identical to test_seq_key_seq_values_inserts() but the - * actual execution of the function that does inserts is done from - * a forked-child process. This test, therefore, does basic validation - * that from a forked-child process we can drive basic SplinterDB commands. - * And then the parent can resume after the child exits, and can cleanly - * shutdown the instance. - * ---------------------------------------------------------------------------- + * Fails, sometimes, due to assertion failure as reported in issue #560. */ -CTEST2(large_inserts_stress, test_seq_key_seq_values_inserts_forked) +// Case 2(d) - SEQ_KEY_HOST_ENDIAN_32 +// clang-format off +CTEST2(large_inserts_stress, test_Seq_key_he32_Rand_6byte_values_inserts) +// clang-format on { - worker_config wcfg; - ZERO_STRUCT(wcfg); + worker_config *wcfg = &data->work_cfg; // Load worker config params - wcfg.kvsb = data->kvsb; - wcfg.master_cfg = &data->master_cfg; - wcfg.num_inserts = data->num_inserts; + wcfg->key_type = SEQ_KEY_HOST_ENDIAN_32; + wcfg->val_type = RAND_6BYTE_VAL; + exec_worker_thread(wcfg); +} - int pid = platform_getpid(); +// Case 3(a) - SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH +CTEST2(large_inserts_stress, test_Seq_key_packed_he32_Seq_values_inserts) +{ + worker_config *wcfg = &data->work_cfg; - if (wcfg.master_cfg->fork_child) { - pid = fork(); + // Load worker config params + wcfg->key_type = SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH; + wcfg->val_type = SEQ_VAL_SMALL; + exec_worker_thread(wcfg); +} - if (pid < 0) { - platform_error_log("fork() of child process failed: pid=%d\n", pid); - return; - } else if (pid) { - platform_default_log("OS-pid=%d, Thread-ID=%lu: " - "Waiting for child pid=%d to complete ...\n", - platform_getpid(), - platform_get_tid(), - pid); +// Case 3(b) - SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH +CTEST2(large_inserts_stress, test_Seq_key_packed_he32_Seq_values_packed_inserts) +{ + worker_config *wcfg = &data->work_cfg; - safe_wait(); + // Load worker config params + wcfg->key_type = SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH; + wcfg->val_type = SEQ_VAL_PADDED_LENGTH; + exec_worker_thread(wcfg); +} - platform_default_log("Thread-ID=%lu, OS-pid=%d: " - "Child execution wait() completed." - " Resuming parent ...\n", - platform_get_tid(), - platform_getpid()); - } - } - if (pid == 0) { - // Record in global data that we are now running as a child. - data->am_parent = FALSE; - data->this_pid = platform_getpid(); +// Case 3(c) - SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH +// clang-format off +CTEST2(large_inserts_stress, test_Seq_key_packed_he32_Rand_length_values_inserts) +// clang-format on +{ + worker_config *wcfg = &data->work_cfg; - platform_default_log( - "OS-pid=%d Running as %s process ...\n", - data->this_pid, - (wcfg.master_cfg->fork_child ? "forked child" : "parent")); + // Load worker config params + wcfg->key_type = SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH; + wcfg->val_type = RAND_VAL_RAND_LENGTH; + exec_worker_thread(wcfg); +} - splinterdb_register_thread(wcfg.kvsb); +// Case 4(a) - RAND_KEY_RAND_LENGTH +CTEST2(large_inserts_stress, test_Rand_key_Seq_values_inserts) +{ + worker_config *wcfg = &data->work_cfg; - exec_worker_thread(&wcfg); + // Load worker config params + wcfg->key_type = RAND_KEY_RAND_LENGTH; + wcfg->val_type = SEQ_VAL_SMALL; + exec_worker_thread(wcfg); +} - platform_default_log("OS-pid=%d, Thread-ID=%lu, Child process" - ", completed inserts.\n", - data->this_pid, - platform_get_tid()); - splinterdb_deregister_thread(wcfg.kvsb); - exit(0); - return; - } +// Case 4(b) - RAND_KEY_RAND_LENGTH +CTEST2(large_inserts_stress, test_Rand_key_Seq_values_packed_inserts) +{ + worker_config *wcfg = &data->work_cfg; + + // Load worker config params + wcfg->key_type = RAND_KEY_RAND_LENGTH; + wcfg->val_type = SEQ_VAL_PADDED_LENGTH; + exec_worker_thread(wcfg); +} + +// Case 4(c) - RAND_KEY_RAND_LENGTH +CTEST2(large_inserts_stress, test_Rand_key_Rand_length_values_inserts) +{ + worker_config *wcfg = &data->work_cfg; + + // Load worker config params + wcfg->key_type = RAND_KEY_RAND_LENGTH; + wcfg->val_type = RAND_VAL_RAND_LENGTH; + exec_worker_thread(wcfg); +} + +// Case 4(d) - RAND_KEY_RAND_LENGTH +CTEST2(large_inserts_stress, test_Rand_key_Rand_6byte_values_inserts) +{ + worker_config *wcfg = &data->work_cfg; + + // Load worker config params + wcfg->key_type = RAND_KEY_RAND_LENGTH; + wcfg->val_type = RAND_6BYTE_VAL; + exec_worker_thread(wcfg); +} + +// Case 5(a) - RAND_KEY_DATA_BUF_SIZE +CTEST2(large_inserts_stress, test_Rand_key_packed_Seq_values_inserts) +{ + worker_config *wcfg = &data->work_cfg; + + // Load worker config params + wcfg->key_type = RAND_KEY_DATA_BUF_SIZE; + wcfg->val_type = SEQ_VAL_SMALL; + exec_worker_thread(wcfg); +} + +// Case 5(b) - RAND_KEY_DATA_BUF_SIZE +CTEST2(large_inserts_stress, test_Rand_key_packed_Seq_values_packed_inserts) +{ + worker_config *wcfg = &data->work_cfg; + + // Load worker config params + wcfg->key_type = RAND_KEY_DATA_BUF_SIZE; + wcfg->val_type = SEQ_VAL_PADDED_LENGTH; + exec_worker_thread(wcfg); +} + +// Case 5(c) - RAND_KEY_DATA_BUF_SIZE +CTEST2(large_inserts_stress, test_Rand_key_packed_Rand_length_values_inserts) +{ + worker_config *wcfg = &data->work_cfg; + + // Load worker config params + wcfg->key_type = RAND_KEY_DATA_BUF_SIZE; + wcfg->val_type = RAND_VAL_RAND_LENGTH; + exec_worker_thread(wcfg); +} + +static void +safe_wait() +{ + int wstatus; + int wr = wait(&wstatus); + platform_assert(wr != -1, "wait failure: %s", strerror(errno)); + platform_assert(WIFEXITED(wstatus), + "Child terminated abnormally: SIGNAL=%d", + WIFSIGNALED(wstatus) ? WTERMSIG(wstatus) : 0); + platform_assert(WEXITSTATUS(wstatus) == 0); } /* * ---------------------------------------------------------------------------- * Collection of test cases that fire-up diff combinations of inserts * (sequential, random keys & values) executed by n-threads. + * These test cases are identical to the list 1(a), 1(b), etc., which are all + * single-threaded. These test cases execute the same workload (key- and + * value-data distribution strategies), except that these run multiple threads. * ---------------------------------------------------------------------------- */ /* * Test case that fires up many threads each concurrently inserting large # of * KV-pairs, with discrete ranges of keys inserted by each thread. - * RESOLVE: This hangs in this flow; never completes ... + * RESOLVE: This hangs in this flow; never completes with --num-threads 63 ... * clockcache_try_get_read() -> memtable_maybe_rotate_and_get_insert_lock() * This problem will probably occur in /main as well. + * FIXME: Runs into btree_pack(): req->num_tuples=6291457 exceeded output size + * limit: req->max_tuples=6291456 */ -CTEST2_SKIP(large_inserts_stress, test_seq_key_seq_values_inserts_threaded) -{ - // Run n-threads with sequential key and sequential values inserted - do_inserts_n_threads(data->kvsb, - &data->master_cfg, - data->hid, - TEST_INSERTS_SEQ_KEY_DIFF_START_KEYID_FD, - TEST_INSERT_SEQ_VALUES_FD, - data->num_inserts, +// Case 1(a) - SEQ_KEY_BIG_ENDIAN_32 +CTEST2(large_inserts_stress, test_Seq_key_be32_Seq_values_inserts_threaded) +{ + do_inserts_n_threads(&data->work_cfg, + SEQ_KEY_BIG_ENDIAN_32, + SEQ_VAL_SMALL, + data->num_insert_threads); +} + +// clang-format off +// Case 1(b) - SEQ_KEY_BIG_ENDIAN_32 +CTEST2(large_inserts_stress, test_Seq_key_be32_Seq_values_packed_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + SEQ_KEY_BIG_ENDIAN_32, + SEQ_VAL_PADDED_LENGTH, + data->num_insert_threads); +} + +// clang-format off +// Case 1(c) - SEQ_KEY_BIG_ENDIAN_32 +// FIXME: Failed in CI main-pr-clang job with issue #474: +// # OS-pid=1969, OS-tid=1988, Thread-ID=3, Assertion failed at src/trunk.c:2218:trunk_get_new_bundle(): "(node->hdr->end_bundle != node->hdr->start_bundle)". No available bundles in trunk node. page disk_addr=122658816, end_bundle=8, start_bundle=8 +CTEST2(large_inserts_stress, test_Seq_key_be32_Rand_length_values_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + SEQ_KEY_BIG_ENDIAN_32, + RAND_VAL_RAND_LENGTH, + data->num_insert_threads); +} + +// clang-format off +// Case 1(d) - SEQ_KEY_BIG_ENDIAN_32 +CTEST2(large_inserts_stress, test_Seq_key_be32_Rand_6byte_values_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + SEQ_KEY_BIG_ENDIAN_32, + RAND_6BYTE_VAL, + data->num_insert_threads); +} + +// clang-format off +// Case 2(a) - SEQ_KEY_HOST_ENDIAN_32 +CTEST2(large_inserts_stress, test_Seq_key_he32_Seq_values_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + SEQ_KEY_HOST_ENDIAN_32, + SEQ_VAL_SMALL, + data->num_insert_threads); +} + +// clang-format off +// Case 2(b) - SEQ_KEY_HOST_ENDIAN_32 +CTEST2(large_inserts_stress, test_Seq_key_he32_Seq_values_packed_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + SEQ_KEY_HOST_ENDIAN_32, + SEQ_VAL_PADDED_LENGTH, + data->num_insert_threads); +} + +// clang-format off +// Case 2(c) - SEQ_KEY_HOST_ENDIAN_32 +CTEST2(large_inserts_stress, test_Seq_key_he32_Rand_length_values_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + SEQ_KEY_HOST_ENDIAN_32, + RAND_VAL_RAND_LENGTH, + data->num_insert_threads); +} + +// clang-format off +// Case 2(d) - SEQ_KEY_HOST_ENDIAN_32 +CTEST2(large_inserts_stress, test_Seq_key_he32_Rand_6byte_values_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + SEQ_KEY_HOST_ENDIAN_32, + RAND_6BYTE_VAL, + data->num_insert_threads); +} + +// clang-format off +// Case 3(a) - SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH +CTEST2(large_inserts_stress, test_Seq_key_packed_he32_Seq_values_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH, + SEQ_VAL_SMALL, + data->num_insert_threads); +} + +// clang-format off +// Case 3(b) - SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH +CTEST2(large_inserts_stress, test_Seq_key_packed_he32_Seq_values_packed_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH, + SEQ_VAL_PADDED_LENGTH, + data->num_insert_threads); +} + +// clang-format off +// Case 3(c) - SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH +CTEST2(large_inserts_stress, test_Seq_key_packed_he32_Rand_length_values_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH, + RAND_VAL_RAND_LENGTH, + data->num_insert_threads); +} + +// clang-format off +// Case 4(a) - RAND_KEY_RAND_LENGTH +CTEST2(large_inserts_stress, test_Rand_key_Seq_values_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + RAND_KEY_RAND_LENGTH, + SEQ_VAL_SMALL, + data->num_insert_threads); +} + +// clang-format off +// Case 4(b) - RAND_KEY_RAND_LENGTH +CTEST2(large_inserts_stress, test_Rand_key_Seq_values_packed_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + RAND_KEY_RAND_LENGTH, + SEQ_VAL_PADDED_LENGTH, + data->num_insert_threads); +} + +// clang-format off +// Case 4(c) - RAND_KEY_RAND_LENGTH +CTEST2(large_inserts_stress, test_Rand_key_Rand_length_values_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + RAND_KEY_RAND_LENGTH, + RAND_VAL_RAND_LENGTH, + data->num_insert_threads); +} + +// clang-format off +// Case 4(d) - RAND_KEY_RAND_LENGTH +CTEST2(large_inserts_stress, test_Rand_key_Rand_6byte_values_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + RAND_KEY_RAND_LENGTH, + RAND_6BYTE_VAL, + data->num_insert_threads); +} + +// clang-format off +// Case 5(a) - RAND_KEY_DATA_BUF_SIZE +CTEST2(large_inserts_stress, test_Rand_key_packed_Seq_values_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + RAND_KEY_DATA_BUF_SIZE, + SEQ_VAL_SMALL, + data->num_insert_threads); +} + +// clang-format off +// Case 5(b) - RAND_KEY_DATA_BUF_SIZE +// FIXME: Failed in CI main-pr-asan job: +// OS-pid=2690, OS-tid=2820, Thread-ID=2, Assertion failed at src/trunk.c:5500:trunk_compact_bundle(): "height != 0". +CTEST2(large_inserts_stress, test_Rand_key_packed_Seq_values_packed_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + RAND_KEY_DATA_BUF_SIZE, + SEQ_VAL_PADDED_LENGTH, + data->num_insert_threads); +} + +// clang-format off +// Case 5(c) - RAND_KEY_DATA_BUF_SIZE +CTEST2(large_inserts_stress, test_Rand_key_packed_Rand_length_values_inserts_threaded) +{ + // clang-format on + do_inserts_n_threads(&data->work_cfg, + RAND_KEY_DATA_BUF_SIZE, + RAND_VAL_RAND_LENGTH, data->num_insert_threads); } @@ -392,17 +835,19 @@ CTEST2_SKIP(large_inserts_stress, test_seq_key_seq_values_inserts_threaded) * * With --num-threads 63, hangs in * clockcache_get_read() -> memtable_maybe_rotate_and_get_insert_lock() + * FIXME: Runs into BTree pack errors: + * btree_pack(): req->num_tuples=6291456 exceeded output size limit, + * req->max_tuples=6291456 btree_pack failed: No space left on device + * FIXME: Causes CI-timeout after 2h in debug-test runs. */ -CTEST2(large_inserts_stress, - test_seq_key_seq_values_inserts_threaded_same_start_keyid) -{ - // Run n-threads with sequential key and sequential values inserted - do_inserts_n_threads(data->kvsb, - &data->master_cfg, - data->hid, - TEST_INSERTS_SEQ_KEY_SAME_START_KEYID_FD, - TEST_INSERT_SEQ_VALUES_FD, - data->num_inserts, +// clang-format off +// Case 6(a) Variation of Case 2(a) - SEQ_KEY_HOST_ENDIAN_32 +CTEST2_SKIP(large_inserts_stress, test_Seq_key_he32_same_start_keyid_Seq_values_inserts_threaded) +// clang-format on +{ + do_inserts_n_threads(&data->work_cfg, + SEQ_KEY_HOST_ENDIAN_32_SAME_START_ID, + SEQ_VAL_SMALL, data->num_insert_threads); } @@ -410,91 +855,166 @@ CTEST2(large_inserts_stress, * Test case that fires up many threads each concurrently inserting large # of * KV-pairs, with all threads inserting from same start-value, using a fixed * fully-packed value. + * FIXME: Causes CI-timeout after 2h in debug-test runs. */ -CTEST2(large_inserts_stress, - test_seq_key_fully_packed_value_inserts_threaded_same_start_keyid) -{ - // Run n-threads with sequential key and sequential values inserted - do_inserts_n_threads(data->kvsb, - &data->master_cfg, - data->hid, - TEST_INSERTS_SEQ_KEY_SAME_START_KEYID_FD, - TEST_INSERT_FULLY_PACKED_CONSTANT_VALUE_FD, - data->num_inserts, +// clang-format off +// Case 7(b) Variation of Case 1(b) - SEQ_KEY_BIG_ENDIAN_32 +CTEST2_SKIP(large_inserts_stress, test_Seq_key_be32_same_start_keyid_Seq_values_packed_inserts_threaded) +// clang-format on +{ + do_inserts_n_threads(&data->work_cfg, + SEQ_KEY_BIG_ENDIAN_32_SAME_START_ID, + SEQ_VAL_PADDED_LENGTH, data->num_insert_threads); } -CTEST2(large_inserts_stress, test_random_keys_seq_values_threaded) +/* + * ---------------------------------------------------------------------------- + * test_Seq_key_be32_Seq_values_inserts_forked() -- + * + * Test case is identical to test_Seq_key_be32_Seq_values_inserts() but the + * actual execution of the function that does inserts is done from + * a forked-child process. This test, therefore, does basic validation + * that from a forked-child process we can drive basic SplinterDB commands. + * And then the parent can resume after the child exits, and can cleanly + * shutdown the instance. + * ---------------------------------------------------------------------------- + */ +// RESOLVE: Fails due to assertion: +// OS-pid=1576708, OS-tid=1576708, Thread-ID=0, Assertion failed at +// src/rc_allocator.c:536:rc_allocator_dec_ref(): "(ref_count != UINT8_MAX)". +// extent_no=14, ref_count=255 (0xff) +CTEST2_SKIP(large_inserts_stress, test_Seq_key_be32_Seq_values_inserts_forked) { - int random_key_fd = open("/dev/urandom", O_RDONLY); - ASSERT_TRUE(random_key_fd > 0); + worker_config *wcfg = &data->work_cfg; - // Run n-threads with sequential key and sequential values inserted - do_inserts_n_threads(data->kvsb, - &data->master_cfg, - data->hid, - random_key_fd, - TEST_INSERT_SEQ_VALUES_FD, - data->num_inserts, - data->num_insert_threads); + // Load worker config params + wcfg->key_type = SEQ_KEY_BIG_ENDIAN_32; + wcfg->val_type = SEQ_VAL_SMALL; + wcfg->fork_child = TRUE; - close(random_key_fd); -} + int pid = getpid(); -CTEST2(large_inserts_stress, test_seq_keys_random_values_threaded) -{ - int random_val_fd = open("/dev/urandom", O_RDONLY); - ASSERT_TRUE(random_val_fd > 0); + if (wcfg->fork_child) { + pid = fork(); - // Run n-threads with sequential key and sequential values inserted - do_inserts_n_threads(data->kvsb, - &data->master_cfg, - data->hid, - TEST_INSERTS_SEQ_KEY_DIFF_START_KEYID_FD, - random_val_fd, - data->num_inserts, - data->num_insert_threads); + if (pid < 0) { + platform_error_log("fork() of child process failed: pid=%d\n", pid); + return; + } else if (pid) { + CTEST_LOG_INFO("OS-pid=%d, Thread-ID=%lu: " + "Waiting for child pid=%d to complete ...\n", + getpid(), + platform_get_tid(), + pid); - close(random_val_fd); -} + safe_wait(); -CTEST2(large_inserts_stress, - test_seq_keys_random_values_threaded_same_start_keyid) -{ - int random_val_fd = open("/dev/urandom", O_RDONLY); - ASSERT_TRUE(random_val_fd > 0); + CTEST_LOG_INFO("Thread-ID=%lu, OS-pid=%d: " + "Child execution wait() completed." + " Resuming parent ...\n", + platform_get_tid(), + getpid()); + } + } + if (pid == 0) { + // Record in global data that we are now running as a child. + data->am_parent = FALSE; + data->this_pid = getpid(); - // Run n-threads with sequential key and sequential values inserted - do_inserts_n_threads(data->kvsb, - &data->master_cfg, - data->hid, - TEST_INSERTS_SEQ_KEY_SAME_START_KEYID_FD, - random_val_fd, - data->num_inserts, - data->num_insert_threads); + CTEST_LOG_INFO("OS-pid=%d Running as %s process ...\n", + data->this_pid, + (wcfg->fork_child ? "forked child" : "parent")); + + splinterdb_register_thread(wcfg->kvsb); + + exec_worker_thread(wcfg); - close(random_val_fd); + CTEST_LOG_INFO("OS-pid=%d, Thread-ID=%lu, Child process" + ", completed inserts.\n", + data->this_pid, + platform_get_tid()); + splinterdb_deregister_thread(wcfg->kvsb); + exit(0); + return; + } } -CTEST2(large_inserts_stress, test_random_keys_random_values_threaded) +/* + * Test case developed to repro an out-of-bounds assertion tripped up in + * trunk_build_filters() -> fingerprint_ntuples(). The fix has been id'ed + * to relocate fingerprint_ntuples() in its flow. There was no real logic + * error but a code-flow error. The now-fixed-bug would only repro with + * something like --num-inserts 20M. + */ +// clang-format off +CTEST2(large_inserts_stress, test_fp_num_tuples_out_of_bounds_bug_trunk_build_filters) { - int random_key_fd = open("/dev/urandom", O_RDONLY); - ASSERT_TRUE(random_key_fd > 0); + // clang-format on + char key_data[TEST_KEY_SIZE]; + char val_data[TEST_VALUE_SIZE]; - int random_val_fd = open("/dev/urandom", O_RDONLY); - ASSERT_TRUE(random_val_fd > 0); + uint64 start_key = 0; - // Run n-threads with sequential key and sequential values inserted - do_inserts_n_threads(data->kvsb, - &data->master_cfg, - data->hid, - random_key_fd, - random_val_fd, - data->num_inserts, - data->num_insert_threads); + uint64 start_time = platform_get_timestamp(); + + threadid thread_idx = platform_get_tid(); + + // Test is written to insert multiples of millions per thread. + ASSERT_EQUAL(0, (data->num_inserts % MILLION)); + + CTEST_LOG_INFO("%s()::%d:Thread-%-lu inserts %lu (%lu million)" + ", sequential key, sequential value, " + "KV-pairs starting from %lu ...\n", + __func__, + __LINE__, + thread_idx, + data->num_inserts, + (data->num_inserts / MILLION), + start_key); + + uint64 ictr = 0; + uint64 jctr = 0; + + bool verbose_progress = TRUE; + memset(val_data, 'V', sizeof(val_data)); + uint64 val_len = sizeof(val_data); + + for (ictr = 0; ictr < (data->num_inserts / MILLION); ictr++) { + for (jctr = 0; jctr < MILLION; jctr++) { - close(random_key_fd); - close(random_val_fd); + uint64 id = (start_key + (ictr * MILLION) + jctr); + + // Generate sequential key data + snprintf(key_data, sizeof(key_data), "%lu", id); + uint64 key_len = strlen(key_data); + + slice key = slice_create(key_len, key_data); + slice val = slice_create(val_len, val_data); + + int rc = splinterdb_insert(data->kvsb, key, val); + ASSERT_EQUAL(0, rc); + } + if (verbose_progress) { + CTEST_LOG_INFO("Thread-%lu Inserted %lu million KV-pairs ...\n", + thread_idx, + (ictr + 1)); + } + } + uint64 elapsed_ns = platform_timestamp_elapsed(start_time); + uint64 elapsed_s = NSEC_TO_SEC(elapsed_ns); + if (elapsed_s == 0) { + elapsed_s = 1; + } + + CTEST_LOG_INFO("%s()::%d:Thread-%lu Inserted %lu million KV-pairs in " + "%lu s, %lu rows/s\n", + __func__, + __LINE__, + thread_idx, + ictr, // outer-loop ends at #-of-Millions inserted + elapsed_s, + (data->num_inserts / elapsed_s)); } /* @@ -504,67 +1024,64 @@ CTEST2(large_inserts_stress, test_random_keys_random_values_threaded) * control via parameters are: * * Parameters: - * - random_key_fd - Sequential / random key - * - random_val_fd - Sequential / random value / fully-packed value. - * - num_inserts - # of inserts / thread + * - data_work_cfg - Template-configs for all worker-threads + * - key_type - Key-data strategy + * - val_type - Value-data strategy * - num_insert_threads - # of inserting threads to start-up - * - same_start_value - Boolean to control inserted batch' start-value. - * - * NOTE: Semantics of random_key_fd: - * - * fd == 0: => Each thread will insert into its own assigned space of - * {start-value, num-inserts} range. The concurrent inserts are all - * unique non-conflicting keys. - * - * fd > 0: => Each thread will insert num_inserts rows with randomly generated - * keys, usually fully-packed to TEST_KEY_SIZE. - * - * fd < 0: => Each thread will insert num_inserts rows all starting at the - * same start value; chosen as 0. - * This is a lapsed case to exercise heavy inserts of duplicate - * keys, creating diff BTree split dynamics. - * - * NOTE: Semantics of random_val_fd: - * - * You can use this to control the type of value that will be generated: - * fd == 0: Use sequential small-length values. - * fd == 1: Use randomly generated values, fully-packed to TEST_VALUE_SIZE. * ---------------------------------------------------------------------------- */ static void -do_inserts_n_threads(splinterdb *kvsb, - master_config *master_cfg, - platform_heap_id hid, - int random_key_fd, - int random_val_fd, - uint64 num_inserts, - uint64 num_insert_threads) +do_inserts_n_threads(worker_config *data_work_cfg, + key_strategy key_type, + val_strategy val_type, + uint64 num_insert_threads) { - worker_config *wcfg = TYPED_ARRAY_ZALLOC(hid, wcfg, num_insert_threads); + platform_memfrag memfrag_wcfg; + worker_config *wcfg = + TYPED_ARRAY_ZALLOC(data_work_cfg->hid, wcfg, num_insert_threads); // Setup thread-specific insert parameters for (int ictr = 0; ictr < num_insert_threads; ictr++) { - wcfg[ictr].kvsb = kvsb; - wcfg[ictr].master_cfg = master_cfg; - wcfg[ictr].num_inserts = num_inserts; - - // Choose the same or diff start key-ID for each thread. - wcfg[ictr].start_value = - ((random_key_fd < 0) ? 0 : (wcfg[ictr].num_inserts * ictr)); - wcfg[ictr].random_key_fd = random_key_fd; - wcfg[ictr].random_val_fd = random_val_fd; - wcfg[ictr].is_thread = TRUE; + // First get all common config-params for each worker + memmove(&wcfg[ictr], data_work_cfg, sizeof(*data_work_cfg)); + + // Choose the start key-ID for each thread. + switch (key_type) { + case SEQ_KEY_HOST_ENDIAN_32_SAME_START_ID: + case SEQ_KEY_BIG_ENDIAN_32_SAME_START_ID: + CTEST_LOG_INFO("All threads start from same start key ID=0\n"); + wcfg[ictr].start_value = 0; + key_type = ((key_type == SEQ_KEY_HOST_ENDIAN_32_SAME_START_ID) + ? SEQ_KEY_HOST_ENDIAN_32 + : SEQ_KEY_BIG_ENDIAN_32); + break; + default: + // All other inserts will start with non-overlapping key-IDs. + wcfg[ictr].start_value = (wcfg[ictr].num_inserts * ictr); + break; + } + + wcfg[ictr].key_type = key_type; + wcfg[ictr].val_type = val_type; + wcfg[ictr].is_thread = TRUE; + wcfg[ictr].verbose_progress = TRUE; } + wcfg[0].show_strategy = TRUE; + platform_memfrag memfrag_thread_ids; platform_thread *thread_ids = - TYPED_ARRAY_ZALLOC(hid, thread_ids, num_insert_threads); + TYPED_ARRAY_ZALLOC(data_work_cfg->hid, thread_ids, num_insert_threads); // Fire-off the threads to drive inserts ... + // clang-format off for (int tctr = 0; tctr < num_insert_threads; tctr++) { - int rc = pthread_create( - &thread_ids[tctr], NULL, &exec_worker_thread, &wcfg[tctr]); + int rc = pthread_create(&thread_ids[tctr], + NULL, + &exec_worker_thread, + &wcfg[tctr]); ASSERT_EQUAL(0, rc); } + // clang-format on // Wait for all threads to complete ... for (int tctr = 0; tctr < num_insert_threads; tctr++) { @@ -580,8 +1097,8 @@ do_inserts_n_threads(splinterdb *kvsb, ASSERT_TRUE(FALSE); } } - platform_free(hid, thread_ids); - platform_free(hid, wcfg); + platform_free(&memfrag_thread_ids); + platform_free(&memfrag_wcfg); } /* @@ -597,16 +1114,19 @@ do_inserts_n_threads(splinterdb *kvsb, static void * exec_worker_thread(void *w) { - char key_data[TEST_KEY_SIZE]; - char val_data[TEST_VALUE_SIZE]; - worker_config *wcfg = (worker_config *)w; - splinterdb *kvsb = wcfg->kvsb; - uint64 start_key = wcfg->start_value; - uint64 num_inserts = wcfg->num_inserts; - int random_key_fd = wcfg->random_key_fd; - int random_val_fd = wcfg->random_val_fd; + platform_memfrag memfrag_key_buf; + char *key_buf = TYPED_ARRAY_MALLOC(wcfg->hid, key_buf, wcfg->key_size); + size_t key_buf_size = wcfg->key_size; + + size_t val_buf_size = wcfg->val_size; + platform_memfrag memfrag_val_buf; + char *val_buf = TYPED_ARRAY_MALLOC(wcfg->hid, val_buf, (val_buf_size + 1)); + + splinterdb *kvsb = wcfg->kvsb; + uint64 start_key = wcfg->start_value; + uint64 num_inserts = wcfg->num_inserts; uint64 start_time = platform_get_timestamp(); @@ -618,112 +1138,164 @@ exec_worker_thread(void *w) // Test is written to insert multiples of millions per thread. ASSERT_EQUAL(0, (num_inserts % MILLION)); - const char *random_val_descr = NULL; - random_val_descr = ((random_val_fd > 0) ? "random" - : (random_val_fd == 0) ? "sequential" - : "fully-packed constant"); - - platform_default_log("%s()::%d:Thread %-2lu inserts %lu (%lu million)" - ", %s key, %s value, " - "KV-pairs starting from %lu (%lu%s) ...\n", - __func__, - __LINE__, - thread_idx, - num_inserts, - (num_inserts / MILLION), - ((random_key_fd > 0) ? "random" : "sequential"), - random_val_descr, - start_key, - (start_key / MILLION), - (start_key ? " million" : "")); + if (wcfg->show_strategy) { + CTEST_LOG_INFO("\nKey-data: '%s', Value-data: '%s' ...\n", + Key_strategy_names[wcfg->key_type], + Val_strategy_names[wcfg->val_type]); + } + CTEST_LOG_INFO("%s()::%d:Thread %-2lu inserts %lu (%lu million)" + " KV-pairs starting from %lu (%lu%s)\n", + __func__, + __LINE__, + thread_idx, + num_inserts, + (num_inserts / MILLION), + start_key, + (start_key / MILLION), + (start_key ? " million" : "")); uint64 ictr = 0; uint64 jctr = 0; - bool verbose_progress = wcfg->master_cfg->verbose_progress; + bool verbose_progress = wcfg->verbose_progress; + + // Initialize allocated buffers to avoid MSAN failures + memset(key_buf, 'K', key_buf_size); // Insert fully-packed wider-values so we fill pages faster. - // This value-data will be chosen when random_key_fd < 0. - memset(val_data, 'V', sizeof(val_data)); - uint64 val_len = sizeof(val_data); + uint64 val_len = val_buf_size; + memset(val_buf, 'V', val_buf_size); + + int32 key_data_be; // int-32 keys generated in big-endian-32 notation + int32 key_data_he; // int-32 keys generated in host-endian-32 notation + uint64 key_len; + + random_state key_rs = {0}; + switch (wcfg->key_type) { + case SEQ_KEY_BIG_ENDIAN_32: + key_buf = (char *)&key_data_be; + key_len = sizeof(key_data_be); + break; + + case SEQ_KEY_HOST_ENDIAN_32: + key_buf = (char *)&key_data_he; + key_len = sizeof(key_data_he); + break; + + case SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH: + key_len = key_buf_size; + break; + + case RAND_KEY_DATA_BUF_SIZE: + key_len = key_buf_size; + // Fall-through + case RAND_KEY_RAND_LENGTH: + random_init(&key_rs, wcfg->rand_seed, 0); + break; + + default: + platform_assert(FALSE, + "Unknown key-data strategy %d (%s)", + wcfg->key_type, + Key_strategy_name(wcfg->key_type)); + } - bool val_length_msg_printed = FALSE; + random_state val_rs = {0}; + switch (wcfg->val_type) { + case RAND_6BYTE_VAL: + val_len = 6; + // Fall-through + case RAND_VAL_RAND_LENGTH: + random_init(&val_rs, wcfg->rand_seed, 0); + break; + + default: + break; + } for (ictr = 0; ictr < (num_inserts / MILLION); ictr++) { for (jctr = 0; jctr < MILLION; jctr++) { uint64 id = (start_key + (ictr * MILLION) + jctr); - uint64 key_len; - // Generate random key / value if calling test-case requests it. - if (random_key_fd > 0) { + // Generate key-data based on key-strategy specified. + switch (wcfg->key_type) { + case SEQ_KEY_BIG_ENDIAN_32: + // Generate sequential key data, stored in big-endian order + key_data_be = htobe32(id); + break; + + case SEQ_KEY_HOST_ENDIAN_32: + key_data_he = id; + break; + case SEQ_KEY_HOST_ENDIAN_32_PADDED_LENGTH: + { + int tmp_len = snprintf(key_buf, key_buf_size, "%lu", id); + key_buf[tmp_len] = 'K'; + break; + } - // Generate random key-data for full width of key. - size_t result = read(random_key_fd, key_data, sizeof(key_data)); - ASSERT_TRUE(result >= 0); + case RAND_KEY_RAND_LENGTH: + // Fill-up key-data buffer with random data for random length. + key_len = random_next_int( + &key_rs, TEST_CONFIG_MIN_KEY_SIZE, key_buf_size); + random_bytes(&key_rs, key_buf, key_len); + break; - key_len = result; - } else { - // Generate sequential key data - snprintf(key_data, sizeof(key_data), "%lu", id); - key_len = strlen(key_data); + case RAND_KEY_DATA_BUF_SIZE: + // Pack-up key-data buffer with random data + random_bytes(&key_rs, key_buf, key_len); + break; + + default: + break; } - // Manage how the value-data is generated based on random_val_fd - if (random_val_fd > 0) { - - // Generate random value for full width of value. - size_t result = read(random_val_fd, val_data, sizeof(val_data)); - ASSERT_TRUE(result >= 0); - - val_len = result; - if (!val_length_msg_printed) { - platform_default_log("OS-pid=%d, Thread-ID=%lu" - ", Insert random value of " - "fixed-length=%lu bytes.\n", - platform_getpid(), - thread_idx, - val_len); - val_length_msg_printed = TRUE; - } - } else if (random_val_fd == 0) { - // Generate small-length sequential value data - snprintf(val_data, sizeof(val_data), "Row-%lu", id); - val_len = strlen(val_data); - - if (!val_length_msg_printed) { - platform_default_log("OS-pid=%d, Thread-ID=%lu" - ", Insert small-width sequential values of " - "different lengths.\n", - platform_getpid(), - thread_idx); - val_length_msg_printed = TRUE; - } - } else if (random_val_fd < 0) { - if (!val_length_msg_printed) { - platform_default_log("OS-pid=%d, Thread-ID=%lu" - ", Insert fully-packed fixed value of " - "length=%lu bytes.\n", - platform_getpid(), - thread_idx, - val_len); - val_length_msg_printed = TRUE; + // Generate value-data based on value-strategy specified. + switch (wcfg->val_type) { + case SEQ_VAL_SMALL: + // Generate small-length sequential value data + val_len = snprintf(val_buf, val_buf_size, "Row-%lu", id); + break; + + case SEQ_VAL_PADDED_LENGTH: + { + // Generate small-length sequential value packed-data + int tmp_len = snprintf(val_buf, val_buf_size, "Row-%lu", id); + val_buf[tmp_len] = 'V'; + break; } + case RAND_VAL_RAND_LENGTH: + // Fill-up value-data buffer with random data for random length. + val_len = random_next_int(&val_rs, 1, val_buf_size); + random_bytes(&val_rs, val_buf, val_len); + break; + + case RAND_6BYTE_VAL: + // Fill-up value-data buffer with random data for 6-bytes + random_bytes(&val_rs, val_buf, val_len); + break; + + default: + platform_assert(FALSE, + "Unknown value-data strategy %d (%s)", + wcfg->val_type, + Val_strategy_name(wcfg->val_type)); + break; } - - slice key = slice_create(key_len, key_data); - slice val = slice_create(val_len, val_data); + slice key = slice_create(key_len, key_buf); + slice val = slice_create(val_len, val_buf); int rc = splinterdb_insert(kvsb, key, val); ASSERT_EQUAL(0, rc); } if (verbose_progress) { - platform_default_log( - "%s()::%d:Thread-%lu Inserted %lu million KV-pairs ...\n", - __func__, - __LINE__, - thread_idx, - (ictr + 1)); + CTEST_LOG_INFO("%s()::%d:Thread-%lu Inserted %lu million " + "KV-pairs ...\n", + __func__, + __LINE__, + thread_idx, + (ictr + 1)); } } // Deal with low ns-elapsed times when inserting small #s of rows @@ -733,18 +1305,21 @@ exec_worker_thread(void *w) elapsed_s = 1; } - platform_default_log("%s()::%d:Thread-%lu Inserted %lu million KV-pairs in " - "%lu s, %lu rows/s\n", - __func__, - __LINE__, - thread_idx, - ictr, // outer-loop ends at #-of-Millions inserted - elapsed_s, - (num_inserts / elapsed_s)); + CTEST_LOG_INFO("%s()::%d:Thread-%lu Inserted %lu million KV-pairs in " + "%lu s, %lu rows/s\n", + __func__, + __LINE__, + thread_idx, + ictr, // outer-loop ends at #-of-Millions inserted + elapsed_s, + (num_inserts / elapsed_s)); if (wcfg->is_thread) { splinterdb_deregister_thread(kvsb); } + // Cleanup resources opened in this call. + platform_free(&memfrag_key_buf); + platform_free(&memfrag_val_buf); return 0; } diff --git a/tests/unit/limitations_test.c b/tests/unit/limitations_test.c index 655b1fb8..38749b94 100644 --- a/tests/unit/limitations_test.c +++ b/tests/unit/limitations_test.c @@ -102,10 +102,13 @@ CTEST2(limitations, test_io_init_invalid_page_size) uint64 num_tables = 1; // Allocate memory for global config structures - data->splinter_cfg = - TYPED_ARRAY_MALLOC(data->hid, data->splinter_cfg, num_tables); + platform_memfrag memfrag_splinter_cfg; + data->splinter_cfg = TYPED_ARRAY_MALLOC_MF( + &memfrag_splinter_cfg, data->hid, data->splinter_cfg, num_tables); - data->cache_cfg = TYPED_ARRAY_MALLOC(data->hid, data->cache_cfg, num_tables); + platform_memfrag memfrag_cache_cfg; + data->cache_cfg = TYPED_ARRAY_MALLOC_MF( + &memfrag_cache_cfg, data->hid, data->cache_cfg, num_tables); ZERO_STRUCT(data->test_exec_cfg); @@ -124,7 +127,8 @@ CTEST2(limitations, test_io_init_invalid_page_size) platform_assert_status_ok(rc); // Allocate and initialize the IO sub-system. - data->io = TYPED_MALLOC(data->hid, data->io); + platform_memfrag memfrag_io; + data->io = TYPED_MALLOC_MF(&memfrag_io, data->hid, data->io); ASSERT_TRUE((data->io != NULL)); // Hard-fix the configured default page-size to an illegal value @@ -150,15 +154,15 @@ CTEST2(limitations, test_io_init_invalid_page_size) ASSERT_TRUE(SUCCESS(rc)); // Release resources acquired in this test case. - platform_free(data->hid, data->io->req); - platform_free(data->hid, data->io); + io_handle_deinit(data->io); + platform_free(&memfrag_io); if (data->cache_cfg) { - platform_free(data->hid, data->cache_cfg); + platform_free(&memfrag_cache_cfg); } if (data->splinter_cfg) { - platform_free(data->hid, data->splinter_cfg); + platform_free(&memfrag_splinter_cfg); } } @@ -173,10 +177,13 @@ CTEST2(limitations, test_io_init_invalid_extent_size) uint64 num_tables = 1; // Allocate memory for global config structures - data->splinter_cfg = - TYPED_ARRAY_MALLOC(data->hid, data->splinter_cfg, num_tables); + platform_memfrag memfrag_splinter_cfg; + data->splinter_cfg = TYPED_ARRAY_MALLOC_MF( + &memfrag_splinter_cfg, data->hid, data->splinter_cfg, num_tables); - data->cache_cfg = TYPED_ARRAY_MALLOC(data->hid, data->cache_cfg, num_tables); + platform_memfrag memfrag_cache_cfg; + data->cache_cfg = TYPED_ARRAY_MALLOC_MF( + &memfrag_cache_cfg, data->hid, data->cache_cfg, num_tables); ZERO_STRUCT(data->test_exec_cfg); @@ -195,7 +202,8 @@ CTEST2(limitations, test_io_init_invalid_extent_size) platform_assert_status_ok(rc); // Allocate and initialize the IO sub-system. - data->io = TYPED_MALLOC(data->hid, data->io); + platform_memfrag memfrag_io; + data->io = TYPED_MALLOC_MF(&memfrag_io, data->hid, data->io); ASSERT_TRUE((data->io != NULL)); uint64 pages_per_extent = @@ -230,12 +238,14 @@ CTEST2(limitations, test_io_init_invalid_extent_size) ASSERT_TRUE(SUCCESS(rc)); // Release resources acquired in this test case. + io_handle_deinit(data->io); + if (data->cache_cfg) { - platform_free(data->hid, data->cache_cfg); + platform_free(&memfrag_cache_cfg); } if (data->splinter_cfg) { - platform_free(data->hid, data->splinter_cfg); + platform_free(&memfrag_splinter_cfg); } } @@ -393,10 +403,13 @@ CTEST2(limitations, test_trunk_config_init_fails_for_invalid_configs) uint64 num_tables = 1; // Allocate memory for global config structures - data->splinter_cfg = - TYPED_ARRAY_MALLOC(data->hid, data->splinter_cfg, num_tables); + platform_memfrag memfrag_splinter_cfg; + data->splinter_cfg = TYPED_ARRAY_MALLOC_MF( + &memfrag_splinter_cfg, data->hid, data->splinter_cfg, num_tables); - data->cache_cfg = TYPED_ARRAY_MALLOC(data->hid, data->cache_cfg, num_tables); + platform_memfrag memfrag_cache_cfg; + data->cache_cfg = TYPED_ARRAY_MALLOC_MF( + &memfrag_cache_cfg, data->hid, data->cache_cfg, num_tables); char *unit_test_argv0[] = {"--key-size", "1000"}; int unit_test_argc = ARRAY_SIZE(unit_test_argv0); @@ -421,11 +434,11 @@ CTEST2(limitations, test_trunk_config_init_fails_for_invalid_configs) // Release resources acquired in this test case. if (data->cache_cfg) { - platform_free(data->hid, data->cache_cfg); + platform_free(&memfrag_cache_cfg); } if (data->splinter_cfg) { - platform_free(data->hid, data->splinter_cfg); + platform_free(&memfrag_splinter_cfg); } } diff --git a/tests/unit/platform_apis_test.c b/tests/unit/platform_apis_test.c index 50df378c..806810bf 100644 --- a/tests/unit/platform_apis_test.c +++ b/tests/unit/platform_apis_test.c @@ -18,8 +18,19 @@ #include "ctest.h" // This is required for all test-case files. #include "platform.h" #include "config.h" +#include "shmem.h" +#include "trunk.h" #include "unit_tests.h" +// Define a struct to be used for memory allocation. +typedef struct any_struct { + struct any_struct *prev; + struct any_struct *next; + size_t nbytes; + uint32 value; + uint32 size; +} any_struct; + /* * Global data declaration macro: */ @@ -28,6 +39,7 @@ CTEST_DATA(platform_api) // Declare heap handles for platform heap memory. platform_heap_id hid; platform_module_id mid; + bool use_shmem; }; CTEST_SETUP(platform_api) @@ -37,6 +49,7 @@ CTEST_SETUP(platform_api) uint64 heap_capacity = (256 * MiB); // small heap is sufficient. data->mid = platform_get_module_id(); + data->use_shmem = use_shmem; rc = platform_heap_create(data->mid, heap_capacity, use_shmem, &data->hid); platform_assert_status_ok(rc); } @@ -169,3 +182,344 @@ CTEST2(platform_api, test_platform_condvar_init_destroy) platform_condvar_destroy(&cv); } + +/* + * ---------------------------------------------------------------------------- + * Exercise all the memory allocation interfaces, followed by a free, to ensure + * that all combinations work cleanly, w/ and w/o shared memory. + * + * - TYPED_MALLOC(), TYPED_ZALLOC() - + * - TYPED_ALIGNED_MALLOC(), TYPED_ALIGNED_ZALLOC() + * + * - TYPED_ARRAY_MALLOC(), TYPED_ARRAY_ZALLOC() + * TYPED_FLEXIBLE_STRUCT_MALLOC(), TYPED_FLEXIBLE_STRUCT_ZALLOC() + * These interfaces need an on-stack platform_memfrag{} struct for allocation + * and to call the free() interface. + * + * For case of test execution with shared memory, do a small verification + * that used / free memory metrics are correct before/after allocation/free. + * ---------------------------------------------------------------------------- + */ +CTEST2(platform_api, test_TYPED_MALLOC) +{ + size_t used_bytes_before_alloc = 0; + size_t free_bytes_before_alloc = 0; + size_t used_bytes_after_free = 0; + size_t free_bytes_after_free = 0; + + if (data->use_shmem) { + used_bytes_before_alloc = platform_shmbytes_used(data->hid); + free_bytes_before_alloc = platform_shmbytes_free(data->hid); + } + platform_memfrag memfrag_structp; + any_struct *structp = TYPED_MALLOC(data->hid, structp); + platform_free(&memfrag_structp); + if (data->use_shmem) { + used_bytes_after_free = platform_shmbytes_used(data->hid); + free_bytes_after_free = platform_shmbytes_free(data->hid); + + ASSERT_EQUAL(used_bytes_before_alloc, used_bytes_after_free); + ASSERT_EQUAL(free_bytes_before_alloc, free_bytes_after_free); + } +} + +CTEST2(platform_api, test_TYPED_ZALLOC) +{ + size_t used_bytes_before_alloc = 0; + size_t free_bytes_before_alloc = 0; + size_t used_bytes_after_free = 0; + size_t free_bytes_after_free = 0; + + if (data->use_shmem) { + used_bytes_before_alloc = platform_shmbytes_used(data->hid); + free_bytes_before_alloc = platform_shmbytes_free(data->hid); + } + platform_memfrag memfrag_structp; + any_struct *structp = TYPED_ZALLOC(data->hid, structp); + platform_free(&memfrag_structp); + if (data->use_shmem) { + used_bytes_after_free = platform_shmbytes_used(data->hid); + free_bytes_after_free = platform_shmbytes_free(data->hid); + + ASSERT_EQUAL(used_bytes_before_alloc, used_bytes_after_free); + ASSERT_EQUAL(free_bytes_before_alloc, free_bytes_after_free); + } +} + +CTEST2(platform_api, test_TYPED_MALLOC_free_and_MALLOC) +{ + platform_memfrag memfrag_structp; + any_struct *structp = TYPED_MALLOC(data->hid, structp); + any_struct *save_structp = structp; + platform_free(&memfrag_structp); + + platform_memfrag memfrag_new_structp; + any_struct *new_structp = TYPED_MALLOC(data->hid, new_structp); + + // Memory for small structures should be recycled from shared memory + ASSERT_TRUE(!data->use_shmem || (save_structp == new_structp)); + + platform_free(&memfrag_new_structp); +} + +CTEST2(platform_api, test_TYPED_ALIGNED_ZALLOC) +{ + size_t used_bytes_before_alloc = 0; + size_t free_bytes_before_alloc = 0; + size_t used_bytes_after_free = 0; + size_t free_bytes_after_free = 0; + + if (data->use_shmem) { + used_bytes_before_alloc = platform_shmbytes_used(data->hid); + free_bytes_before_alloc = platform_shmbytes_free(data->hid); + } + platform_memfrag memfrag_structp; + any_struct *structp = TYPED_ALIGNED_ZALLOC( + data->hid, PLATFORM_CACHELINE_SIZE, structp, sizeof(*structp) * 7); + + platform_free(&memfrag_structp); + if (data->use_shmem) { + used_bytes_after_free = platform_shmbytes_used(data->hid); + free_bytes_after_free = platform_shmbytes_free(data->hid); + + ASSERT_EQUAL(used_bytes_before_alloc, used_bytes_after_free); + ASSERT_EQUAL(free_bytes_before_alloc, free_bytes_after_free); + } +} + +CTEST2(platform_api, test_TYPED_ARRAY_MALLOC) +{ + int nitems = 10; + size_t used_bytes_before_alloc = 0; + size_t free_bytes_before_alloc = 0; + size_t used_bytes_after_free = 0; + size_t free_bytes_after_free = 0; + + if (data->use_shmem) { + used_bytes_before_alloc = platform_shmbytes_used(data->hid); + free_bytes_before_alloc = platform_shmbytes_free(data->hid); + } + platform_memfrag memfrag_structp; + any_struct *structp = TYPED_ARRAY_MALLOC(data->hid, structp, nitems); + + platform_free(&memfrag_structp); + if (data->use_shmem) { + used_bytes_after_free = platform_shmbytes_used(data->hid); + free_bytes_after_free = platform_shmbytes_free(data->hid); + + ASSERT_EQUAL(used_bytes_before_alloc, used_bytes_after_free); + ASSERT_EQUAL(free_bytes_before_alloc, free_bytes_after_free); + } +} + +CTEST2(platform_api, test_platform_free_interface) +{ + platform_memfrag memfrag_structp; + any_struct *structp = TYPED_MALLOC(data->hid, structp); + platform_free_mem(data->hid, structp, memfrag_size(&memfrag_structp)); +} + +/* + * Dumb test-case to show how -not- to invoke the platform_free() API. + * If you allocate using platform_memfrag{}, but free the allocated memory + * directly, you will certainly get a memory leak. + */ +CTEST2_SKIP(platform_api, test_incorrect_usage_of_free) +{ + int nitems = 13; + size_t used_bytes_before_alloc = 0; + size_t free_bytes_before_alloc = 0; + size_t memory_allocated = 0; + size_t used_bytes_after_free = 0; + size_t free_bytes_after_free = 0; + + if (data->use_shmem) { + used_bytes_before_alloc = platform_shmbytes_used(data->hid); + free_bytes_before_alloc = platform_shmbytes_free(data->hid); + } + platform_memfrag memfrag_structp; + any_struct *structp = TYPED_ARRAY_MALLOC(data->hid, structp, nitems); + memory_allocated = memfrag_structp.size; + + // Incorrect usage of free ... Memory fragment will be freed but there will + // be an error in computing memory usage metrics, resulting in a slow + // memory leak (of sorts). + platform_free(&memfrag_structp); + if (data->use_shmem) { + used_bytes_after_free = platform_shmbytes_used(data->hid); + free_bytes_after_free = platform_shmbytes_free(data->hid); + + // These asserts document "an error condition", just so the test can pass. + ASSERT_NOT_EQUAL(used_bytes_before_alloc, used_bytes_after_free); + ASSERT_NOT_EQUAL(free_bytes_before_alloc, free_bytes_after_free); + + printf("memory_allocated=%lu" + ", used_bytes_after_free=%lu (!= used_bytes_before_alloc=%lu)" + ", free_bytes_after_free=%lu (!= free_bytes_before_alloc=%lu)\n", + memory_allocated, + used_bytes_after_free, + used_bytes_before_alloc, + free_bytes_after_free, + free_bytes_before_alloc); + } +} + +/* + * White-box test to verify small free-fragment free-list management. + * We track only some small ranges of sizes in the free-list: + * 32 < x <= 64, <= 128, <= 256, <= 512 + * This test case is designed carefully to allocate a fragment in the + * range (256, 512]. Then it's freed. A smaller fragment that falls in + * this bucket is requested, which should find and reallocate the + * free'd fragment. + */ +CTEST2(platform_api, test_TYPED_ARRAY_MALLOC_free_and_MALLOC) +{ + int nitems = 10; + platform_memfrag memfrag_arrayp; + any_struct *arrayp = TYPED_ARRAY_MALLOC(data->hid, arrayp, nitems); + + platform_free(&memfrag_arrayp); + + // If you re-request the same array, memory fragment should be recycled + platform_memfrag memfrag_new_arrayp; + any_struct *new_arrayp = TYPED_ARRAY_MALLOC(data->hid, new_arrayp, nitems); + ASSERT_TRUE(!data->use_shmem || (arrayp == new_arrayp)); + platform_free(&memfrag_new_arrayp); + + // Allocating a smaller array should also recycle memory fragment. + // We recycle fragments in sizes of powers-of-2. So, use a new size + // so it will trigger a search in the free-list that the previous + // fragment's free ended up in. + nitems = 9; + new_arrayp = TYPED_ARRAY_MALLOC(data->hid, new_arrayp, nitems); + ASSERT_TRUE(!data->use_shmem || (arrayp == new_arrayp)); + platform_free(&memfrag_new_arrayp); +} + +/* + * Allocate and free small fragments of different size so that they fall + * into different buckets. Free them in random order to exercise the + * management of the array tracking allocated small fragments. + */ +CTEST2(platform_api, test_alloc_free_multiple_small_frags) +{ + size_t used_bytes_before_allocs = 0; + size_t free_bytes_before_allocs = 0; + size_t memory_allocated = 0; + + if (data->use_shmem) { + used_bytes_before_allocs = platform_shmbytes_used(data->hid); + free_bytes_before_allocs = platform_shmbytes_free(data->hid); + } + platform_memfrag memfrag_s1_40b; + char *s1_40b = TYPED_ARRAY_MALLOC(data->hid, s1_40b, 40); + memory_allocated += memfrag_size(&memfrag_s1_40b); + + platform_memfrag memfrag_s1_80b; + char *s1_80b = TYPED_ARRAY_MALLOC(data->hid, s1_80b, 80); + memory_allocated += memfrag_size(&memfrag_s1_80b); + + platform_memfrag memfrag_s1_160b; + char *s1_160b = TYPED_ARRAY_MALLOC(data->hid, s1_160b, 160); + memory_allocated += memfrag_size(&memfrag_s1_160b); + + platform_memfrag memfrag_s1_200b; + char *s1_200b = TYPED_ARRAY_MALLOC(data->hid, s1_200b, 200); + memory_allocated += memfrag_size(&memfrag_s1_200b); + + size_t used_bytes_after_allocs = 0; + if (data->use_shmem) { + used_bytes_after_allocs = platform_shmbytes_used(data->hid); + + ASSERT_EQUAL((used_bytes_after_allocs - used_bytes_before_allocs), + memory_allocated); + } + + platform_free(&memfrag_s1_80b); + + platform_free(&memfrag_s1_40b); + + platform_free(&memfrag_s1_160b); + + platform_free(&memfrag_s1_200b); + + size_t used_bytes_after_frees = 0; + size_t free_bytes_after_frees = 0; + if (data->use_shmem) { + used_bytes_after_frees = platform_shmbytes_used(data->hid); + free_bytes_after_frees = platform_shmbytes_free(data->hid); + + ASSERT_EQUAL(used_bytes_before_allocs, used_bytes_after_frees); + ASSERT_EQUAL(free_bytes_before_allocs, free_bytes_after_frees); + } +} + +CTEST2(platform_api, test_large_TYPED_MALLOC) +{ + platform_memfrag memfrag_iter; + trunk_range_iterator *iter = TYPED_MALLOC(data->hid, iter); + platform_free(&memfrag_iter); +} + +/* + * Basic test case to verify that memory for large fragments is being + * recycled as expected. + */ +CTEST2(platform_api, test_large_TYPED_MALLOC_free_and_MALLOC) +{ + platform_memfrag memfrag_iter; + trunk_range_iterator *iter = TYPED_MALLOC(data->hid, iter); + // This struct should be larger than the threshold at which large + // free fragment strategy kicks-in. + ASSERT_TRUE(sizeof(*iter) >= SHM_LARGE_FRAG_SIZE); + + trunk_range_iterator *save_iter = iter; + platform_free(&memfrag_iter); + + platform_memfrag memfrag_new_iter; + trunk_range_iterator *new_iter = TYPED_MALLOC(data->hid, new_iter); + + // Memory for large structures should be recycled from shared memory + ASSERT_TRUE(!data->use_shmem || (save_iter == new_iter), + "use_shmem=%d, save_iter=%p, new_iter=%p" + ", sizeof() requested struct=%lu", + data->use_shmem, + save_iter, + new_iter, + sizeof(*iter)); + platform_free(&memfrag_new_iter); +} + +CTEST2(platform_api, test_TYPED_ARRAY_MALLOC_MF) +{ + size_t old_mem_used = + (data->use_shmem ? platform_shmbytes_used(data->hid) : 0); + size_t old_mem_free = + (data->use_shmem ? platform_shmbytes_free(data->hid) : 0); + + platform_memfrag memfrag_structp; + any_struct *structp = TYPED_ARRAY_MALLOC(data->hid, structp, 20); + platform_free(&memfrag_structp); + + size_t new_mem_used = + (data->use_shmem ? platform_shmbytes_used(data->hid) : 0); + size_t new_mem_free = + (data->use_shmem ? platform_shmbytes_free(data->hid) : 0); + ASSERT_EQUAL(old_mem_used, new_mem_used); + ASSERT_EQUAL(old_mem_free, new_mem_free); +} + +CTEST2(platform_api, test_TYPED_ARRAY_ZALLOC_MF) +{ + size_t old_mem_used = + (data->use_shmem ? platform_shmbytes_used(data->hid) : 0); + + platform_memfrag memfrag_structp; + any_struct *structp = TYPED_ARRAY_ZALLOC(data->hid, structp, 10); + platform_free(&memfrag_structp); + + size_t new_mem_used = + (data->use_shmem ? platform_shmbytes_used(data->hid) : 0); + ASSERT_EQUAL(old_mem_used, new_mem_used); +} diff --git a/tests/unit/splinter_shmem_oom_test.c b/tests/unit/splinter_shmem_oom_test.c new file mode 100644 index 00000000..fcc70c6b --- /dev/null +++ b/tests/unit/splinter_shmem_oom_test.c @@ -0,0 +1,308 @@ +// Copyright 2021 VMware, Inc. +// SPDX-License-Identifier: Apache-2.0 + +/* + * ----------------------------------------------------------------------------- + * splinter_shmem_oom_test.c -- + * + * Slightly slow-running test that will induce OOM in shared memory. + * ----------------------------------------------------------------------------- + */ +#include "splinterdb/public_platform.h" +#include "platform.h" +#include "unit_tests.h" +#include "ctest.h" // This is required for all test-case files. +#include "shmem.h" +#include "splinterdb/splinterdb.h" +#include "splinterdb/default_data_config.h" +#include "util.h" + +#define TEST_MAX_KEY_SIZE 42 // Just something to get going ... + +// Test these many threads concurrently performing memory allocation. +#define TEST_MAX_THREADS 8 + +// Size of an on-stack buffer used for testing +#define WB_ONSTACK_BUFSIZE 30 + +/* + * To test heavily concurrent memory allocation from the shared memory, each + * thread will allocate a small fragment described by this structure. We then + * validate that the fragments are not clobbered by concurrent allocations. + */ +typedef struct shm_memfrag { + threadid owner; + struct shm_memfrag *next; +} shm_memfrag; + +// Configuration for each worker thread +typedef struct { + splinterdb *splinter; + platform_thread this_thread_id; // OS-generated thread ID + threadid exp_thread_idx; // Splinter-generated expected thread index + shm_memfrag *start; // Start of chain of allocated memfrags +} thread_config; + +// Function prototypes +static void +setup_cfg_for_test(splinterdb_config *out_cfg, data_config *default_data_cfg); + +static void +exec_thread_memalloc(void *arg); + +/* + * Global data declaration macro: + */ +CTEST_DATA(splinter_shmem_oom) +{ + // Declare heap handles to shake out shared memory based allocation. + size_t shmem_capacity; // In bytes + platform_heap_id hid; +}; + +// By default, all test cases will deal with small shared memory segment. +CTEST_SETUP(splinter_shmem_oom) +{ + data->shmem_capacity = (256 * MiB); // bytes + platform_status rc = platform_heap_create( + platform_get_module_id(), data->shmem_capacity, TRUE, &data->hid); + ASSERT_TRUE(SUCCESS(rc)); + + // Enable tracing all allocs / frees from shmem for this test. + platform_enable_tracing_shm_ops(); +} + +// Tear down the test shared segment. +CTEST_TEARDOWN(splinter_shmem_oom) +{ + platform_status rc = platform_heap_destroy(&data->hid); + ASSERT_TRUE(SUCCESS(rc)); +} + +/* + * --------------------------------------------------------------------------- + * Test allocation requests that result in an OOM from shared segment. + * Verify limits of memory allocation and handling of free/used bytes. + * These stats are maintained w/o full spinlocks, so will be approximate + * in concurrent scenarios. But for single-threaded allocations, these stats + * should be accurate even when shmem-OOMs occur. + * --------------------------------------------------------------------------- + */ +CTEST2(splinter_shmem_oom, test_allocations_causing_OOMs) +{ + int keybuf_size = 64; + + // Self-documenting assertion ... to future-proof this area. + ASSERT_EQUAL(keybuf_size, PLATFORM_CACHELINE_SIZE); + + void *next_free = platform_shm_next_free_addr(data->hid); + platform_memfrag memfrag_keybuf; + uint8 *keybuf = TYPED_ARRAY_MALLOC(data->hid, keybuf, keybuf_size); + + // Validate returned memory-ptr, knowing that no pad bytes were needed. + ASSERT_TRUE((void *)keybuf == next_free); + + next_free = platform_shm_next_free_addr(data->hid); + + size_t space_left = + (data->shmem_capacity - (keybuf_size + platform_shm_ctrlblock_size())); + + ASSERT_EQUAL(space_left, platform_shmbytes_free(data->hid)); + + platform_error_log("\nNOTE: Test case intentionally triggers out-of-space" + " errors in shared segment. 'Insufficient memory'" + " error messages below are to be expected.\n"); + + // Note that although we have asked for 1 more byte than free space available + // the allocation interfaces round-up the # bytes for alignment. So the + // requested # of bytes will be a bit larger than free space in the error + // message you will see below. + keybuf_size = (space_left + 1); + uint8 *keybuf_oom = + TYPED_ARRAY_MALLOC_MF(NULL, data->hid, keybuf_oom, keybuf_size); + ASSERT_TRUE(keybuf_oom == NULL); + + // Free space counter is not touched if allocation fails. + ASSERT_EQUAL(space_left, platform_shmbytes_free(data->hid)); + + // As every memory request is rounded-up for alignment, the space left + // counter should always be an integral multiple of this constant. + ASSERT_EQUAL(0, (space_left % PLATFORM_CACHELINE_SIZE)); + + // If we request exactly what's available, it should succeed. + keybuf_size = space_left; + platform_memfrag memfrag_keybuf_no_oom; + uint8 *keybuf_no_oom = + TYPED_ARRAY_MALLOC(data->hid, keybuf_no_oom, keybuf_size); + ASSERT_TRUE(keybuf_no_oom != NULL); + + CTEST_LOG_INFO("Successfully allocated all remaining %lu bytes " + "from shared segment.\n", + space_left); + + // We should be out of space by now. + ASSERT_EQUAL(0, platform_shmbytes_free(data->hid)); + + // This should fail. + keybuf_size = 1; + keybuf_oom = TYPED_ARRAY_MALLOC_MF(NULL, data->hid, keybuf_oom, keybuf_size); + ASSERT_TRUE(keybuf_oom == NULL); + + // Free allocated memory before exiting. + platform_free(&memfrag_keybuf); + platform_free(&memfrag_keybuf_no_oom); +} + +/* + * --------------------------------------------------------------------------- + * test_concurrent_allocs_by_n_threads() - Verify concurrency control + * implemented during shared memory allocation. + * + * Exercise concurrent memory allocations from the shared memory of small + * memory fragments. Each thread will record its ownership on the fragment + * allocated. After all memory is exhausted, we cross-check the chain of + * fragments allocated by each thread to verify that fragment still shows up + * as owned by the allocating thread. + * + * In the rudimentary version of allocation from shared memory, we did not have + * any concurrency control for allocations. So, it's likely that we may have + * been clobbering allocated memory. + * + * This test case does a basic verification of the fixes implemented to avoid + * such races during concurrent memory allocation. + * + * NOTE: This test case will exit immediately upon finding the first fragment + * whose ownership is flawed. That may still leave many other fragments waiting + * to be discovered with flawed ownership. + * --------------------------------------------------------------------------- + */ +CTEST2(splinter_shmem_oom, test_concurrent_allocs_by_n_threads) +{ + splinterdb *kvsb; + splinterdb_config cfg; + data_config default_data_cfg; + + platform_disable_tracing_shm_ops(); + + ZERO_STRUCT(cfg); + ZERO_STRUCT(default_data_cfg); + + default_data_config_init(TEST_MAX_KEY_SIZE, &default_data_cfg); + setup_cfg_for_test(&cfg, &default_data_cfg); + + int rv = splinterdb_create(&cfg, &kvsb); + ASSERT_EQUAL(0, rv); + + // Setup multiple threads for concurrent memory allocation. + platform_thread new_thread; + thread_config thread_cfg[TEST_MAX_THREADS]; + thread_config *thread_cfgp = NULL; + int tctr = 0; + platform_status rc = STATUS_OK; + + ZERO_ARRAY(thread_cfg); + + platform_error_log("\nExecute %d concurrent threads peforming memory" + " allocation till we run out of memory in the shared" + " segment.\n'Insufficient memory' error messages" + " below are to be expected.\n", + TEST_MAX_THREADS); + + // Start-up n-threads, record their expected thread-IDs, which will be + // validated by the thread's execution function below. + for (tctr = 1, thread_cfgp = &thread_cfg[tctr]; + tctr < ARRAY_SIZE(thread_cfg); + tctr++, thread_cfgp++) + { + // These are independent of the new thread's creation. + thread_cfgp->splinter = kvsb; + thread_cfgp->exp_thread_idx = tctr; + + rc = platform_thread_create( + &new_thread, FALSE, exec_thread_memalloc, thread_cfgp, NULL); + ASSERT_TRUE(SUCCESS(rc)); + + thread_cfgp->this_thread_id = new_thread; + } + + // Complete execution of n-threads. Worker fn does the validation. + for (tctr = 1, thread_cfgp = &thread_cfg[tctr]; + tctr < ARRAY_SIZE(thread_cfg); + tctr++, thread_cfgp++) + { + rc = platform_thread_join(thread_cfgp->this_thread_id); + ASSERT_TRUE(SUCCESS(rc)); + } + + // Now run thru memory fragments allocated by each thread and verify that + // the identity recorded is kosher. If the same memory fragment was allocated + // to multiple threads, we should catch that error here. + for (tctr = 1, thread_cfgp = &thread_cfg[tctr]; + tctr < ARRAY_SIZE(thread_cfg); + tctr++, thread_cfgp++) + { + shm_memfrag *this_frag = thread_cfgp->start; + while (this_frag) { + ASSERT_EQUAL(tctr, + this_frag->owner, + "Owner=%lu of memory frag=%p is not expected owner=%lu\n", + this_frag->owner, + this_frag, + tctr); + this_frag = this_frag->next; + } + } + + rv = splinterdb_close(&kvsb); + ASSERT_EQUAL(0, rv); + + platform_enable_tracing_shm_ops(); +} + +static void +setup_cfg_for_test(splinterdb_config *out_cfg, data_config *default_data_cfg) +{ + *out_cfg = (splinterdb_config){.filename = TEST_DB_NAME, + .cache_size = 512 * Mega, + .disk_size = 2 * Giga, + .use_shmem = TRUE, + .data_cfg = default_data_cfg}; +} + +/* + * exec_thread_memalloc() - Worker fn for each thread to do concurrent memory + * allocation from the shared segment. + */ +static void +exec_thread_memalloc(void *arg) +{ + thread_config *thread_cfg = (thread_config *)arg; + splinterdb *kvs = thread_cfg->splinter; + + splinterdb_register_thread(kvs); + + // Allocate a new memory fragment and connect head to output variable for + // thread + shm_memfrag **fragpp = &thread_cfg->start; + shm_memfrag *new_frag = NULL; + + uint64 nallocs = 0; + threadid this_thread_idx = thread_cfg->exp_thread_idx; + + // Keep allocating fragments till we run out of memory. + // Build a linked list of memory fragments for this thread. + platform_memfrag memfrag_new_frag; + while ((new_frag = TYPED_ZALLOC(platform_get_heap_id(), new_frag)) != NULL) { + *fragpp = new_frag; + new_frag->owner = this_thread_idx; + fragpp = &new_frag->next; + nallocs++; + } + splinterdb_deregister_thread(kvs); + + platform_default_log( + "Thread-ID=%lu allocated %lu memory fragments of %lu bytes each.\n", + this_thread_idx, + nallocs, + sizeof(*new_frag)); +} diff --git a/tests/unit/splinter_shmem_test.c b/tests/unit/splinter_shmem_test.c index eab78903..e6f35604 100644 --- a/tests/unit/splinter_shmem_test.c +++ b/tests/unit/splinter_shmem_test.c @@ -6,6 +6,8 @@ * splinter_shmem_test.c -- * * Exercises the interfaces in SplinterDB shared memory allocation module. + * Also includes tests for memory management of fingerprint object, an + * area that was very troubling during development. * ----------------------------------------------------------------------------- */ #include "splinterdb/public_platform.h" @@ -15,37 +17,25 @@ #include "shmem.h" #include "splinterdb/splinterdb.h" #include "splinterdb/default_data_config.h" +#include "util.h" #define TEST_MAX_KEY_SIZE 42 // Just something to get going ... -// Test these many threads concurrently performing memory allocation. -#define TEST_MAX_THREADS 8 +// Size of an on-stack buffer used for testing +#define WB_ONSTACK_BUFSIZE 30 -/* - * To test heavily concurrent memory allocation from the shared memory, each - * thread will allocate a small fragment described by this structure. We then - * validate that the fragments are not clobbered by concurrent allocations. - */ -typedef struct shm_memfrag { - threadid owner; - struct shm_memfrag *next; -} shm_memfrag; - -// Configuration for each worker thread +// Thread Configuration: Only used as a struct for memory allocation. typedef struct { splinterdb *splinter; platform_thread this_thread_id; // OS-generated thread ID threadid exp_thread_idx; // Splinter-generated expected thread index - shm_memfrag *start; // Start of chain of allocated memfrags + void *start; // Start of chain of allocated memfrags } thread_config; // Function prototypes static void setup_cfg_for_test(splinterdb_config *out_cfg, data_config *default_data_cfg); -static void -exec_thread_memalloc(void *arg); - /* * Global data declaration macro: */ @@ -115,14 +105,16 @@ CTEST2(splinter_shmem, test_aligned_allocations) // Self-documenting assertion ... to future-proof this area. ASSERT_EQUAL(keybuf_size, PLATFORM_CACHELINE_SIZE); - void *next_free = platform_shm_next_free_addr(data->hid); - uint8 *keybuf = TYPED_MANUAL_MALLOC(data->hid, keybuf, keybuf_size); + void *next_free = platform_shm_next_free_addr(data->hid); + platform_memfrag memfrag_keybuf; + uint8 *keybuf = TYPED_ARRAY_MALLOC(data->hid, keybuf, keybuf_size); // Validate returned memory-ptrs, knowing that no pad bytes were needed. ASSERT_TRUE((void *)keybuf == next_free); - next_free = platform_shm_next_free_addr(data->hid); - uint8 *msgbuf = TYPED_MANUAL_MALLOC(data->hid, msgbuf, msgbuf_size); + next_free = platform_shm_next_free_addr(data->hid); + platform_memfrag memfrag_msgbuf; + uint8 *msgbuf = TYPED_ARRAY_MALLOC(data->hid, msgbuf, msgbuf_size); ASSERT_TRUE((void *)msgbuf == next_free); // Sum of requested alloc-sizes == total # of used-bytes @@ -144,9 +136,10 @@ CTEST2(splinter_shmem, test_aligned_allocations) */ CTEST2(splinter_shmem, test_unaligned_allocations) { - void *next_free = platform_shm_next_free_addr(data->hid); - int keybuf_size = 42; - uint8 *keybuf = TYPED_MANUAL_MALLOC(data->hid, keybuf, keybuf_size); + void *next_free = platform_shm_next_free_addr(data->hid); + int keybuf_size = 42; + platform_memfrag memfrag_keybuf; + uint8 *keybuf = TYPED_ARRAY_MALLOC(data->hid, keybuf, keybuf_size); int keybuf_pad = platform_align_bytes_reqd(PLATFORM_CACHELINE_SIZE, keybuf_size); @@ -164,7 +157,8 @@ CTEST2(splinter_shmem, test_unaligned_allocations) int msgbuf_size = 100; int msgbuf_pad = platform_align_bytes_reqd(PLATFORM_CACHELINE_SIZE, msgbuf_size); - uint8 *msgbuf = TYPED_MANUAL_MALLOC(data->hid, msgbuf, msgbuf_size); + platform_memfrag memfrag_msgbuf; + uint8 *msgbuf = TYPED_ARRAY_MALLOC(data->hid, msgbuf, msgbuf_size); // Next allocation will abut prev-allocation + pad-bytes ASSERT_TRUE((void *)msgbuf == (void *)keybuf + keybuf_size + keybuf_pad); @@ -185,76 +179,6 @@ CTEST2(splinter_shmem, test_unaligned_allocations) exp_free); } -/* - * --------------------------------------------------------------------------- - * Test allocation requests that result in an OOM from shared segment. - * Verify limits of memory allocation and handling of free/used bytes. - * These stats are maintained w/o full spinlocks, so will be approximate - * in concurrent scenarios. But for single-threaded allocations, these stats - * should be accurate even when shmem-OOMs occur. - * --------------------------------------------------------------------------- - */ -CTEST2(splinter_shmem, test_allocations_causing_OOMs) -{ - int keybuf_size = 64; - - // Self-documenting assertion ... to future-proof this area. - ASSERT_EQUAL(keybuf_size, PLATFORM_CACHELINE_SIZE); - - void *next_free = platform_shm_next_free_addr(data->hid); - uint8 *keybuf = TYPED_MANUAL_MALLOC(data->hid, keybuf, keybuf_size); - - // Validate returned memory-ptr, knowing that no pad bytes were needed. - ASSERT_TRUE((void *)keybuf == next_free); - - next_free = platform_shm_next_free_addr(data->hid); - - size_t space_left = - (data->shmem_capacity - (keybuf_size + platform_shm_ctrlblock_size())); - - ASSERT_EQUAL(space_left, platform_shmbytes_free(data->hid)); - - platform_error_log("\nNOTE: Test case intentionally triggers out-of-space" - " errors in shared segment. 'Insufficient memory'" - " error messages below are to be expected.\n"); - - // Note that although we have asked for 1 more byte than free space available - // the allocation interfaces round-up the # bytes for alignment. So the - // requested # of bytes will be a bit larger than free space in the error - // message you will see below. - keybuf_size = (space_left + 1); - uint8 *keybuf_oom = TYPED_MANUAL_MALLOC(data->hid, keybuf_oom, keybuf_size); - ASSERT_TRUE(keybuf_oom == NULL); - - // Free space counter is not touched if allocation fails. - ASSERT_EQUAL(space_left, platform_shmbytes_free(data->hid)); - - // As every memory request is rounded-up for alignment, the space left - // counter should always be an integral multiple of this constant. - ASSERT_EQUAL(0, (space_left % PLATFORM_CACHELINE_SIZE)); - - // If we request exactly what's available, it should succeed. - keybuf_size = space_left; - uint8 *keybuf_no_oom = - TYPED_MANUAL_MALLOC(data->hid, keybuf_no_oom, keybuf_size); - ASSERT_TRUE(keybuf_no_oom != NULL); - CTEST_LOG_INFO("Successfully allocated all remaining %lu bytes " - "from shared segment.\n", - space_left); - - // We should be out of space by now. - ASSERT_EQUAL(0, platform_shmbytes_free(data->hid)); - - // This should fail. - keybuf_size = 1; - keybuf_oom = TYPED_MANUAL_MALLOC(data->hid, keybuf_oom, keybuf_size); - ASSERT_TRUE(keybuf_oom == NULL); - - // Free allocated memory before exiting. - platform_free(data->hid, keybuf); - platform_free(data->hid, keybuf_no_oom); -} - /* * --------------------------------------------------------------------------- * Test allocation interface using platform_get_heap_id() accessor, which @@ -269,144 +193,100 @@ CTEST2(splinter_shmem, test_allocations_using_get_heap_id) { int keybuf_size = 64; - void *next_free = platform_shm_next_free_addr(data->hid); - uint8 *keybuf = - TYPED_MANUAL_MALLOC(platform_get_heap_id(), keybuf, keybuf_size); + void *next_free = platform_shm_next_free_addr(data->hid); + platform_memfrag memfrag_keybuf; + uint8 *keybuf = + TYPED_ARRAY_MALLOC(platform_get_heap_id(), keybuf, keybuf_size); // Validate returned memory-ptrs, knowing that no pad bytes were needed. ASSERT_TRUE((void *)keybuf == next_free); + + platform_memfrag *mf = &memfrag_keybuf; + platform_free(mf); } /* - * --------------------------------------------------------------------------- - * Currently 'free' is a no-op; no space is released. Do minimal testing of - * this feature, to ensure that at least the code flow is exectuing correctly. - * --------------------------------------------------------------------------- + * Basic test of 'free' where a freed-fragment goes to a free-list. Verify that + * the freed-fragment is found in the expected free-list, by-size. */ -CTEST2(splinter_shmem, test_free) +CTEST2(splinter_shmem, test_basic_free_list_size) { - int keybuf_size = 64; - uint8 *keybuf = TYPED_MANUAL_MALLOC(data->hid, keybuf, keybuf_size); + int keybuf_size = 64; + platform_memfrag memfrag_keybuf; + uint8 *keybuf = TYPED_ARRAY_MALLOC(data->hid, keybuf, keybuf_size); - int msgbuf_size = (2 * keybuf_size); - uint8 *msgbuf = TYPED_MANUAL_MALLOC(data->hid, msgbuf, msgbuf_size); + // Fragment is still allocated, so should not be in any free-list(s). + ASSERT_EQUAL(0, platform_shm_find_freed_frag(data->hid, keybuf, NULL)); - size_t mem_used = platform_shmbytes_used(data->hid); + platform_free(&memfrag_keybuf); - void *next_free = platform_shm_next_free_addr(data->hid); + // A freed-fragment should go its appropriate free-list by-size. + ASSERT_EQUAL(keybuf_size, + platform_shm_find_freed_frag(data->hid, keybuf, NULL)); - platform_free(data->hid, keybuf); + // Variation testing out padding due to alignment + keybuf_size = 100; + keybuf = TYPED_ARRAY_MALLOC(data->hid, keybuf, keybuf_size); - // Even though we freed some memory, the next addr-to-allocate is unchanged. - ASSERT_TRUE(next_free == platform_shm_next_free_addr(data->hid)); + // Memory allocation would have padded bytes up to cache line alignment. + size_t exp_memfrag_size = keybuf_size; + exp_memfrag_size += + platform_align_bytes_reqd(PLATFORM_CACHELINE_SIZE, keybuf_size); + ASSERT_EQUAL(exp_memfrag_size, memfrag_size(&memfrag_keybuf)); - // Space used remains unchanged, as free didn't quite return any memory - ASSERT_EQUAL(mem_used, platform_shmbytes_used(data->hid)); + platform_free(&memfrag_keybuf); + ASSERT_EQUAL(exp_memfrag_size, + platform_shm_find_freed_frag(data->hid, keybuf, NULL)); } /* * --------------------------------------------------------------------------- - * test_concurrent_allocs_by_n_threads() - Verify concurrency control - * implemented during shared memory allocation. - * - * Exercise concurrent memory allocations from the shared memory of small - * memory fragments. Each thread will record its ownership on the fragment - * allocated. After all memory is exhausted, we cross-check the chain of - * fragments allocated by each thread to verify that fragment still shows up - * as owned by the allocating thread. - * - * In the rudimentary version of allocation from shared memory, we did not have - * any concurrency control for allocations. So, it's likely that we may have - * been clobbering allocated memory. - * - * This test case does a basic verification of the fixes implemented to avoid - * such races during concurrent memory allocation. - * - * NOTE: This test case will exit immediately upon finding the first fragment - * whose ownership is flawed. That may still leave many other fragments waiting - * to be discovered with flawed ownership. + * Currently 'free' of small fragments is implemented by returning the freed + * fragment to a free-list, by size of the fragment. Currently, we are only + * tracking free-lists of specific sizes. Verify that after a fragment is + * freed that the free / used counts book-keeping is done right. We should be + * re-allocate the same freed fragment subsequently, as long as the size is + * sufficient. * --------------------------------------------------------------------------- */ -CTEST2(splinter_shmem, test_concurrent_allocs_by_n_threads) +CTEST2(splinter_shmem, test_free) { - splinterdb *kvsb; - splinterdb_config cfg; - data_config default_data_cfg; + int keybuf_size = 64; + platform_memfrag memfrag_keybuf; + uint8 *keybuf = TYPED_ARRAY_MALLOC(data->hid, keybuf, keybuf_size); - platform_disable_tracing_shm_ops(); + int msgbuf_size = (2 * keybuf_size); + platform_memfrag memfrag_msgbuf; + uint8 *msgbuf = TYPED_ARRAY_MALLOC(data->hid, msgbuf, msgbuf_size); - ZERO_STRUCT(cfg); - ZERO_STRUCT(default_data_cfg); + size_t mem_used = platform_shmbytes_used(data->hid); - default_data_config_init(TEST_MAX_KEY_SIZE, &default_data_cfg); - setup_cfg_for_test(&cfg, &default_data_cfg); + void *next_free = platform_shm_next_free_addr(data->hid); - int rv = splinterdb_create(&cfg, &kvsb); - ASSERT_EQUAL(0, rv); - - // Setup multiple threads for concurrent memory allocation. - platform_thread new_thread; - thread_config thread_cfg[TEST_MAX_THREADS]; - thread_config *thread_cfgp = NULL; - int tctr = 0; - platform_status rc = STATUS_OK; - - ZERO_ARRAY(thread_cfg); - - platform_error_log("\nExecute %d concurrent threads peforming memory" - " allocation till we run out of memory in the shared" - " segment.\n'Insufficient memory' error messages" - " below are to be expected.\n", - TEST_MAX_THREADS); - - // Start-up n-threads, record their expected thread-IDs, which will be - // validated by the thread's execution function below. - for (tctr = 1, thread_cfgp = &thread_cfg[tctr]; - tctr < ARRAY_SIZE(thread_cfg); - tctr++, thread_cfgp++) - { - // These are independent of the new thread's creation. - thread_cfgp->splinter = kvsb; - thread_cfgp->exp_thread_idx = tctr; - - rc = platform_thread_create( - &new_thread, FALSE, exec_thread_memalloc, thread_cfgp, NULL); - ASSERT_TRUE(SUCCESS(rc)); - - thread_cfgp->this_thread_id = new_thread; - } - - // Complete execution of n-threads. Worker fn does the validation. - for (tctr = 1, thread_cfgp = &thread_cfg[tctr]; - tctr < ARRAY_SIZE(thread_cfg); - tctr++, thread_cfgp++) - { - rc = platform_thread_join(thread_cfgp->this_thread_id); - ASSERT_TRUE(SUCCESS(rc)); - } - - // Now run thru memory fragments allocated by each thread and verify that - // the identity recorded is kosher. If the same memory fragment was allocated - // to multiple threads, we should catch that error here. - for (tctr = 1, thread_cfgp = &thread_cfg[tctr]; - tctr < ARRAY_SIZE(thread_cfg); - tctr++, thread_cfgp++) - { - shm_memfrag *this_frag = thread_cfgp->start; - while (this_frag) { - ASSERT_EQUAL(tctr, - this_frag->owner, - "Owner=%lu of memory frag=%p is not expected owner=%lu\n", - this_frag->owner, - this_frag, - tctr); - this_frag = this_frag->next; - } - } - - splinterdb_close(&kvsb); + platform_memfrag *mf = &memfrag_keybuf; + platform_free(mf); - platform_enable_tracing_shm_ops(); + // Even though we freed some memory, the next addr-to-allocate is unchanged. + ASSERT_TRUE(next_free == platform_shm_next_free_addr(data->hid)); + + // Space used should go down as a fragment has been freed. + mem_used -= keybuf_size; + ASSERT_EQUAL(mem_used, platform_shmbytes_used(data->hid)); + + // The freed fragment should be re-allocated, upon re-request. + // Note, that there is a small discrepancy creeping in here. The caller may + // have got a larger fragment returned, but its size is not immediately known + // to the caller. Caller will end up free'ing a fragment specifying the size + // as its requested size. Shmem book-keeping will return this free fragment + // to a free-list for smaller sized fragments. (Minor issue.) + size_t smaller_size = 32; // will get rounded up to cache-linesize, 64 bytes + uint8 *smaller_keybuf = TYPED_ARRAY_MALLOC(data->hid, keybuf, smaller_size); + ASSERT_TRUE(keybuf == smaller_keybuf); + + // Even though we only asked for a smaller fragment, a larger free-fragemnt + // was allocated. Check the book-keeping. + mem_used += keybuf_size; + ASSERT_EQUAL(mem_used, platform_shmbytes_used(data->hid)); } /* @@ -417,30 +297,33 @@ CTEST2(splinter_shmem, test_concurrent_allocs_by_n_threads) * unchanged. * --------------------------------------------------------------------------- */ -CTEST2(splinter_shmem, test_realloc_of_large_fragment) +CTEST2(splinter_shmem, test_alloc_free_and_reuse_of_large_fragment) { void *next_free = platform_shm_next_free_addr(data->hid); // Large fragments are tracked if their size >= this size. - size_t size = (1 * MiB); - uint8 *keybuf = TYPED_MANUAL_MALLOC(data->hid, keybuf, size); + size_t size = (1 * MiB); + platform_memfrag memfrag_keybuf; + uint8 *keybuf = TYPED_ARRAY_MALLOC(data->hid, keybuf, size); // Validate that a new large fragment will create a new allocation. ASSERT_TRUE((void *)keybuf == next_free); // Re-establish next-free-ptr after this large allocation. We will use it // below to assert that this location will not change when we re-use this - // large fragment for reallocation after it's been freed. + // large fragment for re-allocation after it's been freed. next_free = platform_shm_next_free_addr(data->hid); - // Save this off, as free below will NULL out handle. + // Save this off ... uint8 *keybuf_old = keybuf; - // If you free this fragment and reallocate exactly the same size, + // If you free this fragment and re-allocate exactly the same size, // it should recycle the freed fragment. - platform_free(data->hid, keybuf); + platform_memfrag *mf = &memfrag_keybuf; + platform_free(mf); - uint8 *keybuf_new = TYPED_MANUAL_MALLOC(data->hid, keybuf_new, size); + platform_memfrag memfrag_keybuf_new; + uint8 *keybuf_new = TYPED_ARRAY_MALLOC(data->hid, keybuf_new, size); ASSERT_TRUE((keybuf_old == keybuf_new), "keybuf_old=%p, keybuf_new=%p\n", keybuf_old, @@ -449,39 +332,58 @@ CTEST2(splinter_shmem, test_realloc_of_large_fragment) // We have re-used freed fragment, so the next-free-ptr should be unchanged. ASSERT_TRUE(next_free == platform_shm_next_free_addr(data->hid)); - platform_free(data->hid, keybuf_new); + platform_free(&memfrag_keybuf_new); } /* * --------------------------------------------------------------------------- - * Test that free followed by a request of the same size will reallocate the - * recently-freed fragment, avoiding any existing in-use fragments of the same - * size. + * Test that free followed by a request of the same size (of a large fragment) + * will re-allocate the recently-freed large fragment, avoiding any existing + * in-use large fragments of the same size. * --------------------------------------------------------------------------- */ -CTEST2(splinter_shmem, test_free_realloc_around_inuse_fragments) +CTEST2(splinter_shmem, test_free_reuse_around_inuse_large_fragments) { void *next_free = platform_shm_next_free_addr(data->hid); - // Large fragments are tracked if their size >= this size. - size_t size = (1 * MiB); - uint8 *keybuf1_1MiB = TYPED_MANUAL_MALLOC(data->hid, keybuf1_1MiB, size); - uint8 *keybuf2_1MiB = TYPED_MANUAL_MALLOC(data->hid, keybuf2_1MiB, size); - uint8 *keybuf3_1MiB = TYPED_MANUAL_MALLOC(data->hid, keybuf3_1MiB, size); + // Large fragments are tracked if their size >= (at least) this size. + size_t size = (1 * MiB); + platform_memfrag memfrag_keybuf1_1MiB; + uint8 *keybuf1_1MiB = TYPED_ARRAY_MALLOC(data->hid, keybuf1_1MiB, size); + + // Throw-in allocation for some random struct, to ensure that these large + // fragments are not contiguous + platform_memfrag memfrag_filler_cfg1; + thread_config *filler_cfg1 = TYPED_MALLOC(data->hid, filler_cfg1); + + platform_memfrag memfrag_keybuf2_1MiB; + uint8 *keybuf2_1MiB = TYPED_ARRAY_MALLOC(data->hid, keybuf2_1MiB, size); + + platform_memfrag memfrag_filler_cfg2; + thread_config *filler_cfg2 = TYPED_MALLOC(data->hid, filler_cfg2); + + platform_memfrag memfrag_keybuf3_1MiB; + uint8 *keybuf3_1MiB = TYPED_ARRAY_MALLOC(data->hid, keybuf3_1MiB, size); + + platform_memfrag memfrag_filler_cfg3; + thread_config *filler_cfg3 = TYPED_MALLOC(data->hid, filler_cfg3); // Re-establish next-free-ptr after this large allocation. We will use it // below to assert that this location will not change when we re-use a - // large fragment for reallocation after it's been freed. + // large fragment for re-allocation after it's been freed. next_free = platform_shm_next_free_addr(data->hid); // Save off fragment handles as free will NULL out ptr. uint8 *old_keybuf2_1MiB = keybuf2_1MiB; - // Free the middle fragment that should get reallocated, below. - platform_free(data->hid, keybuf2_1MiB); + // Free the middle fragment. That fragment should get reused, below. + platform_memfrag memfrag = { + .hid = data->hid, .addr = keybuf2_1MiB, .size = size}; + platform_memfrag *mf = &memfrag; + platform_free(mf); // Re-request (new) fragments of the same size. - keybuf2_1MiB = TYPED_MANUAL_MALLOC(data->hid, keybuf2_1MiB, size); + keybuf2_1MiB = TYPED_ARRAY_MALLOC(data->hid, keybuf2_1MiB, size); ASSERT_TRUE((keybuf2_1MiB == old_keybuf2_1MiB), "Expected to satisfy new 1MiB request at %p" " with old 1MiB fragment ptr at %p\n", @@ -493,22 +395,30 @@ CTEST2(splinter_shmem, test_free_realloc_around_inuse_fragments) // As large-fragments allocated / freed are tracked in an array, verify // that we will find the 1st one upon a re-request after a free. uint8 *old_keybuf1_1MiB = keybuf1_1MiB; - platform_free(data->hid, keybuf1_1MiB); - platform_free(data->hid, keybuf2_1MiB); + mf = &memfrag_keybuf1_1MiB; + platform_free(mf); + + mf = &memfrag_keybuf2_1MiB; + platform_free(mf); // This re-request should re-allocate the 1st free fragment found. - keybuf2_1MiB = TYPED_MANUAL_MALLOC(data->hid, keybuf2_1MiB, size); + keybuf2_1MiB = TYPED_ARRAY_MALLOC(data->hid, keybuf2_1MiB, size); ASSERT_TRUE((keybuf2_1MiB == old_keybuf1_1MiB), "Expected to satisfy new 1MiB request at %p" " with old 1MiB fragment ptr at %p\n", keybuf2_1MiB, old_keybuf1_1MiB); - // We've already freed keybuf1_1MiB; can't free a NULL ptr again. - // platform_free(data->hid, keybuf1_1MiB); + mf = &memfrag_keybuf2_1MiB; + platform_free(mf); + + mf = &memfrag_keybuf3_1MiB; + platform_free(mf); - platform_free(data->hid, keybuf2_1MiB); - platform_free(data->hid, keybuf3_1MiB); + // Memory fragments of typed objects can be freed directly. + platform_free(&memfrag_filler_cfg1); + platform_free(&memfrag_filler_cfg2); + platform_free(&memfrag_filler_cfg3); } /* @@ -526,23 +436,26 @@ CTEST2(splinter_shmem, test_free_realloc_around_inuse_fragments) * and then satisfy the next request with the free 5 MiB fragment. * --------------------------------------------------------------------------- */ -CTEST2(splinter_shmem, test_realloc_of_free_fragments_uses_first_fit) +CTEST2(splinter_shmem, test_reuse_of_free_fragments_uses_first_fit) { void *next_free = platform_shm_next_free_addr(data->hid); // Large fragments are tracked if their size >= this size. - size_t size = (1 * MiB); - uint8 *keybuf_1MiB = TYPED_MANUAL_MALLOC(data->hid, keybuf_1MiB, size); + size_t size = (1 * MiB); + platform_memfrag memfrag_keybuf_1MiB; + uint8 *keybuf_1MiB = TYPED_ARRAY_MALLOC(data->hid, keybuf_1MiB, size); - size = (5 * MiB); - uint8 *keybuf_5MiB = TYPED_MANUAL_MALLOC(data->hid, keybuf_5MiB, size); + size = (5 * MiB); + platform_memfrag memfrag_keybuf_5MiB; + uint8 *keybuf_5MiB = TYPED_ARRAY_MALLOC(data->hid, keybuf_5MiB, size); - size = (2 * MiB); - uint8 *keybuf_2MiB = TYPED_MANUAL_MALLOC(data->hid, keybuf_2MiB, size); + size = (2 * MiB); + platform_memfrag memfrag_keybuf_2MiB; + uint8 *keybuf_2MiB = TYPED_ARRAY_MALLOC(data->hid, keybuf_2MiB, size); // Re-establish next-free-ptr after this large allocation. We will use it // below to assert that this location will not change when we re-use a - // large fragment for reallocation after it's been freed. + // large fragment for re-allocation after it's been freed. next_free = platform_shm_next_free_addr(data->hid); // Save off fragment handles as free will NULL out ptr. @@ -550,14 +463,21 @@ CTEST2(splinter_shmem, test_realloc_of_free_fragments_uses_first_fit) uint8 *old_keybuf_5MiB = keybuf_5MiB; uint8 *old_keybuf_2MiB = keybuf_2MiB; + platform_memfrag *mf = NULL; + // Order in which we free these fragments does not matter. - platform_free(data->hid, keybuf_1MiB); - platform_free(data->hid, keybuf_2MiB); - platform_free(data->hid, keybuf_5MiB); + mf = &memfrag_keybuf_1MiB; + platform_free(mf); + + mf = &memfrag_keybuf_2MiB; + platform_free(mf); + + mf = &memfrag_keybuf_5MiB; + platform_free(mf); // Re-request (new) fragments in diff size order. size = (2 * MiB); - keybuf_2MiB = TYPED_MANUAL_MALLOC(data->hid, keybuf_2MiB, size); + keybuf_2MiB = TYPED_ARRAY_MALLOC(data->hid, keybuf_2MiB, size); ASSERT_TRUE((keybuf_2MiB == old_keybuf_5MiB), "Expected to satisfy new 2MiB request at %p" " with old 5MiB fragment ptr at %p\n", @@ -569,17 +489,630 @@ CTEST2(splinter_shmem, test_realloc_of_free_fragments_uses_first_fit) ASSERT_TRUE(next_free == platform_shm_next_free_addr(data->hid)); size = (5 * MiB); - keybuf_5MiB = TYPED_MANUAL_MALLOC(data->hid, keybuf_5MiB, size); + keybuf_5MiB = TYPED_ARRAY_MALLOC(data->hid, keybuf_5MiB, size); // We allocated a new fragment at next-free-ptr ASSERT_TRUE(keybuf_5MiB != old_keybuf_1MiB); ASSERT_TRUE(keybuf_5MiB != old_keybuf_2MiB); ASSERT_TRUE(keybuf_5MiB == next_free); - platform_free(data->hid, keybuf_2MiB); - platform_free(data->hid, keybuf_5MiB); + mf = &memfrag_keybuf_2MiB; + platform_free(mf); + + mf = &memfrag_keybuf_5MiB; + platform_free(mf); +} + +/* + * --------------------------------------------------------------------------- + * Test case to verify that configuration checks that shared segment size + * is "big enough" to allocate memory for RC-allocator cache's lookup + * array. For very large devices, with insufficiently sized shared memory + * config, we will not be able to boot-up. + * --------------------------------------------------------------------------- + */ +CTEST2(splinter_shmem, test_large_dev_with_small_shmem_error_handling) +{ + splinterdb *kvsb; + splinterdb_config cfg; + data_config default_data_cfg; + + platform_disable_tracing_shm_ops(); + + ZERO_STRUCT(cfg); + ZERO_STRUCT(default_data_cfg); + + default_data_config_init(TEST_MAX_KEY_SIZE, &default_data_cfg); + setup_cfg_for_test(&cfg, &default_data_cfg); + + // This config should cause a failure while trying to allocate + // clockcache for very-large-device in small shared memory. + cfg.shmem_size = (1 * Giga); + cfg.disk_size = (10 * Tera); + + int rc = splinterdb_create(&cfg, &kvsb); + ASSERT_NOT_EQUAL(0, rc); + + platform_enable_tracing_shm_ops(); +} + +/* + * --------------------------------------------------------------------------- + * Basic test to verify that memory is correctly freed through realloc() + * interface. Here, memory for an oldptr is expanded thru + * platform_shm_realloc(). This is similar to C-realloc() API. Verify that + * memory fragments are correctly freed. And that used / free space accounting + * is done properly. + * + * Verify that for a proper sequence of alloc / realloc / free operations, the + * used / free space metrics are correctly restored to their initial state. + * --------------------------------------------------------------------------- + */ +CTEST2(splinter_shmem, test_small_frag_platform_realloc) +{ + size_t shmused_initial = platform_shmbytes_used(data->hid); + size_t shmfree_initial = platform_shmbytes_free(data->hid); + + // As memory allocated is rounded-up to PLATFORM_CACHELINE_SIZE, ask for + // memory just short of a cache line size. This way, when we double our + // request, for realloc, we will necessarily go thru realloc(), rather than + // re-using this padded memory fragment. + size_t oldsize = (PLATFORM_CACHELINE_SIZE - 2 * sizeof(void *)); + + platform_memfrag memfrag_oldptr; + + char *oldptr = TYPED_ARRAY_MALLOC(data->hid, oldptr, oldsize); + size_t mf_oldsize = memfrag_size(&memfrag_oldptr); + ASSERT_TRUE((oldsize < mf_oldsize), + "Requested oldsize=%lu is expected to be" + " < mf_oldsize=%lu bytes", + oldsize, + mf_oldsize); + + size_t old_shmfree = platform_shmbytes_free(data->hid); + size_t old_shmused = platform_shmbytes_used(data->hid); + + // Free-memory should have gone down by size of memfrag allocated + ASSERT_EQUAL(old_shmfree, (shmfree_initial - memfrag_size(&memfrag_oldptr))); + + // Used-memory should have gone up by size of memfrag allocated + ASSERT_EQUAL(old_shmused, (shmused_initial + memfrag_size(&memfrag_oldptr))); + + size_t newsize = (2 * oldsize); + + char *newptr = platform_realloc(&memfrag_oldptr, newsize); + ASSERT_TRUE(newptr != oldptr); + + // Expect that newsize was padded up for cacheline alignment + size_t mf_newsize = memfrag_size(&memfrag_oldptr); + + ASSERT_TRUE((mf_newsize > newsize), + "Memory fragment's new size=%lu bytes should be >" + "newsize=%lu bytes, oldsize=%lu", + mf_newsize, + newsize, + oldsize); + + // realloc() (is expected to) pad-up newsize to cache-line alignment + ASSERT_TRUE(platform_shm_next_free_cacheline_aligned(data->hid)); + + // Check free space accounting + size_t new_shmfree = platform_shmbytes_free(data->hid); + size_t exp_shmfree = (old_shmfree + mf_oldsize - mf_newsize); + ASSERT_TRUE((exp_shmfree == new_shmfree), + "Expected free space=%lu bytes != actual free space=%lu bytes" + ", diff=%lu bytes. ", + exp_shmfree, + new_shmfree, + diff_size_t(exp_shmfree, new_shmfree)); + + // Check used space accounting after realloc() + size_t new_shmused = platform_shmbytes_used(data->hid); + size_t exp_shmused = (old_shmused - mf_oldsize + mf_newsize); + ASSERT_TRUE((exp_shmused == new_shmused), + "Expected used space=%lu bytes != actual used space=%lu bytes" + ", diff=%lu bytes. ", + exp_shmused, + new_shmused, + diff_size_t(exp_shmused, new_shmused)); + + // We should be able to re-cycle the memory used by oldptr before realloc() + // for another memory fragment of the same size + platform_memfrag memfrag_nextptr; + char *nextptr = TYPED_ARRAY_MALLOC(data->hid, nextptr, oldsize); + ASSERT_TRUE(nextptr == oldptr); + + platform_memfrag memfrag_anotherptr; + char *anotherptr = TYPED_ARRAY_MALLOC(data->hid, anotherptr, (10 * oldsize)); + ASSERT_TRUE(anotherptr != oldptr); + ASSERT_TRUE(anotherptr != nextptr); + + platform_memfrag *mf = &memfrag_anotherptr; + platform_free(mf); + + mf = &memfrag_nextptr; + platform_free(mf); + + // Here's the trick in book-keeping. As oldptr was realloc()'ed, its size + // went up from what was tracked in its memfrag_oldptr. + // So, to correctly get free space accounting, and to not 'leak' memory, we + // need to re-establish the fragment's correct identity before freeing it. + platform_free_mem(data->hid, newptr, mf_newsize); + + // Confirm that free/used space metrics go back to initial values + new_shmused = platform_shmbytes_used(data->hid); + new_shmfree = platform_shmbytes_free(data->hid); + + ASSERT_EQUAL(shmused_initial, + new_shmused, + "shmused_initial=%lu != new_shmused=%lu, diff=%lu. ", + shmused_initial, + new_shmused, + diff_size_t(new_shmused, shmused_initial)); + + ASSERT_EQUAL(shmfree_initial, + new_shmfree, + "shmfree_initial=%lu != new_shmfree=%lu, diff=%lu. ", + shmfree_initial, + new_shmfree, + diff_size_t(shmfree_initial, new_shmfree)); +} + +/* + * --------------------------------------------------------------------------- + * Exercise realloc() of a small-fragment to a large-fragment. + * + * Verify that: + * - We round-up to cacheline alignment even for large fragment requests + * - For a proper sequence of alloc / realloc / free operations, the + * used / free space metrics are correctly restored to their initial state. + * --------------------------------------------------------------------------- + */ +CTEST2(splinter_shmem, test_small_frag_platform_realloc_to_large_frag) +{ + size_t shmused_initial = platform_shmbytes_used(data->hid); + size_t shmfree_initial = platform_shmbytes_free(data->hid); + + // Allocate a small fragment here + size_t oldsize = ((2 * PLATFORM_CACHELINE_SIZE) - 2 * sizeof(void *)); + platform_memfrag memfrag_oldptr; + char *oldptr = TYPED_ARRAY_MALLOC(data->hid, oldptr, oldsize); + + size_t mf_oldsize = memfrag_size(&memfrag_oldptr); + size_t old_shmfree = platform_shmbytes_free(data->hid); + size_t old_shmused = platform_shmbytes_used(data->hid); + + size_t old_memfrag_size = memfrag_size(&memfrag_oldptr); + // Free-memory should have gone down by size of memfrag allocated + ASSERT_EQUAL(old_shmfree, (shmfree_initial - old_memfrag_size)); + + // Used-memory should have gone up by size of memfrag allocated + ASSERT_EQUAL(old_shmused, (shmused_initial + old_memfrag_size)); + + // Request a very large fragment, just shy of the alignment size. + size_t newsize = (2 * SHM_LARGE_FRAG_SIZE) - 16; + size_t expected_newsisze = newsize; + expected_newsisze += + platform_align_bytes_reqd(PLATFORM_CACHELINE_SIZE, expected_newsisze); + + char *newptr = platform_realloc(&memfrag_oldptr, newsize); + ASSERT_TRUE(newptr != oldptr); + + // Expect realloc() to have aligned to cache-line size + size_t mf_newsize = memfrag_size(&memfrag_oldptr); + ASSERT_EQUAL(expected_newsisze, + mf_newsize, + "expected_newsisze=%lu, mf_newsize=%lu", + expected_newsisze, + mf_newsize); + + // Check free space accounting + size_t new_shmfree = platform_shmbytes_free(data->hid); + size_t exp_shmfree = (old_shmfree + mf_oldsize - mf_newsize); + ASSERT_TRUE((exp_shmfree == new_shmfree), + "Expected free space=%lu bytes != actual free space=%lu bytes" + ", diff=%lu bytes. ", + exp_shmfree, + new_shmfree, + diff_size_t(exp_shmfree, new_shmfree)); + + // Check used space accounting after realloc() allocated a new large fragment + size_t new_shmused = platform_shmbytes_used(data->hid); + size_t exp_shmused = (old_shmused - mf_oldsize + mf_newsize); + ASSERT_TRUE((exp_shmused == new_shmused), + "Expected used space=%lu bytes != actual used space=%lu bytes" + ", diff=%lu bytes. ", + exp_shmused, + new_shmused, + diff_size_t(exp_shmused, new_shmused)); + + platform_free_mem(data->hid, newptr, expected_newsisze); + + // When large fragments are 'freed', they are not really accounted in the + // used/free bytes metrics. This is because, these large-fragments are + // already 'used', waiting to be re-cycled to the new request. + // Confirm that free/used space metrics go back to expected values. + new_shmused = platform_shmbytes_used(data->hid); + new_shmfree = platform_shmbytes_free(data->hid); + + ASSERT_EQUAL(exp_shmfree, + new_shmfree, + "exp_shmfree=%lu != new_shmfree=%lu, diff=%lu. ", + exp_shmfree, + new_shmfree, + diff_size_t(exp_shmfree, new_shmfree)); + + ASSERT_EQUAL(exp_shmused, + new_shmused, + "exp_shmused=%lu != new_shmused=%lu, diff=%lu. ", + exp_shmused, + new_shmused, + diff_size_t(exp_shmused, shmused_initial)); +} + +/* + * --------------------------------------------------------------------------- + * Exercise realloc() of a large-fragment to another large-fragment. + * + * Verify that: + * - We round-up to cacheline alignment even for large fragment requests + * - For a proper sequence of alloc / realloc / free operations, the + * used / free space metrics are correctly restored to their initial state. + * --------------------------------------------------------------------------- + */ +CTEST2(splinter_shmem, test_large_frag_platform_realloc_to_large_frag) +{ + size_t shmused_initial = platform_shmbytes_used(data->hid); + size_t shmfree_initial = platform_shmbytes_free(data->hid); + + platform_memfrag memfrag_oldptr; + // Allocate a large fragment here + + size_t oldsize = (2 * SHM_LARGE_FRAG_SIZE); + char *oldptr = TYPED_ARRAY_MALLOC(data->hid, oldptr, oldsize); + + size_t mf_oldsize = memfrag_size(&memfrag_oldptr); + size_t old_shmfree = platform_shmbytes_free(data->hid); + size_t old_shmused = platform_shmbytes_used(data->hid); + + // Free-memory should have gone down by size of memfrag allocated + ASSERT_EQUAL(old_shmfree, (shmfree_initial - mf_oldsize)); + + // Used-memory should have gone up by size of memfrag allocated + ASSERT_EQUAL(old_shmused, (shmused_initial + mf_oldsize)); + + // Request a larger fragment. (Alignment issues covered earlier ...) + size_t newsize = (4 * SHM_LARGE_FRAG_SIZE) - 20; + size_t expected_newsisze = newsize; + expected_newsisze += + platform_align_bytes_reqd(PLATFORM_CACHELINE_SIZE, expected_newsisze); + + char *newptr = platform_realloc(&memfrag_oldptr, newsize); + ASSERT_TRUE(newptr != oldptr); + + // Expect realloc() to have aligned to cache-line size + size_t mf_newsize = memfrag_size(&memfrag_oldptr); + ASSERT_EQUAL(expected_newsisze, + mf_newsize, + "expected_newsisze=%lu, mf_newsize=%lu", + expected_newsisze, + mf_newsize); + + // Check free space accounting. Memory used by old large-fragment being + // freed is not accounted in shared memory's memory metrics + size_t new_shmfree = platform_shmbytes_free(data->hid); + size_t exp_shmfree = (old_shmfree - mf_newsize); + ASSERT_TRUE((exp_shmfree == new_shmfree), + "Expected free space=%lu bytes != actual free space=%lu bytes" + ", diff=%lu bytes. " + "oldsize=%lu, mf_oldsize=%lu, newsize=%lu, mf_newsize=%lu", + exp_shmfree, + new_shmfree, + diff_size_t(exp_shmfree, new_shmfree), + oldsize, + mf_oldsize, + newsize, + mf_newsize); + + // Check used space accounting after realloc() allocated a new large fragment + size_t new_shmused = platform_shmbytes_used(data->hid); + size_t exp_shmused = (old_shmused + mf_newsize); + ASSERT_TRUE((exp_shmused == new_shmused), + "Expected used space=%lu bytes != actual used space=%lu bytes" + ", diff=%lu bytes. ", + exp_shmused, + new_shmused, + diff_size_t(exp_shmused, new_shmused)); + + // You -must- specify the right size when free'ing even a large fragment. + // Otherwise, debug asserts will trip. + platform_free_mem(data->hid, newptr, mf_newsize); + return; + + // When large fragments are 'freed', they are not really accounted in the + // used/free bytes metrics. This is because, these large-fragments are + // already 'used', waiting to be re-cycled to the new request. + // Confirm that free/used space metrics go back to expected values. + new_shmused = platform_shmbytes_used(data->hid); + new_shmfree = platform_shmbytes_free(data->hid); + + ASSERT_EQUAL(exp_shmfree, + new_shmfree, + "exp_shmfree=%lu != new_shmfree=%lu, diff=%lu. ", + exp_shmfree, + new_shmfree, + diff_size_t(exp_shmfree, new_shmfree)); + + ASSERT_EQUAL(exp_shmused, + new_shmused, + "exp_shmused=%lu != new_shmused=%lu, diff=%lu. ", + exp_shmused, + new_shmused, + diff_size_t(exp_shmused, shmused_initial)); +} + +/* + * --------------------------------------------------------------------------- + * Writable buffer interface tests: Which exercise shared memory related APIs. + * --------------------------------------------------------------------------- + */ +/* + * Resizing of writable buffers goes through realloc(). Cross-check that memory + * metrics are correctly done through this code-flow. + */ +CTEST2(splinter_shmem, test_writable_buffer_resize_empty_buffer) +{ + size_t shmused_initial = platform_shmbytes_used(data->hid); + size_t shmfree_initial = platform_shmbytes_free(data->hid); + + writable_buffer wb_data; + writable_buffer *wb = &wb_data; + + writable_buffer_init(wb, data->hid); + uint64 new_length = 20; + writable_buffer_resize(wb, new_length); + + ASSERT_EQUAL(new_length, writable_buffer_length(wb)); + + // We should have done some memory allocation. + ASSERT_TRUE(wb->can_free); + ASSERT_NOT_NULL(writable_buffer_data(wb)); + writable_buffer_deinit(wb); + + // Confirm that free/used space metrics go back to initial values + size_t new_shmused = platform_shmbytes_used(data->hid); + size_t new_shmfree = platform_shmbytes_free(data->hid); + + ASSERT_EQUAL(shmused_initial, + new_shmused, + "shmused_initial=%lu != new_shmused=%lu, diff=%lu. ", + shmused_initial, + new_shmused, + (new_shmused - shmused_initial)); + + ASSERT_EQUAL(shmfree_initial, + new_shmfree, + "shmfree_initial=%lu != new_shmfree=%lu, diff=%lu. ", + shmfree_initial, + new_shmfree, + (shmfree_initial - new_shmfree)); +} + +/* + * --------------------------------------------------------------------------- + * Test resizing of writable buffers that initially had an on-stack buffer. + * Resizing goes through realloc(). Cross-check that memory metrics are + * correctly done through this code-flow. + */ +CTEST2(splinter_shmem, test_writable_buffer_resize_onstack_buffer) +{ + size_t shmused_initial = platform_shmbytes_used(data->hid); + size_t shmfree_initial = platform_shmbytes_free(data->hid); + + writable_buffer wb_data; + writable_buffer *wb = &wb_data; + + char buf[WB_ONSTACK_BUFSIZE]; + writable_buffer_init_with_buffer( + wb, data->hid, sizeof(buf), (void *)buf, WRITABLE_BUFFER_NULL_LENGTH); + + size_t new_length = (10 * sizeof(buf)); + writable_buffer_resize(wb, new_length); + + ASSERT_EQUAL(new_length, writable_buffer_length(wb)); + + // We should have done some memory allocation. + ASSERT_TRUE(wb->can_free); + + void *dataptr = writable_buffer_data(wb); + ASSERT_NOT_NULL(dataptr); + ASSERT_TRUE((void *)buf != dataptr); + + writable_buffer_deinit(wb); + + // Confirm that free/used space metrics go back to initial values + size_t new_shmused = platform_shmbytes_used(data->hid); + size_t new_shmfree = platform_shmbytes_free(data->hid); + + ASSERT_EQUAL(shmused_initial, + new_shmused, + "shmused_initial=%lu != new_shmused=%lu, diff=%lu. ", + shmused_initial, + new_shmused, + (new_shmused - shmused_initial)); + + ASSERT_EQUAL(shmfree_initial, + new_shmfree, + "shmfree_initial=%lu != new_shmfree=%lu, diff=%lu. ", + shmfree_initial, + new_shmfree, + (shmfree_initial - new_shmfree)); +} + +/* + * --------------------------------------------------------------------------- + * Test resizing of writable buffers that go through 'append' interface + * correctly manage the fragment's capacity as was initially allocated from + * shared memory. This is a test case for a small shmem-specific 'bug' in + * writable_buffer_ensure_space() -> platform_realloc(), whereby we weren't + * specifying the right 'oldsize' for a fragment being realloc()'ed. + */ +CTEST2(splinter_shmem, test_writable_buffer_resize_vs_capacity) +{ + size_t shmused_initial = platform_shmbytes_used(data->hid); + size_t shmfree_initial = platform_shmbytes_free(data->hid); + + writable_buffer wb_data; + writable_buffer *wb = &wb_data; + + writable_buffer_init(wb, data->hid); + const char *input_str = "Hello World!"; + writable_buffer_append(wb, strlen(input_str), (const void *)input_str); + + // Min fragment allocated is of one cache line + ASSERT_EQUAL(PLATFORM_CACHELINE_SIZE, writable_buffer_capacity(wb)); + + void *data_ptr = writable_buffer_data(wb); + + // If you append another short string that fits within buffer capacity, + // no reallocation should occur. + input_str = "Another Hello World!"; + writable_buffer_append(wb, strlen(input_str), (const void *)input_str); + + void *new_data_ptr = writable_buffer_data(wb); + ASSERT_TRUE(data_ptr == new_data_ptr); + + size_t old_buffer_capacity = writable_buffer_capacity(wb); + + // Now if you append a bigger chunk so that the writable buffer's capacity + // is exceeded, it will be realloc()'ed. + char filler[PLATFORM_CACHELINE_SIZE]; + memset(filler, 'X', sizeof(filler)); + writable_buffer_append(wb, sizeof(filler), (const void *)filler); + + // Should allocate a new memory fragment, so data ptr must change + new_data_ptr = writable_buffer_data(wb); + ASSERT_FALSE(data_ptr == new_data_ptr); + + size_t freed_frag_size_as_found = 0; + + // Old writable-buffer should have been freed to the free-list + // corresponding to its capacity. + ASSERT_EQUAL(old_buffer_capacity, + platform_shm_find_freed_frag( + data->hid, data_ptr, &freed_frag_size_as_found)); + + // The buffer should have been freed with its right capacity as 'size', + // but there was a latent bug that was tripping up this assertion. + ASSERT_EQUAL(old_buffer_capacity, + freed_frag_size_as_found, + "Expected free size=%lu, found free size of frag=%lu. ", + old_buffer_capacity, + freed_frag_size_as_found); + + writable_buffer_deinit(wb); + + // Confirm that free/used space metrics go back to initial values + size_t new_shmused = platform_shmbytes_used(data->hid); + size_t new_shmfree = platform_shmbytes_free(data->hid); + + ASSERT_EQUAL(shmused_initial, + new_shmused, + "shmused_initial=%lu != new_shmused=%lu, diff=%lu. ", + shmused_initial, + new_shmused, + (new_shmused - shmused_initial)); + + ASSERT_EQUAL(shmfree_initial, + new_shmfree, + "shmfree_initial=%lu != new_shmfree=%lu, diff=%lu. ", + shmfree_initial, + new_shmfree, + (shmfree_initial - new_shmfree)); +} + +/* + * --------------------------------------------------------------------------- + * Test cases exercising fingerprint object management. + * --------------------------------------------------------------------------- + */ +CTEST2(splinter_shmem, test_fingerprint_basic) +{ + size_t mem_free_prev = platform_shmbytes_free(data->hid); + + size_t nitems = (1 * KiB); + fp_hdr fp; + uint32 *fp_arr = fingerprint_init(&fp, data->hid, nitems); + ASSERT_TRUE(fp_arr == fingerprint_start(&fp)); + ASSERT_EQUAL(nitems, fingerprint_ntuples(&fp)); + ASSERT_FALSE(fingerprint_is_empty(&fp)); + + size_t mem_free_now = platform_shmbytes_free(data->hid); + ASSERT_TRUE(mem_free_now == (mem_free_prev - fingerprint_size(&fp))); + + fingerprint_deinit(data->hid, &fp); + ASSERT_TRUE(fingerprint_is_empty(&fp)); + mem_free_now = platform_shmbytes_free(data->hid); + ASSERT_TRUE(mem_free_now == mem_free_prev); +} + +/* Verify move operation */ +CTEST2(splinter_shmem, test_fingerprint_move) +{ + size_t nitems = (1 * KiB); + fp_hdr fp_src; + uint32 *fp_src_arr = fingerprint_init(&fp_src, data->hid, nitems); + + size_t src_size = fingerprint_size(&fp_src); + + fp_hdr fp_dst = {0}; + uint32 *fp_dst_arr = fingerprint_move(&fp_dst, &fp_src); + + // Fingerprint is now owned by destination object + ASSERT_TRUE(fp_dst_arr == fingerprint_start(&fp_dst)); + ASSERT_TRUE(fp_dst_arr == fp_src_arr); + ASSERT_EQUAL(nitems, fingerprint_ntuples(&fp_dst)); + + size_t dst_size = fingerprint_size(&fp_dst); + ASSERT_EQUAL(src_size, dst_size); + + // Source is empty + ASSERT_TRUE(fingerprint_is_empty(&fp_src)); + ASSERT_EQUAL(0, fingerprint_ntuples(&fp_src)); +} + +/* Verify sequence of alias, unalias, only then you can do a deinit on src */ +CTEST2(splinter_shmem, test_fingerprint_alias_unalias_deinit) +{ + size_t nitems = (1 * KiB); + fp_hdr fp_src; + uint32 *fp_src_arr = fingerprint_init(&fp_src, data->hid, nitems); + + size_t src_size = fingerprint_size(&fp_src); + + fp_hdr fp_dst = {0}; + uint32 *fp_dst_arr = fingerprint_alias(&fp_dst, &fp_src); + + // Fingerprint is now owned by destination object + ASSERT_TRUE(fp_dst_arr == fingerprint_start(&fp_dst)); + ASSERT_TRUE(fp_dst_arr == fp_src_arr); + ASSERT_EQUAL(nitems, fingerprint_ntuples(&fp_dst)); + + size_t dst_size = fingerprint_size(&fp_dst); + ASSERT_EQUAL(src_size, dst_size); + + // Source is still not empty + ASSERT_TRUE(!fingerprint_is_empty(&fp_src)); + ASSERT_EQUAL(nitems, fingerprint_ntuples(&fp_src)); + + // You have to unalias dst from src before you can release src's memory + fp_dst_arr = fingerprint_unalias(&fp_dst); + ASSERT_TRUE(fp_dst_arr == NULL); + + fingerprint_deinit(data->hid, &fp_src); } + static void setup_cfg_for_test(splinterdb_config *out_cfg, data_config *default_data_cfg) { @@ -589,40 +1122,3 @@ setup_cfg_for_test(splinterdb_config *out_cfg, data_config *default_data_cfg) .use_shmem = TRUE, .data_cfg = default_data_cfg}; } - -/* - * exec_thread_memalloc() - Worker fn for each thread to do concurrent memory - * allocation from the shared segment. - */ -static void -exec_thread_memalloc(void *arg) -{ - thread_config *thread_cfg = (thread_config *)arg; - splinterdb *kvs = thread_cfg->splinter; - - splinterdb_register_thread(kvs); - - // Allocate a new memory fragment and connect head to output variable for - // thread - shm_memfrag **fragpp = &thread_cfg->start; - shm_memfrag *new_frag = NULL; - - uint64 nallocs = 0; - threadid this_thread_idx = thread_cfg->exp_thread_idx; - - // Keep allocating fragments till we run out of memory. - // Build a linked list of memory fragments for this thread. - while ((new_frag = TYPED_ZALLOC(platform_get_heap_id(), new_frag)) != NULL) { - *fragpp = new_frag; - new_frag->owner = this_thread_idx; - fragpp = &new_frag->next; - nallocs++; - } - splinterdb_deregister_thread(kvs); - - platform_default_log( - "Thread-ID=%lu allocated %lu memory fragments of %lu bytes each.\n", - this_thread_idx, - nallocs, - sizeof(*new_frag)); -} diff --git a/tests/unit/splinter_test.c b/tests/unit/splinter_test.c index a410bc1b..d55a54e3 100644 --- a/tests/unit/splinter_test.c +++ b/tests/unit/splinter_test.c @@ -102,6 +102,12 @@ CTEST_DATA(splinter) task_system *tasks; test_message_generator gen; + // Sizes of memory fragments allocated for these structs. + size_t splinter_cfg_mf_size; + size_t io_mf_size; + size_t cache_cfg_mf_size; + size_t clock_cache_mf_size; + // Test execution related configuration test_exec_config test_exec_cfg; }; @@ -137,10 +143,15 @@ CTEST_SETUP(splinter) platform_assert_status_ok(rc); // Allocate memory for global config structures - data->splinter_cfg = TYPED_ARRAY_MALLOC(data->hid, data->splinter_cfg, - num_tables); + platform_memfrag memfrag_splinter_cfg; + data->splinter_cfg = TYPED_ARRAY_MALLOC_MF(&memfrag_splinter_cfg, data->hid, data->splinter_cfg, + num_tables); + data->splinter_cfg_mf_size = memfrag_size(&memfrag_splinter_cfg); - data->cache_cfg = TYPED_ARRAY_MALLOC(data->hid, data->cache_cfg, num_tables); + platform_memfrag memfrag_cache_cfg; + data->cache_cfg = TYPED_ARRAY_MALLOC_MF(&memfrag_cache_cfg, data->hid, data->cache_cfg, + num_tables); + data->cache_cfg_mf_size = memfrag_size(&memfrag_cache_cfg); ZERO_STRUCT(data->test_exec_cfg); @@ -178,8 +189,10 @@ CTEST_SETUP(splinter) } // Allocate and initialize the IO sub-system. - data->io = TYPED_MALLOC(data->hid, data->io); + platform_memfrag memfrag_io; + data->io = TYPED_MALLOC_MF(&memfrag_io, data->hid, data->io); ASSERT_TRUE((data->io != NULL)); + data->io_mf_size = memfrag_size(&memfrag_io); rc = io_handle_init(data->io, &data->io_cfg, data->hid); data->tasks = NULL; @@ -191,8 +204,10 @@ CTEST_SETUP(splinter) rc_allocator_init(&data->al, &data->al_cfg, (io_handle *)data->io, data->hid, platform_get_module_id()); - data->clock_cache = TYPED_ARRAY_MALLOC(data->hid, data->clock_cache, num_caches); + platform_memfrag memfrag_clock_cache; + data->clock_cache = TYPED_ARRAY_MALLOC_MF(&memfrag_clock_cache, data->hid, data->clock_cache, num_caches); ASSERT_TRUE((data->clock_cache != NULL)); + data->clock_cache_mf_size = memfrag_size(&memfrag_clock_cache); for (uint8 idx = 0; idx < num_caches; idx++) { rc = clockcache_init(&data->clock_cache[idx], @@ -215,7 +230,7 @@ CTEST_SETUP(splinter) CTEST_TEARDOWN(splinter) { clockcache_deinit(data->clock_cache); - platform_free(data->hid, data->clock_cache); + platform_free_mem(data->hid, data->clock_cache, data->clock_cache_mf_size); allocator *alp = (allocator *)&data->al; allocator_assert_noleaks(alp); @@ -224,14 +239,15 @@ CTEST_TEARDOWN(splinter) test_deinit_task_system(data->hid, &data->tasks); io_handle_deinit(data->io); - platform_free(data->hid, data->io); + platform_free_mem(data->hid, data->io, data->io_mf_size); if (data->cache_cfg) { - platform_free(data->hid, data->cache_cfg); + platform_free_mem(data->hid, data->cache_cfg, data->cache_cfg_mf_size); } if (data->splinter_cfg) { - platform_free(data->hid, data->splinter_cfg); + platform_free_mem( + data->hid, data->splinter_cfg, data->splinter_cfg_mf_size); } platform_heap_destroy(&data->hid); diff --git a/tests/unit/splinterdb_forked_child_test.c b/tests/unit/splinterdb_forked_child_test.c index e15752eb..50a5ad8a 100644 --- a/tests/unit/splinterdb_forked_child_test.c +++ b/tests/unit/splinterdb_forked_child_test.c @@ -148,23 +148,23 @@ CTEST2(splinterdb_forked_child, test_data_structures_handles) // all valid addresses allocated from the shared segment setup by the main // process. - ASSERT_TRUE(platform_valid_addr_in_heap( + ASSERT_TRUE(platform_isvalid_addr_in_heap( spl_heap_id, splinterdb_get_task_system_handle(spl_handle))); ASSERT_TRUE( - platform_valid_addr_in_heap(splinterdb_get_heap_id(spl_handle), - splinterdb_get_io_handle(spl_handle))); + platform_isvalid_addr_in_heap(splinterdb_get_heap_id(spl_handle), + splinterdb_get_io_handle(spl_handle))); - ASSERT_TRUE(platform_valid_addr_in_heap( + ASSERT_TRUE(platform_isvalid_addr_in_heap( spl_heap_id, splinterdb_get_allocator_handle(spl_handle))); - ASSERT_TRUE(platform_valid_addr_in_heap( + ASSERT_TRUE(platform_isvalid_addr_in_heap( spl_heap_id, splinterdb_get_cache_handle(spl_handle))); - ASSERT_TRUE(platform_valid_addr_in_heap( + ASSERT_TRUE(platform_isvalid_addr_in_heap( spl_heap_id, splinterdb_get_trunk_handle(spl_handle))); - ASSERT_TRUE(platform_valid_addr_in_heap( + ASSERT_TRUE(platform_isvalid_addr_in_heap( spl_heap_id, splinterdb_get_memtable_context_handle(spl_handle))); // Before registering w/Splinter, child process is still at tid==0. @@ -190,7 +190,8 @@ CTEST2(splinterdb_forked_child, test_data_structures_handles) // We would get assertions tripping from BTree iterator code here, // if the fix in platform_buffer_create_mmap() to use MAP_SHARED // was not in-place. - splinterdb_close(&spl_handle); + rc = splinterdb_close(&spl_handle); + ASSERT_EQUAL(0, rc); } else { // Child should not attempt to run the rest of the tests exit(0); @@ -291,7 +292,8 @@ CTEST2(splinterdb_forked_child, test_one_insert_then_close_bug) // We would get assertions tripping from BTree iterator code here, // if the fix in platform_buffer_create_mmap() to use MAP_SHARED // was not in-place. - splinterdb_close(&spl_handle); + rc = splinterdb_close(&spl_handle); + ASSERT_EQUAL(0, rc); } else { // child should not attempt to run the rest of the tests exit(0); @@ -414,7 +416,8 @@ CTEST2(splinterdb_forked_child, " Resuming parent ...\n", platform_getpid(), platform_get_tid()); - splinterdb_close(&spl_handle); + rc = splinterdb_close(&spl_handle); + ASSERT_EQUAL(0, rc); } else { // child should not attempt to run the rest of the tests exit(0); @@ -452,6 +455,9 @@ CTEST2(splinterdb_forked_child, test_multiple_forked_process_doing_IOs) // hammering at it with large #s of inserts. splinterdb_cfg.cache_size = (1 * Giga); + // Bump up disk size based on # of concurrent child processes + splinterdb_cfg.disk_size *= data->num_forked_procs; + splinterdb_cfg.filename = "splinterdb_forked_child_test_db"; splinterdb *spl_handle; // To a running SplinterDB instance @@ -542,7 +548,8 @@ CTEST2(splinterdb_forked_child, test_multiple_forked_process_doing_IOs) platform_getpid(), platform_get_tid()); - splinterdb_close(&spl_handle); + rc = splinterdb_close(&spl_handle); + ASSERT_EQUAL(0, rc); } else { // child should not attempt to run the rest of the tests exit(0); @@ -628,6 +635,10 @@ do_many_inserts(splinterdb *kvsb, uint64 num_inserts) } } uint64 elapsed_ns = platform_timestamp_elapsed(start_time); + uint64 elapsed_s = NSEC_TO_SEC(elapsed_ns); + if (elapsed_s == 0) { + elapsed_s = 1; + } platform_default_log("%s()::%d:Thread-%lu Inserted %lu million KV-pairs in " "%lu s, %lu rows/s\n", @@ -635,8 +646,8 @@ do_many_inserts(splinterdb *kvsb, uint64 num_inserts) __LINE__, thread_idx, ictr, // outer-loop ends at #-of-Millions inserted - NSEC_TO_SEC(elapsed_ns), - (num_inserts / NSEC_TO_SEC(elapsed_ns))); + elapsed_s, + (num_inserts / elapsed_s)); } static void diff --git a/tests/unit/splinterdb_quick_test.c b/tests/unit/splinterdb_quick_test.c index 0071ee43..1ab32738 100644 --- a/tests/unit/splinterdb_quick_test.c +++ b/tests/unit/splinterdb_quick_test.c @@ -112,6 +112,10 @@ CTEST_SETUP(splinterdb_quick) data->cfg.use_shmem = config_parse_use_shmem(Ctest_argc, (char **)Ctest_argv); + // Always turn ON stats-gathering so we cover common stats-related + // code-flows in this quick-test. + data->cfg.use_stats = TRUE; + int rc = splinterdb_create(&data->cfg, &data->kvsb); ASSERT_EQUAL(0, rc); ASSERT_TRUE(TEST_MAX_VALUE_SIZE @@ -122,7 +126,8 @@ CTEST_SETUP(splinterdb_quick) CTEST_TEARDOWN(splinterdb_quick) { if (data->kvsb) { - splinterdb_close(&data->kvsb); + int rc = splinterdb_close(&data->kvsb); + ASSERT_EQUAL(0, rc); } } @@ -273,7 +278,8 @@ CTEST2(splinterdb_quick, test_value_size_gt_max_value_size) { size_t too_large_value_len = MAX_INLINE_MESSAGE_SIZE(LAIO_DEFAULT_PAGE_SIZE) + 1; - char *too_large_value_data; + char *too_large_value_data; + platform_memfrag memfrag_too_large_value_data; too_large_value_data = TYPED_ARRAY_MALLOC( data->cfg.heap_id, too_large_value_data, too_large_value_len); memset(too_large_value_data, 'z', too_large_value_len); @@ -284,7 +290,7 @@ CTEST2(splinterdb_quick, test_value_size_gt_max_value_size) data->kvsb, slice_create(sizeof("foo"), "foo"), too_large_value); ASSERT_EQUAL(EINVAL, rc); - platform_free(data->cfg.heap_id, too_large_value_data); + platform_free(&memfrag_too_large_value_data); } /* diff --git a/tests/unit/splinterdb_stress_test.c b/tests/unit/splinterdb_stress_test.c index 348dc7bf..46d4c304 100644 --- a/tests/unit/splinterdb_stress_test.c +++ b/tests/unit/splinterdb_stress_test.c @@ -219,60 +219,6 @@ CTEST2(splinterdb_stress, test_iterator_over_many_kvs) splinterdb_iterator_deinit(it); } -/* - * Test case that inserts large # of KV-pairs, and goes into a code path - * reported by issue# 458, tripping a debug assert. This test case also - * triggered the failure(s) reported by issue # 545. - * FIXME: This test still runs into an assertion "filter->addr != 0" - * from trunk_inc_filter(), which is being triaged separately. - */ -CTEST2_SKIP(splinterdb_stress, test_issue_458_mini_destroy_unused_debug_assert) -{ - char key_data[TEST_KEY_SIZE]; - char val_data[TEST_VALUE_SIZE]; - - uint64 test_start_time = platform_get_timestamp(); - - for (uint64 ictr = 0, jctr = 0; ictr < 100; ictr++) { - - uint64 start_time = platform_get_timestamp(); - - for (jctr = 0; jctr < MILLION; jctr++) { - - uint64 id = (ictr * MILLION) + jctr; - snprintf(key_data, sizeof(key_data), "%lu", id); - snprintf(val_data, sizeof(val_data), "Row-%lu", id); - - slice key = slice_create(strlen(key_data), key_data); - slice val = slice_create(strlen(val_data), val_data); - - int rc = splinterdb_insert(data->kvsb, key, val); - ASSERT_EQUAL(0, rc); - } - uint64 elapsed_ns = platform_timestamp_elapsed(start_time); - uint64 test_elapsed_ns = platform_timestamp_elapsed(test_start_time); - - uint64 elapsed_s = NSEC_TO_SEC(elapsed_ns); - if (elapsed_s == 0) { - elapsed_s = 1; - } - uint64 test_elapsed_s = NSEC_TO_SEC(test_elapsed_ns); - if (test_elapsed_s == 0) { - test_elapsed_s = 1; - } - - CTEST_LOG_INFO( - "\n" // PLATFORM_CR - "Inserted %lu million KV-pairs" - ", this batch: %lu s, %lu rows/s, cumulative: %lu s, %lu rows/s ...", - (ictr + 1), - elapsed_s, - (jctr / elapsed_s), - test_elapsed_s, - (((ictr + 1) * jctr) / test_elapsed_s)); - } -} - // Per-thread workload static void * exec_worker_thread(void *w) diff --git a/tests/unit/task_system_test.c b/tests/unit/task_system_test.c index 82771c67..3be0185a 100644 --- a/tests/unit/task_system_test.c +++ b/tests/unit/task_system_test.c @@ -30,6 +30,7 @@ #include "config.h" // Reqd for definition of master_config{} #include "trunk.h" // Needed for trunk_get_scratch_size() #include "task.h" +#include "shmem.h" #include "splinterdb/splinterdb.h" #include "splinterdb/default_data_config.h" @@ -91,6 +92,7 @@ CTEST_DATA(task_system) task_system *tasks; uint64 active_threads_bitmask; + bool use_shmem; // Test is being run with --use-shmem }; /* @@ -111,8 +113,10 @@ CTEST_SETUP(task_system) platform_get_module_id(), heap_capacity, use_shmem, &data->hid); platform_assert_status_ok(rc); + // Allocate and initialize the IO sub-system. - data->ioh = TYPED_MALLOC(data->hid, data->ioh); + platform_memfrag memfrag_ioh; + data->ioh = TYPED_MALLOC_MF(&memfrag_ioh, data->hid, data->ioh); ASSERT_TRUE((data->ioh != NULL)); // Do minimal IO config setup, using default IO values. @@ -459,6 +463,103 @@ CTEST2(task_system, test_use_all_but_one_threads_for_bg_threads) set_log_streams_for_tests(MSG_LEVEL_INFO); } +/* + * ------------------------------------------------------------------------ + * Test creation of one new thread using external Splinter interfaces and + * ask for a huge amount of scratch space, which is much bigger than what + * can be allocated from shared-memory. This will cause thread creation to + * fail. We want to exercise backout code in task_create_thread_with_hooks() + * to ensure that allocated memory is freed correctly. + * ------------------------------------------------------------------------ + */ +CTEST2(task_system, test_create_thread_with_huge_scratch_space) +{ + platform_thread new_thread; + thread_config thread_cfg; + + // Test case is only relevant when run with shared segment, as it's + // constructed to induce an OOM while allocating memory. + if (!data->use_shmem) { + return; + } + ZERO_STRUCT(thread_cfg); + + threadid main_thread_idx = platform_get_tid(); + ASSERT_EQUAL(main_thread_idx, 0, "main_thread_idx=%lu", main_thread_idx); + + // Setup thread-specific struct, needed for validation in thread's worker fn + thread_cfg.tasks = data->tasks; + + // Main thread is at index 0 + thread_cfg.exp_thread_idx = 1; + thread_cfg.active_threads_bitmask = task_active_tasks_mask(data->tasks); + + platform_status rc = STATUS_OK; + + rc = task_thread_create("test_one_thread", + exec_one_thread_use_extern_apis, + &thread_cfg, + (1 * GiB), + data->tasks, + data->hid, + &new_thread); + ASSERT_FALSE(SUCCESS(rc)); +} + +/* + * ------------------------------------------------------------------------ + * Test creation of one new thread using external Splinter interfaces and + * ask for an amount of scratch space that consumes all free space. This will + * cause the next request for memory, while creating a thread, to fail. + * We want to exercise backout code in task_create_thread_with_hooks() + * to ensure that allocated memory is freed correctly. + * ------------------------------------------------------------------------ + */ +CTEST2(task_system, test_create_thread_using_all_avail_mem_for_scratch_space) +{ + platform_thread new_thread; + thread_config thread_cfg; + + // Test case is only relevant when run with shared segment, as it's + // constructed to induce an OOM while allocating memory. + if (!data->use_shmem) { + return; + } + ZERO_STRUCT(thread_cfg); + + threadid main_thread_idx = platform_get_tid(); + ASSERT_EQUAL(main_thread_idx, 0, "main_thread_idx=%lu", main_thread_idx); + + // Setup thread-specific struct, needed for validation in thread's worker fn + thread_cfg.tasks = data->tasks; + + // Main thread is at index 0 + thread_cfg.exp_thread_idx = 1; + thread_cfg.active_threads_bitmask = task_active_tasks_mask(data->tasks); + + platform_status rc = STATUS_OK; + + // When the test is being run using shared memory, probe the shmem API to + // find out how much free space is available currently. Consume all but a + // few bytes for the scratch space. This will trigger an OOM while trying to + // allocate thread-related structures -after- scratch space has been + // allocated. Tickling this code-flow exercises backout code path where there + // were some bugs due to improper use of platform_free(). + + size_t scratch_space = + (data->use_shmem ? (platform_shmbytes_free(data->hid) - 16) + : trunk_get_scratch_size()); + + rc = task_thread_create("test_one_thread", + exec_one_thread_use_extern_apis, + &thread_cfg, + scratch_space, + data->tasks, + data->hid, + &new_thread); + ASSERT_FALSE(SUCCESS(rc)); +} + /* Wrapper function to create Splinter Task system w/o background threads. */ static platform_status create_task_system_without_bg_threads(void *datap) diff --git a/tests/unit/unit_tests.h b/tests/unit/unit_tests.h index 319618e0..d84e5694 100644 --- a/tests/unit/unit_tests.h +++ b/tests/unit/unit_tests.h @@ -14,6 +14,7 @@ #define Kilo (1024UL) #define Mega (1024UL * Kilo) #define Giga (1024UL * Mega) +#define Tera (1024UL * Giga) void set_log_streams_for_tests(msg_level exp_msg_level); diff --git a/tests/unit/writable_buffer_test.c b/tests/unit/writable_buffer_test.c index 3380aab1..e51ac2e8 100644 --- a/tests/unit/writable_buffer_test.c +++ b/tests/unit/writable_buffer_test.c @@ -416,15 +416,6 @@ CTEST2(writable_buffer, test_writable_buffer_append) "Unexpected data: '%s'\n", (char *)writable_buffer_data(wb)); - // Currently, reallocation from shared-memory will not reuse the existing - // memory fragment, even if there is some room in it for the append. (That's - // an optimization which needs additional memory fragment-size info which - // is currently not available to the allocator.) - if (data->use_shmem) { - const void *new_data2 = writable_buffer_data(wb); - ASSERT_TRUE((new_data != new_data2)); - } - writable_buffer_deinit(wb); }