diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index bdd196b04..56ec58de9 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -14,7 +14,7 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT) add_umf_library( NAME disjoint_pool TYPE STATIC - SRCS pool_disjoint.cpp ${POOL_EXTRA_SRCS} + SRCS pool_disjoint.c ../critnib/critnib.c ${POOL_EXTRA_SRCS} LIBS ${POOL_EXTRA_LIBS}) target_compile_definitions(disjoint_pool diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c new file mode 100644 index 000000000..20e315656 --- /dev/null +++ b/src/pool/pool_disjoint.c @@ -0,0 +1,1259 @@ +/* + * Copyright (C) 2022-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "critnib/critnib.h" +#include "uthash/utlist.h" + +#include "base_alloc_global.h" +#include "provider_tracking.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" +#include "utils_math.h" +#include "utils_sanitizers.h" + +typedef struct bucket_t bucket_t; +typedef struct slab_t slab_t; +typedef struct slab_list_item_t slab_list_item_t; +typedef struct disjoint_pool_t disjoint_pool_t; + +slab_t *create_slab(bucket_t *bucket); +void destroy_slab(slab_t *slab); + +void *slab_get(const slab_t *slab); +void *slab_get_end(const slab_t *slab); +void *slab_get_chunk(slab_t *slab); + +bool slab_has_avail(const slab_t *slab); +void slab_free_chunk(slab_t *slab, void *ptr); + +void slab_reg(slab_t *slab); +void slab_reg_by_addr(void *addr, slab_t *slab); +void slab_unreg(slab_t *slab); +void slab_unreg_by_addr(void *addr, slab_t *slab); + +bucket_t *create_bucket(size_t sz, disjoint_pool_t *pool, + umf_disjoint_pool_shared_limits_t *shared_limits); +void destroy_bucket(bucket_t *bucket); + +void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); +bool bucket_can_pool(bucket_t *bucket, bool *to_pool); +void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *to_pool); +void bucket_decrement_pool(bucket_t *bucket, bool *from_pool); +void *bucket_get_chunk(bucket_t *bucket, bool *from_pool); +size_t bucket_chunk_cut_off(bucket_t *bucket); +size_t bucket_capacity(bucket_t *bucket); +void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, + bool *to_pool); +void bucket_count_alloc(bucket_t *bucket, bool from_pool); + +void *bucket_get_slab(bucket_t *bucket, bool *from_pool); +size_t bucket_slab_alloc_size(bucket_t *bucket); +size_t bucket_slab_min_size(bucket_t *bucket); +slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); +slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, bool *from_pool); +void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool); + +static __TLS umf_result_t TLS_last_allocation_error; + +// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is +// requested. The implementation distinguishes between allocations of size +// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. +// Allocation requests smaller than ChunkCutoff use chunks taken from a single +// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation +// size, and 8-byte allocations, only 1 in ~8000 requests results in a new +// coarse-grain allocation. Freeing results only in a chunk of a larger +// allocation to be marked as available and no real return to the system. 
An +// allocation is returned to the system only when all chunks in the larger +// allocation are freed by the program. Allocations larger than ChunkCutOff use +// a separate coarse-grain allocation for each request. These are subject to +// "pooling". That is, when such an allocation is freed by the program it is +// retained in a pool. The pool is available for future allocations, which means +// there are fewer actual coarse-grain allocations/deallocations. + +// The largest size which is allocated via the allocator. +// Allocations with size > CutOff bypass the pool and +// go directly to the provider. +static size_t CutOff = (size_t)1 << 31; // 2GB + +// Temporary solution for disabling memory poisoning. This is needed because +// AddressSanitizer does not support memory poisoning for GPU allocations. +// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 +#ifndef POISON_MEMORY +#define POISON_MEMORY 0 +#endif + +/*static */ void annotate_memory_inaccessible(void *ptr, size_t size) { + (void)ptr; + (void)size; +#if (POISON_MEMORY != 0) + utils_annotate_memory_inaccessible(ptr, size); +#endif +} + +/*static*/ void annotate_memory_undefined(void *ptr, size_t size) { + (void)ptr; + (void)size; +#if (POISON_MEMORY != 0) + utils_annotate_memory_undefined(ptr, size); +#endif +} + +void annotate_memory_inaccessible(void *ptr, size_t size); +void annotate_memory_undefined(void *ptr, size_t size); + +typedef struct slab_list_item_t slab_list_item_t; + +typedef struct bucket_t { + size_t size; + + // Linked list of slabs which have at least 1 available chunk. + slab_list_item_t *available_slabs; + + // Linked list of slabs with 0 available chunk. + slab_list_item_t *unavailable_slabs; + + // Protects the bucket and all the corresponding slabs + utils_mutex_t bucket_lock; + + // Reference to the allocator context, used access memory allocation + // routines, slab map and etc. + disjoint_pool_t *pool; + + umf_disjoint_pool_shared_limits_t *shared_limits; + + // For buckets used in chunked mode, a counter of slabs in the pool. + // For allocations that use an entire slab each, the entries in the Available + // list are entries in the pool.Each slab is available for a new + // allocation.The size of the Available list is the size of the pool. + // For allocations that use slabs in chunked mode, slabs will be in the + // Available list if any one or more of their chunks is free.The entire slab + // is not necessarily free, just some chunks in the slab are free. To + // implement pooling we will allow one slab in the Available list to be + // entirely empty. Normally such a slab would have been freed. But + // now we don't, and treat this slab as "in the pool". + // When a slab becomes entirely free we have to decide whether to return it + // to the provider or keep it allocated. A simple check for size of the + // Available list is not sufficient to check whether any slab has been + // pooled yet.We would have to traverse the entire Available listand check + // if any of them is entirely free. Instead we keep a counter of entirely + // empty slabs within the Available list to speed up the process of checking + // if a slab in this bucket is already pooled. 
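+    //
+    // Illustrative example (values assumed, not from this patch): a 64-byte
+    // bucket with a 64KB SlabMinSize runs in chunked mode, so
+    // bucket_capacity() is 1. While exactly one completely free slab sits in
+    // available_slabs, chunked_slabs_in_pool == 1; allocating a chunk from
+    // that slab (see bucket_get_avail_slab) drops the counter back to 0
+    // without traversing the list.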
+ size_t chunked_slabs_in_pool; + + // Statistics + size_t alloc_pool_count; + size_t free_count; + size_t curr_slabs_in_use; + size_t curr_slabs_in_pool; + size_t max_slabs_in_pool; + size_t alloc_count; + size_t max_slabs_in_use; +} bucket_t; + +// Represents the allocated memory block of size 'slab_min_size' +// Internally, it splits the memory block into chunks. The number of +// chunks depends of the size of a Bucket which created the Slab. +// Note: Bucket's methods are responsible for thread safety of Slab access, +// so no locking happens here. +typedef struct slab_t { + // Pointer to the allocated memory of slab_min_size bytes + void *mem_ptr; + size_t slab_size; + + // Represents the current state of each chunk: if the bit is set then the + // chunk is allocated, and if the chunk is free for allocation otherwise + bool *chunks; + size_t num_chunks; + + // Total number of allocated chunks at the moment. + size_t num_allocated; + + // The bucket which the slab belongs to + bucket_t *bucket; + + // Hints where to start search for free chunk in a slab + size_t first_free_chunk_idx; + + // Store iterator to the corresponding node in avail/unavail list + // to achieve O(1) removal + slab_list_item_t *iter; +} slab_t; + +typedef struct slab_list_item_t { + slab_t *val; + struct slab_list_item_t *prev, *next; +} slab_list_item_t; + +typedef struct umf_disjoint_pool_shared_limits_t { + size_t max_size; + size_t total_size; // requires atomic access +} umf_disjoint_pool_shared_limits_t; + +umf_disjoint_pool_shared_limits_t * +umfDisjointPoolSharedLimitsCreate(size_t max_size) { + umf_disjoint_pool_shared_limits_t *ptr = + umf_ba_global_alloc(sizeof(umf_disjoint_pool_shared_limits_t)); + ptr->max_size = max_size; + ptr->total_size = 0; + return ptr; +} + +void umfDisjointPoolSharedLimitsDestroy( + umf_disjoint_pool_shared_limits_t *limits) { + umf_ba_global_free(limits); +} + +typedef struct disjoint_pool_t { + // It's important for the map to be destroyed last after buckets and their + // slabs This is because slab's destructor removes the object from the map. + critnib *known_slabs; // (void *, slab_t *) + + // TODO: prev std::shared_timed_mutex - ok? + utils_mutex_t known_slabs_map_lock; + + // Handle to the memory provider + umf_memory_provider_handle_t provider; + + // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) + bucket_t **buckets; + size_t buckets_num; + + // Configuration for this instance + umf_disjoint_pool_params_t params; + + umf_disjoint_pool_shared_limits_t *default_shared_limits; + + // Used in algorithm for finding buckets + size_t min_bucket_size_exp; + + // Coarse-grain allocation min alignment + size_t provider_min_page_size; +} disjoint_pool_t; + +slab_t *create_slab(bucket_t *bucket) { + // In case bucket size is not a multiple of SlabMinSize, we would have + // some padding at the end of the slab. 
+ slab_t *slab = umf_ba_global_alloc(sizeof(slab_t)); + + // TODO check res and errors here and everywhere + // TODO use logger + slab->num_allocated = 0; + slab->first_free_chunk_idx = 0; + slab->bucket = bucket; + + slab->iter = + (slab_list_item_t *)umf_ba_global_alloc(sizeof(slab_list_item_t)); + slab->iter->val = slab; + slab->iter->prev = slab->iter->next = NULL; + + slab->num_chunks = bucket_slab_min_size(bucket) / bucket->size; + slab->chunks = umf_ba_global_alloc(sizeof(bool) * slab->num_chunks); + memset(slab->chunks, 0, sizeof(bool) * slab->num_chunks); + + slab->slab_size = bucket_slab_alloc_size(bucket); + + // NOTE: originally slabs memory were allocated without alignment + // with this registering a slab is simpler and doesn't require multimap + umf_memory_provider_handle_t provider = bucket->pool->provider; + umf_result_t res = + umfMemoryProviderAlloc(provider, slab->slab_size, + bucket_slab_min_size(bucket), &slab->mem_ptr); + + if (res == UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY) { + destroy_slab(slab); + return NULL; + } + + annotate_memory_inaccessible(slab->mem_ptr, slab->slab_size); + fprintf(stderr, "[DP create_slab] bucket: %p, slab_size: %zu\n", + (void *)bucket, slab->slab_size); + + return slab; +} + +void destroy_slab(slab_t *slab) { + fprintf(stderr, "[DP destroy_slab] bucket: %p, slab_size: %zu\n", + (void *)slab->bucket, slab->slab_size); + + umf_memory_provider_handle_t provider = slab->bucket->pool->provider; + umf_result_t res = + umfMemoryProviderFree(provider, slab->mem_ptr, slab->slab_size); + assert(res == UMF_RESULT_SUCCESS); + (void)res; + + umf_ba_global_free(slab->chunks); + umf_ba_global_free(slab->iter); + umf_ba_global_free(slab); +} + +// Return the index of the first available chunk, SIZE_MAX otherwise +size_t slab_find_first_available_chunk_idx(const slab_t *slab) { + // Use the first free chunk index as a hint for the search. + bool *chunk = slab->chunks + sizeof(bool) * slab->first_free_chunk_idx; + while (chunk != slab->chunks + sizeof(bool) * slab->num_chunks) { + // false means not used + if (*chunk == false) { + size_t idx = (chunk - slab->chunks) / sizeof(bool); + fprintf(stderr, + "[DP slab_find_first_available_chunk_idx] idx: %zu\n", idx); + return idx; + } + chunk++; + } + + fprintf(stderr, "[DP slab_find_first_available_chunk_idx] idx: SIZE_MAX\n"); + return SIZE_MAX; +} + +void *slab_get_chunk(slab_t *slab) { + // assert(slab->num_allocated != slab->num_chunks); + + const size_t chunk_idx = slab_find_first_available_chunk_idx(slab); + // Free chunk must exist, otherwise we would have allocated another slab + assert(chunk_idx != SIZE_MAX); + + void *free_chunk = + (uint8_t *)slab->mem_ptr + chunk_idx * slab->bucket->size; + // mark as used + slab->chunks[chunk_idx] = true; + slab->num_allocated += 1; + + // Use the found index as the next hint + slab->first_free_chunk_idx = chunk_idx; + + fprintf(stderr, "[DP slab_get_chunk] num_allocated: %zu\n", + slab->num_allocated); + + return free_chunk; +} + +// TODO remove? why need getter/setter? +void *slab_get(const slab_t *slab) { return slab->mem_ptr; } +void *slab_get_end(const slab_t *slab) { + return (uint8_t *)slab->mem_ptr + bucket_slab_min_size(slab->bucket); +} + +void slab_free_chunk(slab_t *slab, void *ptr) { + // This method should be called through bucket(since we might remove the + // slab as a result), therefore all locks are done on that level. 
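+    //
+    // Worked example (illustrative): for a 64-byte bucket, a pointer at
+    // offset 200 into the slab maps to chunk_idx = 200 / 64 = 3, i.e. the
+    // chunk spanning offsets [192, 256). Any address inside that chunk yields
+    // the same index, which is why previously aligned-up pointers are also
+    // handled correctly below.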
+ + // Make sure that we're in the right slab + assert(ptr >= slab_get(slab) && ptr < slab_get_end(slab)); + + // Even if the pointer p was previously aligned, it's still inside the + // corresponding chunk, so we get the correct index here. + size_t chunk_idx = + ((uint8_t *)ptr - (uint8_t *)slab->mem_ptr) / slab->bucket->size; + + // Make sure that the chunk was allocated + assert(slab->chunks[chunk_idx] && "double free detected"); + slab->chunks[chunk_idx] = false; + slab->num_allocated -= 1; + + if (chunk_idx < slab->first_free_chunk_idx) { + slab->first_free_chunk_idx = chunk_idx; + } + + fprintf(stderr, + "[DP slab_free_chunk] chunk_idx: %zu, num_allocated: %zu, " + "first_free_chunk_idx: %zu\n", + chunk_idx, slab->num_allocated, slab->first_free_chunk_idx); +} + +bool slab_has_avail(const slab_t *slab) { + return slab->num_allocated != slab->num_chunks; +} + +void slab_reg(slab_t *slab) { + bucket_t *bucket = slab->bucket; + // NOTE: changed vs original - slab is already aligned to bucket_slab_min_size + // I also decr end_addr by 1 + void *start_addr = (void *)ALIGN_DOWN((size_t)slab_get(slab), + bucket_slab_min_size(bucket)); + void *end_addr = (uint8_t *)(start_addr) + bucket_slab_min_size(bucket) - 1; + + fprintf(stderr, "[DP slab_reg] slab: %p, start: %p, end %p\n", (void *)slab, + start_addr, end_addr); + + slab_reg_by_addr(start_addr, slab); + slab_reg_by_addr(end_addr, slab); +} + +void slab_unreg(slab_t *slab) { + bucket_t *bucket = slab->bucket; + // NOTE: changed vs original - slab is already aligned to bucket_slab_min_size + // I also decr end_addr by 1 + void *start_addr = (void *)ALIGN_DOWN((size_t)slab_get(slab), + bucket_slab_min_size(bucket)); + void *end_addr = (uint8_t *)(start_addr) + bucket_slab_min_size(bucket) - 1; + + fprintf(stderr, "[DP slab_unreg] slab: %p, start: %p, end %p\n", + (void *)slab, start_addr, end_addr); + + slab_unreg_by_addr(start_addr, slab); + slab_unreg_by_addr(end_addr, slab); +} + +bucket_t *create_bucket(size_t Sz, disjoint_pool_t *pool, + umf_disjoint_pool_shared_limits_t *shared_limits) { + + bucket_t *bucket = (bucket_t *)umf_ba_global_alloc(sizeof(bucket_t)); + + bucket->size = Sz; + bucket->pool = pool; + bucket->available_slabs = NULL; + bucket->unavailable_slabs = NULL; + bucket->chunked_slabs_in_pool = 0; + bucket->alloc_pool_count = 0; + bucket->free_count = 0; + bucket->curr_slabs_in_use = 0; + bucket->curr_slabs_in_pool = 0; + bucket->max_slabs_in_pool = 0; + bucket->alloc_count = 0; + bucket->max_slabs_in_use = 0; + bucket->shared_limits = shared_limits; + + utils_mutex_init(&bucket->bucket_lock); + + return bucket; +} + +void destroy_bucket(bucket_t *bucket) { + // use an extra tmp to store the next iterator before destroying the slab + slab_list_item_t *it = NULL, *tmp = NULL; + LL_FOREACH_SAFE(bucket->available_slabs, it, tmp) { destroy_slab(it->val); } + LL_FOREACH_SAFE(bucket->unavailable_slabs, it, tmp) { + destroy_slab(it->val); + } + + utils_mutex_destroy_not_free(&bucket->bucket_lock); + umf_ba_global_free(bucket); +} + +// The lock must be acquired before calling this method +void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *to_pool) { + *to_pool = true; + + // In case if the slab was previously full and now has 1 available + // chunk, it should be moved to the list of available slabs + if (slab->num_allocated == (slab->num_chunks - 1)) { + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + DL_DELETE(bucket->unavailable_slabs, slab_it); + DL_PREPEND(bucket->available_slabs, 
slab_it); + } + + // Check if slab is empty, and pool it if we can. + if (slab->num_allocated == 0) { + // The slab is now empty. + // If pool has capacity then put the slab in the pool. + // The ToPool parameter indicates whether the Slab will be put in the + // pool or freed. + if (!bucket_can_pool(bucket, to_pool)) { + // Note: since the slab is stored as unique_ptr, just remove it from + // the list to destroy the object. + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + slab_unreg(slab_it->val); + DL_DELETE(bucket->available_slabs, slab_it); + destroy_slab(slab_it->val); + } + } +} + +void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, + bool *to_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_free_chunk(slab, ptr); + bucket_on_free_chunk(bucket, slab, to_pool); + + utils_mutex_unlock(&bucket->bucket_lock); +} + +void bucket_count_alloc(bucket_t *bucket, bool from_pool) { + ++bucket->alloc_count; + if (from_pool) { + ++bucket->alloc_pool_count; + } +} + +void *bucket_get_chunk(bucket_t *bucket, bool *from_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_list_item_t *slab_it = bucket_get_avail_slab(bucket, from_pool); + if (slab_it == NULL) { + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + + void *free_chunk = slab_get_chunk(slab_it->val); + + // If the slab is full, move it to unavailable slabs and update its iterator + if (!(slab_has_avail(slab_it->val))) { + DL_DELETE(bucket->available_slabs, slab_it); + DL_PREPEND(bucket->unavailable_slabs, slab_it); + } + + utils_mutex_unlock(&bucket->bucket_lock); + return free_chunk; +} + +size_t bucket_chunk_cut_off(bucket_t *bucket) { + return bucket_slab_min_size(bucket) / 2; +} + +size_t bucket_slab_alloc_size(bucket_t *bucket) { + return utils_max(bucket->size, bucket_slab_min_size(bucket)); +} + +size_t bucket_slab_min_size(bucket_t *bucket) { + return bucket->pool->params.SlabMinSize; +} + +slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, + bool *from_pool) { + // Return a slab that will be used for a single allocation. 
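+    //
+    // Illustrative example (SlabMinSize assumed to be 64KB): a 48KB request
+    // exceeds bucket_chunk_cut_off() == 32KB, so it is served through this
+    // path with one whole slab per allocation, whereas an 8KB request would
+    // go through bucket_get_avail_slab() and share a slab with other chunks.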
+ if (bucket->available_slabs == NULL) { + slab_t *slab = create_slab(bucket); + if (slab == NULL) { + //assert(0); + return NULL; + } + + slab_reg(slab); + DL_PREPEND(bucket->available_slabs, slab->iter); + *from_pool = false; + bucket_update_stats(bucket, 1, 0); + } else { + bucket_decrement_pool(bucket, from_pool); + } + + return bucket->available_slabs; +} + +void *bucket_get_slab(bucket_t *bucket, bool *from_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_list_item_t *slab_it = bucket_get_avail_full_slab(bucket, from_pool); + if (slab_it == NULL) { + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + slab_t *slab = slab_it->val; + void *ptr = slab_get(slab); + + DL_DELETE(bucket->available_slabs, slab_it); + slab_it->prev = NULL; + DL_PREPEND(bucket->unavailable_slabs, slab_it); + + utils_mutex_unlock(&bucket->bucket_lock); + return ptr; +} + +void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + if (bucket_can_pool(bucket, to_pool)) { + DL_DELETE(bucket->unavailable_slabs, slab_it); + slab_it->prev = NULL; + DL_PREPEND(bucket->available_slabs, slab_it); + } else { + slab_unreg(slab_it->val); + DL_DELETE(bucket->unavailable_slabs, slab_it); + destroy_slab(slab_it->val); + } + utils_mutex_unlock(&bucket->bucket_lock); +} + +slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool) { + if (bucket->available_slabs == NULL) { + slab_t *slab = create_slab(bucket); + if (slab == NULL) { + // TODO log + // TODO replace asserts + return NULL; + } + + slab_reg(slab); + DL_PREPEND(bucket->available_slabs, slab->iter); + bucket_update_stats(bucket, 1, 0); + *from_pool = false; + } else { + slab_t *slab = bucket->available_slabs->val; + if (slab->num_allocated == 0) { + // If this was an empty slab, it was in the pool. + // Now it is no longer in the pool, so update count. + --bucket->chunked_slabs_in_pool; + bucket_decrement_pool(bucket, from_pool); + } else { + // Allocation from existing slab is treated as from pool for statistics. + *from_pool = true; + } + } + + return bucket->available_slabs; +} + +size_t bucket_capacity(bucket_t *bucket) { + // For buckets used in chunked mode, just one slab in pool is sufficient. + // For larger buckets, the capacity could be more and is adjustable. + if (bucket->size <= bucket_chunk_cut_off(bucket)) { + return 1; + } else { + return bucket->pool->params.Capacity; + } +} + +void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { + if (bucket->pool->params.PoolTrace == 0) { + return; + } + + bucket->curr_slabs_in_use += in_use; + bucket->max_slabs_in_use = + utils_max(bucket->curr_slabs_in_use, bucket->max_slabs_in_use); + bucket->curr_slabs_in_pool += in_pool; + bucket->max_slabs_in_pool = + utils_max(bucket->curr_slabs_in_pool, bucket->max_slabs_in_pool); + + // Increment or decrement current pool sizes based on whether + // slab was added to or removed from pool. + bucket->pool->params.CurPoolSize += + in_pool * bucket_slab_alloc_size(bucket); +} + +// If a slab was available in the pool then note that the current pooled +// size has reduced by the size of a slab in this bucket. 
+void bucket_decrement_pool(bucket_t *bucket, bool *from_pool) { + *from_pool = true; + bucket_update_stats(bucket, 1, -1); + utils_fetch_and_add64(&bucket->shared_limits->total_size, + -(long long)bucket_slab_alloc_size(bucket)); +} + +bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { + size_t new_free_slabs_in_bucket; + + // Check if this bucket is used in chunked form or as full slabs. + bool chunkedBucket = bucket->size <= bucket_chunk_cut_off(bucket); + if (chunkedBucket) { + new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; + } else { + // TODO optimize + size_t avail_num = 0; + slab_list_item_t *it = NULL; + DL_FOREACH(bucket->available_slabs, it) { avail_num++; } + new_free_slabs_in_bucket = avail_num + 1; + } + + if (bucket_capacity(bucket) >= new_free_slabs_in_bucket) { + size_t pool_size = 0; + utils_atomic_load_acquire(&bucket->shared_limits->total_size, + &pool_size); + while (true) { + size_t new_pool_size = pool_size + bucket_slab_alloc_size(bucket); + + if (bucket->shared_limits->max_size < new_pool_size) { + break; + } + +// TODO!!! +#ifdef _WIN32 + if (bucket->shared_limits->total_size != new_pool_size) { + bucket->shared_limits->total_size = new_pool_size; +#else + if (utils_compare_exchange(&bucket->shared_limits->total_size, + &pool_size, &new_pool_size)) { +#endif + if (chunkedBucket) { + ++bucket->chunked_slabs_in_pool; + } + + bucket_update_stats(bucket, -1, 1); + *to_pool = true; + return true; + } + } + } + + bucket_update_stats(bucket, -1, 0); + *to_pool = false; + return false; +} + +utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket) { + return &bucket->pool->known_slabs_map_lock; +} + +void slab_reg_by_addr(void *addr, slab_t *slab) { + bucket_t *bucket = slab->bucket; + disjoint_pool_t *pool = bucket->pool; + utils_mutex_t *lock = &pool->known_slabs_map_lock; + critnib *slabs = pool->known_slabs; + + utils_mutex_lock(lock); + + // TODO multimap? + slab_t *t = (slab_t *)critnib_get(slabs, (uintptr_t)addr); + assert(t == NULL); + (void)t; + + fprintf(stderr, "[DP slab_reg_by_addr] addr: %p, slab: %p\n", addr, + (void *)slab); + critnib_insert(slabs, (uintptr_t)addr, slab, 0); + + utils_mutex_unlock(lock); +} + +void slab_unreg_by_addr(void *addr, slab_t *slab) { + bucket_t *bucket = slab->bucket; + disjoint_pool_t *pool = bucket->pool; + utils_mutex_t *lock = &pool->known_slabs_map_lock; + critnib *slabs = pool->known_slabs; + + utils_mutex_lock(lock); + + // debug only + // assume single-value per key + slab_t *known_slab = (slab_t *)critnib_get(slabs, (uintptr_t)addr); + assert(known_slab != NULL && "Slab is not found"); + assert(slab == known_slab); + (void)known_slab; + + fprintf(stderr, "[DP slab_unreg_by_addr] addr: %p, slab: %p\n", addr, + (void *)slab); + critnib_remove(slabs, (uintptr_t)addr); + + utils_mutex_unlock(lock); +} + +size_t AllocImpl_sizeToIdx(disjoint_pool_t *pool, size_t size) { + assert(size <= CutOff && "Unexpected size"); + assert(size > 0 && "Unexpected size"); + + size_t min_bucket_size = (size_t)1 << pool->min_bucket_size_exp; + if (size < min_bucket_size) { + return 0; + } + + // Get the position of the leftmost set bit. 
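+    // Worked example for the index math below (illustrative, assuming a
+    // 64-byte MinBucketSize, i.e. min_bucket_size_exp == 6):
+    //   size 64  -> position 6, power of 2           -> index 0 (bucket 64)
+    //   size 65  -> position 6, not above halfway     -> index 1 (bucket 96)
+    //   size 96  -> position 6, not above halfway     -> index 1 (bucket 96)
+    //   size 100 -> position 6, above halfway (> 96)  -> index 2 (bucket 128)
+    //   size 128 -> position 7, power of 2            -> index 2 (bucket 128)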
+ size_t position = getLeftmostSetBitPos(size); + + bool is_power_of_2 = 0 == (size & (size - 1)); + bool larger_than_halfway_between_powers_of_2 = + !is_power_of_2 && + (bool)((size - 1) & ((uint64_t)(1) << (position - 1))); + size_t index = (position - pool->min_bucket_size_exp) * 2 + + (int)(!is_power_of_2) + + (int)larger_than_halfway_between_powers_of_2; + + return index; +} + +umf_disjoint_pool_shared_limits_t *AllocImpl_getLimits(disjoint_pool_t *pool) { + if (pool->params.SharedLimits) { + return pool->params.SharedLimits; + } else { + return pool->default_shared_limits; + } +} + +bucket_t *AllocImpl_findBucket(disjoint_pool_t *pool, size_t Size) { + size_t calculatedIdx = AllocImpl_sizeToIdx(pool, Size); + bucket_t *bucket = pool->buckets[calculatedIdx]; + assert(bucket->size >= Size); + (void)bucket; + + if (calculatedIdx > 0) { + bucket_t *bucket_prev = pool->buckets[calculatedIdx - 1]; + assert(bucket_prev->size < Size); + (void)bucket_prev; + } + + return pool->buckets[calculatedIdx]; +} + +void AllocImpl_printStats(disjoint_pool_t *pool, bool *TitlePrinted, + size_t *HighBucketSize, size_t *HighPeakSlabsInUse, + const char *MTName) { + (void)TitlePrinted; // TODO + (void)MTName; // TODO + + *HighBucketSize = 0; + *HighPeakSlabsInUse = 0; + for (size_t i = 0; i < pool->buckets_num; i++) { + // TODO + //(*B).printStats(TitlePrinted, MTName); + bucket_t *bucket = pool->buckets[i]; + *HighPeakSlabsInUse = + utils_max(bucket->max_slabs_in_use, *HighPeakSlabsInUse); + if (bucket->alloc_count) { + *HighBucketSize = + utils_max(bucket_slab_alloc_size(bucket), *HighBucketSize); + } + } +} + +static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, + size_t size, size_t alignment) { + void *ptr; + umf_result_t ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + return NULL; + } + annotate_memory_inaccessible(ptr, size); + return ptr; +} + +static umf_result_t memoryProviderFree(umf_memory_provider_handle_t hProvider, + void *ptr) { + size_t size = 0; + + if (ptr) { + umf_alloc_info_t allocInfo = {NULL, 0, NULL}; + umf_result_t umf_result = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); + if (umf_result == UMF_RESULT_SUCCESS) { + size = allocInfo.baseSize; + } + } + + umf_result_t ret = umfMemoryProviderFree(hProvider, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { + + TLS_last_allocation_error = ret; + // throw MemoryProviderError{ret}; + return ret; + } + return UMF_RESULT_SUCCESS; +} + +void *AllocImpl_allocate(disjoint_pool_t *pool, size_t Size, bool *FromPool) { + void *Ptr; + + if (Size == 0) { + return NULL; + } + + *FromPool = false; + if (Size > pool->params.MaxPoolableSize) { + Ptr = memoryProviderAlloc(pool->provider, Size, 0); + + if (Ptr == NULL) { + // TODO get code from func + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + return NULL; + } + + annotate_memory_undefined(Ptr, Size); + return Ptr; + } + + bucket_t *bucket = AllocImpl_findBucket(pool, Size); + + if (Size > bucket_chunk_cut_off(bucket)) { + Ptr = bucket_get_slab(bucket, FromPool); + } else { + Ptr = bucket_get_chunk(bucket, FromPool); + } + + if (Ptr == NULL) { + // TODO get code from func + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + return NULL; + } + + if (pool->params.PoolTrace > 1) { + bucket_count_alloc(bucket, FromPool); + } + + VALGRIND_DO_MEMPOOL_ALLOC(pool, Ptr, Size); + annotate_memory_undefined(Ptr, bucket->size); + + return Ptr; +} + +void 
*AllocImpl_allocate_align(disjoint_pool_t *pool, size_t Size, + size_t Alignment, bool *FromPool) { + void *Ptr; + + if (Size == 0) { + return NULL; + } + + if (Alignment <= 1) { + return AllocImpl_allocate(pool, Size, FromPool); + } + + size_t AlignedSize; + if (Alignment <= pool->provider_min_page_size) { + // This allocation will be served from a Bucket which size is multiple + // of Alignment and Slab address is aligned to provider_min_page_size + // so the address will be properly aligned. + AlignedSize = (Size > 1) ? ALIGN_UP(Size, Alignment) : Alignment; + } else { + // Slabs are only aligned to provider_min_page_size, we need to compensate + // for that in case the allocation is within pooling limit. + // TODO: consider creating properly-aligned Slabs on demand + AlignedSize = Size + Alignment - 1; + } + + // Check if requested allocation size is within pooling limit. + // If not, just request aligned pointer from the system. + *FromPool = false; + if (AlignedSize > pool->params.MaxPoolableSize) { + Ptr = memoryProviderAlloc(pool->provider, Size, Alignment); + assert(Ptr); + annotate_memory_undefined(Ptr, Size); + return Ptr; + } + + bucket_t *bucket = AllocImpl_findBucket(pool, AlignedSize); + + if (AlignedSize > bucket_chunk_cut_off(bucket)) { + Ptr = bucket_get_slab(bucket, FromPool); + } else { + Ptr = bucket_get_chunk(bucket, FromPool); + } + + assert(Ptr); + if (pool->params.PoolTrace > 1) { + bucket_count_alloc(bucket, FromPool); + } + + VALGRIND_DO_MEMPOOL_ALLOC(pool, ALIGN_UP((size_t)Ptr, Alignment), Size); + annotate_memory_undefined((void *)ALIGN_UP((size_t)Ptr, Alignment), Size); + return (void *)ALIGN_UP((size_t)Ptr, Alignment); +} + +umf_result_t AllocImpl_deallocate(disjoint_pool_t *pool, void *Ptr, + bool *to_pool) { + if (Ptr == NULL) { + return UMF_RESULT_SUCCESS; + } + + void *SlabPtr = (void *)ALIGN_DOWN((size_t)Ptr, pool->params.SlabMinSize); + + // Lock the map on read + utils_mutex_lock(&pool->known_slabs_map_lock); + + *to_pool = false; + + slab_t *slab = (slab_t *)critnib_get(pool->known_slabs, (uintptr_t)SlabPtr); + //auto Slabs = getKnownSlabs().equal_range(SlabPtr); + if (slab == NULL) { + utils_mutex_unlock(&pool->known_slabs_map_lock); + umf_result_t ret = memoryProviderFree(pool->provider, Ptr); + return ret; + } + + // TODO - no multimap + // for (auto It = Slabs.first; It != Slabs.second; ++It) { + + // The slab object won't be deleted until it's removed from the map which is + // protected by the lock, so it's safe to access it here. + if (Ptr >= slab_get(slab) && Ptr < slab_get_end(slab)) { + // Unlock the map before freeing the chunk, it may be locked on write + // there + utils_mutex_unlock(&pool->known_slabs_map_lock); + bucket_t *bucket = slab->bucket; + + if (pool->params.PoolTrace > 1) { + bucket->free_count++; + } + + VALGRIND_DO_MEMPOOL_FREE(pool, Ptr); + annotate_memory_inaccessible(Ptr, bucket->size); + if (bucket->size <= bucket_chunk_cut_off(bucket)) { + bucket_free_chunk(bucket, Ptr, slab, to_pool); + } else { + bucket_free_slab(bucket, slab, to_pool); + } + + return UMF_RESULT_SUCCESS; + } + //} // for multimap + + utils_mutex_unlock(&pool->known_slabs_map_lock); + // There is a rare case when we have a pointer from system allocation next + // to some slab with an entry in the map. So we find a slab + // but the range checks fail. + memoryProviderFree(pool->provider, Ptr); + return UMF_RESULT_SUCCESS; +} + +/* +// TODO? 
+std::ostream &operator<<(std::ostream &Os, slab_t &Slab) { + Os << "Slab<" << slab_get(&Slab) << ", " << slab_get_end(&Slab) << ", " + << slab->bucket->getSize() << ">"; + return Os; +} +*/ + +/* +// TODO move +void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { + if (alloc_count) { + if (!TitlePrinted) { + std::cout << Label << " memory statistics\n"; + std::cout << std::setw(14) << "Bucket Size" << std::setw(12) + << "Allocs" << std::setw(12) << "Frees" << std::setw(18) + << "Allocs from Pool" << std::setw(20) + << "Peak Slabs in Use" << std::setw(21) + << "Peak Slabs in Pool" << std::endl; + TitlePrinted = true; + } + std::cout << std::setw(14) << getSize() << std::setw(12) << alloc_count + << std::setw(12) << free_count << std::setw(18) + << allocPoolCount << std::setw(20) << max_slabs_in_use + << std::setw(21) << max_slabs_in_pool << std::endl; + } +} +*/ + +umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, + void *params, void **ppPool) { + if (!provider) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + disjoint_pool_t *disjoint_pool = + (disjoint_pool_t *)umf_ba_global_alloc(sizeof(struct disjoint_pool_t)); + if (!disjoint_pool) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + umf_disjoint_pool_params_t *dp_params = + (umf_disjoint_pool_params_t *)params; + + // MinBucketSize parameter must be a power of 2 for bucket sizes + // to generate correctly. + if (!dp_params->MinBucketSize || + !((dp_params->MinBucketSize & (dp_params->MinBucketSize - 1)) == 0)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + VALGRIND_DO_CREATE_MEMPOOL(disjoint_pool, 0, 0); + + disjoint_pool->provider = provider; + disjoint_pool->params = *dp_params; + + utils_mutex_init(&disjoint_pool->known_slabs_map_lock); + disjoint_pool->known_slabs = critnib_new(); + + // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. + // Powers of 2 and the value halfway between the powers of 2. + size_t Size1 = disjoint_pool->params.MinBucketSize; + + // MinBucketSize cannot be larger than CutOff. + Size1 = utils_min(Size1, CutOff); + + // Buckets sized smaller than the bucket default size- 8 aren't needed. + Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); + + // Calculate the exponent for MinBucketSize used for finding buckets. 
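+    //
+    // Worked example (illustrative): with Size1 == 64 the generated bucket
+    // sizes are 64, 96, 128, 192, 256, 384, ... up to CutOff (2GB), i.e.
+    // each power of two plus the value halfway to the next one, giving
+    // 51 buckets in total, and min_bucket_size_exp computed below is 6.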
+ disjoint_pool->min_bucket_size_exp = (size_t)log2Utils(Size1); + disjoint_pool->default_shared_limits = + umfDisjointPoolSharedLimitsCreate(SIZE_MAX); + + // count number of buckets, start from 1 + disjoint_pool->buckets_num = 1; + size_t Size2 = Size1 + Size1 / 2; + size_t ts2 = Size2, ts1 = Size1; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { + disjoint_pool->buckets_num += 2; + } + disjoint_pool->buckets = (bucket_t **)umf_ba_global_alloc( + sizeof(bucket_t *) * disjoint_pool->buckets_num); + + int i = 0; + Size1 = ts1; + Size2 = ts2; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2, i += 2) { + disjoint_pool->buckets[i] = create_bucket( + Size1, disjoint_pool, AllocImpl_getLimits(disjoint_pool)); + disjoint_pool->buckets[i + 1] = create_bucket( + Size2, disjoint_pool, AllocImpl_getLimits(disjoint_pool)); + } + disjoint_pool->buckets[i] = create_bucket( + CutOff, disjoint_pool, AllocImpl_getLimits(disjoint_pool)); + + umf_result_t ret = umfMemoryProviderGetMinPageSize( + provider, NULL, &disjoint_pool->provider_min_page_size); + if (ret != UMF_RESULT_SUCCESS) { + disjoint_pool->provider_min_page_size = 0; + } + + *ppPool = (void *)disjoint_pool; + + return UMF_RESULT_SUCCESS; +} + +void *disjoint_pool_malloc(void *pool, size_t size) { + // For full-slab allocations indicates + // whether slab is from Pool. + + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + bool FromPool; + void *Ptr = AllocImpl_allocate(hPool, size, &FromPool); + + if (hPool->params.PoolTrace > 2) { + const char *MT = hPool->params.Name; + (void)MT; + //std::cout << "Allocated " << std::setw(8) << size << " " << MT + // << " bytes from " << (FromPool ? "Pool" : "Provider") << " ->" + // << Ptr << std::endl; + } + return Ptr; +} + +void *disjoint_pool_calloc(void *pool, size_t num, size_t size) { + (void)pool; + (void)num; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_realloc(void *pool, void *ptr, size_t size) { + (void)pool; + (void)ptr; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + bool FromPool; + void *Ptr = AllocImpl_allocate_align(hPool, size, alignment, &FromPool); + + if (hPool->params.PoolTrace > 2) { + const char *MT = hPool->params.Name; + (void)MT; + //std::cout << "Allocated " << std::setw(8) << size << " " << MT + // << " bytes aligned at " << alignment << " from " + // << (FromPool ? "Pool" : "Provider") << " ->" << Ptr + // << std::endl; + } + + return Ptr; +} + +size_t disjoint_pool_malloc_usable_size(void *pool, void *ptr) { + (void)pool; + (void)ptr; + + // Not supported + return 0; +} + +umf_result_t disjoint_pool_free(void *pool, void *ptr) { + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + bool ToPool; + umf_result_t ret = AllocImpl_deallocate(hPool, ptr, &ToPool); + /* + if (ret == UMF_RESULT_SUCCESS) { + + if (impl->getParams().PoolTrace > 2) { + auto MT = impl->getParams().Name; + std::cout << "Freed " << MT << " " << ptr << " to " + << (ToPool ? 
"Pool" : "Provider") + << ", Current total pool size " + << impl->getLimits()->TotalSize.load() + << ", Current pool size for " << MT << " " + << impl->getParams().CurPoolSize << "\n"; + } + }*/ + return ret; +} + +umf_result_t disjoint_pool_get_last_allocation_error(void *pool) { + (void)pool; + + return TLS_last_allocation_error; +} + +// Define destructor for use with unique_ptr +void disjoint_pool_finalize(void *pool) { + + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + for (size_t i = 0; i < hPool->buckets_num; i++) { + destroy_bucket(hPool->buckets[i]); + } + + VALGRIND_DO_DESTROY_MEMPOOL(hPool); + + umfDisjointPoolSharedLimitsDestroy(hPool->default_shared_limits); + critnib_delete(hPool->known_slabs); + + utils_mutex_destroy_not_free(&hPool->known_slabs_map_lock); + + umf_ba_global_free(hPool); + + /* + if (impl->getParams().PoolTrace > 1) { + bool TitlePrinted = false; + size_t HighBucketSize; + size_t HighPeakSlabsInUse; + auto name = impl->getParams().Name; + //try { // cannot throw in destructor + impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, + name); + if (TitlePrinted) { + std::cout << "Current Pool Size " + << impl->getLimits()->TotalSize.load() << std::endl; + std::cout << "Suggested Setting=;" + << std::string(1, (char)tolower(name[0])) + << std::string(name + 1) << ":" << HighBucketSize << "," + << HighPeakSlabsInUse << ",64K" << std::endl; + } + //} catch (...) { // ignore exceptions + // } + } + */ +} + +static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = disjoint_pool_initialize, + .finalize = disjoint_pool_finalize, + .malloc = disjoint_pool_malloc, + .calloc = disjoint_pool_calloc, + .realloc = disjoint_pool_realloc, + .aligned_malloc = disjoint_pool_aligned_malloc, + .malloc_usable_size = disjoint_pool_malloc_usable_size, + .free = disjoint_pool_free, + .get_last_allocation_error = disjoint_pool_get_last_allocation_error, +}; + +umf_memory_pool_ops_t *umfDisjointPoolOps(void) { + return &UMF_DISJOINT_POOL_OPS; +} diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp deleted file mode 100644 index 2cf8df7a4..000000000 --- a/src/pool/pool_disjoint.cpp +++ /dev/null @@ -1,1131 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// TODO: replace with logger? -#include - -#include "provider/provider_tracking.h" - -#include "../cpp_helpers.hpp" -#include "pool_disjoint.h" -#include "umf.h" -#include "utils_log.h" -#include "utils_math.h" -#include "utils_sanitizers.h" - -// Temporary solution for disabling memory poisoning. This is needed because -// AddressSanitizer does not support memory poisoning for GPU allocations. 
-// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 -#ifndef POISON_MEMORY -#define POISON_MEMORY 0 -#endif - -static inline void annotate_memory_inaccessible([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_inaccessible(ptr, size); -#endif -} - -static inline void annotate_memory_undefined([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_undefined(ptr, size); -#endif -} - -typedef struct umf_disjoint_pool_shared_limits_t { - size_t MaxSize; - std::atomic TotalSize; -} umf_disjoint_pool_shared_limits_t; - -class DisjointPool { - public: - class AllocImpl; - using Config = umf_disjoint_pool_params_t; - - umf_result_t initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_t *parameters); - void *malloc(size_t size); - void *calloc(size_t, size_t); - void *realloc(void *, size_t); - void *aligned_malloc(size_t size, size_t alignment); - size_t malloc_usable_size(void *); - umf_result_t free(void *ptr); - umf_result_t get_last_allocation_error(); - - DisjointPool(); - ~DisjointPool(); - - private: - std::unique_ptr impl; -}; - -umf_disjoint_pool_shared_limits_t * -umfDisjointPoolSharedLimitsCreate(size_t MaxSize) { - return new umf_disjoint_pool_shared_limits_t{MaxSize, 0}; -} - -void umfDisjointPoolSharedLimitsDestroy( - umf_disjoint_pool_shared_limits_t *limits) { - delete limits; -} - -// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is -// requested. The implementation distinguishes between allocations of size -// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. -// Allocation requests smaller than ChunkCutoff use chunks taken from a single -// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation -// size, and 8-byte allocations, only 1 in ~8000 requests results in a new -// coarse-grain allocation. Freeing results only in a chunk of a larger -// allocation to be marked as available and no real return to the system. An -// allocation is returned to the system only when all chunks in the larger -// allocation are freed by the program. Allocations larger than ChunkCutOff use -// a separate coarse-grain allocation for each request. These are subject to -// "pooling". That is, when such an allocation is freed by the program it is -// retained in a pool. The pool is available for future allocations, which means -// there are fewer actual coarse-grain allocations/deallocations. - -// The largest size which is allocated via the allocator. -// Allocations with size > CutOff bypass the pool and -// go directly to the provider. -static constexpr size_t CutOff = (size_t)1 << 31; // 2GB - -// Aligns the pointer down to the specified alignment -// (e.g. returns 8 for Size = 13, Alignment = 8) -static void *AlignPtrDown(void *Ptr, const size_t Alignment) { - return reinterpret_cast((reinterpret_cast(Ptr)) & - (~(Alignment - 1))); -} - -// Aligns the pointer up to the specified alignment -// (e.g. returns 16 for Size = 13, Alignment = 8) -static void *AlignPtrUp(void *Ptr, const size_t Alignment) { - void *AlignedPtr = AlignPtrDown(Ptr, Alignment); - // Special case when the pointer is already aligned - if (Ptr == AlignedPtr) { - return Ptr; - } - return static_cast(AlignedPtr) + Alignment; -} - -// Aligns the value up to the specified alignment -// (e.g. 
returns 16 for Size = 13, Alignment = 8) -static size_t AlignUp(size_t Val, size_t Alignment) { - assert(Alignment > 0); - return (Val + Alignment - 1) & (~(Alignment - 1)); -} - -typedef struct MemoryProviderError { - umf_result_t code; -} MemoryProviderError_t; - -class Bucket; - -// Represents the allocated memory block of size 'SlabMinSize' -// Internally, it splits the memory block into chunks. The number of -// chunks depends of the size of a Bucket which created the Slab. -// Note: Bucket's methods are responsible for thread safety of Slab access, -// so no locking happens here. -class Slab { - - // Pointer to the allocated memory of SlabMinSize bytes - void *MemPtr; - - // Represents the current state of each chunk: - // if the bit is set then the chunk is allocated - // the chunk is free for allocation otherwise - std::vector Chunks; - - // Total number of allocated chunks at the moment. - size_t NumAllocated = 0; - - // The bucket which the slab belongs to - Bucket &bucket; - - using ListIter = std::list>::iterator; - - // Store iterator to the corresponding node in avail/unavail list - // to achieve O(1) removal - ListIter SlabListIter; - - // Hints where to start search for free chunk in a slab - size_t FirstFreeChunkIdx = 0; - - // Return the index of the first available chunk, SIZE_MAX otherwise - size_t FindFirstAvailableChunkIdx() const; - - // Register/Unregister the slab in the global slab address map. - void regSlab(Slab &); - void unregSlab(Slab &); - static void regSlabByAddr(void *, Slab &); - static void unregSlabByAddr(void *, Slab &); - - public: - Slab(Bucket &); - ~Slab(); - - void setIterator(ListIter It) { SlabListIter = It; } - ListIter getIterator() const { return SlabListIter; } - - size_t getNumAllocated() const { return NumAllocated; } - - // Get pointer to allocation that is one piece of this slab. - void *getChunk(); - - // Get pointer to allocation that is this entire slab. - void *getSlab(); - - void *getPtr() const { return MemPtr; } - void *getEnd() const; - - size_t getChunkSize() const; - size_t getNumChunks() const { return Chunks.size(); } - - bool hasAvail(); - - Bucket &getBucket(); - const Bucket &getBucket() const; - - void freeChunk(void *Ptr); -}; - -class Bucket { - const size_t Size; - - // List of slabs which have at least 1 available chunk. - std::list> AvailableSlabs; - - // List of slabs with 0 available chunk. - std::list> UnavailableSlabs; - - // Protects the bucket and all the corresponding slabs - std::mutex BucketLock; - - // Reference to the allocator context, used access memory allocation - // routines, slab map and etc. - DisjointPool::AllocImpl &OwnAllocCtx; - - // For buckets used in chunked mode, a counter of slabs in the pool. - // For allocations that use an entire slab each, the entries in the Available - // list are entries in the pool.Each slab is available for a new - // allocation.The size of the Available list is the size of the pool. - // For allocations that use slabs in chunked mode, slabs will be in the - // Available list if any one or more of their chunks is free.The entire slab - // is not necessarily free, just some chunks in the slab are free. To - // implement pooling we will allow one slab in the Available list to be - // entirely empty. Normally such a slab would have been freed. But - // now we don't, and treat this slab as "in the pool". - // When a slab becomes entirely free we have to decide whether to return it - // to the provider or keep it allocated. 
A simple check for size of the - // Available list is not sufficient to check whether any slab has been - // pooled yet. We would have to traverse the entire Available list and check - // if any of them is entirely free. Instead we keep a counter of entirely - // empty slabs within the Available list to speed up the process of checking - // if a slab in this bucket is already pooled. - size_t chunkedSlabsInPool; - - // Statistics - size_t allocPoolCount; - size_t freeCount; - size_t currSlabsInUse; - size_t currSlabsInPool; - size_t maxSlabsInPool; - - public: - // Statistics - size_t allocCount; - size_t maxSlabsInUse; - - Bucket(size_t Sz, DisjointPool::AllocImpl &AllocCtx) - : Size{Sz}, OwnAllocCtx{AllocCtx}, chunkedSlabsInPool(0), - allocPoolCount(0), freeCount(0), currSlabsInUse(0), - currSlabsInPool(0), maxSlabsInPool(0), allocCount(0), - maxSlabsInUse(0) {} - - // Get pointer to allocation that is one piece of an available slab in this - // bucket. - void *getChunk(bool &FromPool); - - // Get pointer to allocation that is a full slab in this bucket. - void *getSlab(bool &FromPool); - - // Return the allocation size of this bucket. - size_t getSize() const { return Size; } - - // Free an allocation that is one piece of a slab in this bucket. - void freeChunk(void *Ptr, Slab &Slab, bool &ToPool); - - // Free an allocation that is a full slab in this bucket. - void freeSlab(Slab &Slab, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle(); - - DisjointPool::AllocImpl &getAllocCtx() { return OwnAllocCtx; } - - // Check whether an allocation to be freed can be placed in the pool. - bool CanPool(bool &ToPool); - - // The minimum allocation size for any slab. - size_t SlabMinSize(); - - // The allocation size for a slab in this bucket. - size_t SlabAllocSize(); - - // The minimum size of a chunk from this bucket's slabs. - size_t ChunkCutOff(); - - // The number of slabs in this bucket that can be in the pool. - size_t Capacity(); - - // The maximum allocation size subject to pooling. - size_t MaxPoolableSize(); - - // Update allocation count - void countAlloc(bool FromPool); - - // Update free count - void countFree(); - - // Update statistics of Available/Unavailable - void updateStats(int InUse, int InPool); - - // Print bucket statistics - void printStats(bool &TitlePrinted, const std::string &Label); - - private: - void onFreeChunk(Slab &, bool &ToPool); - - // Update statistics of pool usage, and indicate that an allocation was made - // from the pool. - void decrementPool(bool &FromPool); - - // Get a slab to be used for chunked allocations. - decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromPool); - - // Get a slab that will be used as a whole for a single allocation. - decltype(AvailableSlabs.begin()) getAvailFullSlab(bool &FromPool); -}; - -class DisjointPool::AllocImpl { - // It's important for the map to be destroyed last after buckets and their - // slabs This is because slab's destructor removes the object from the map. 
- std::unordered_multimap KnownSlabs; - std::shared_timed_mutex KnownSlabsMapLock; - - // Handle to the memory provider - umf_memory_provider_handle_t MemHandle; - - // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) - std::vector> Buckets; - - // Configuration for this instance - umf_disjoint_pool_params_t params; - - umf_disjoint_pool_shared_limits_t DefaultSharedLimits = { - (std::numeric_limits::max)(), 0}; - - // Used in algorithm for finding buckets - std::size_t MinBucketSizeExp; - - // Coarse-grain allocation min alignment - size_t ProviderMinPageSize; - - public: - AllocImpl(umf_memory_provider_handle_t hProvider, - umf_disjoint_pool_params_t *params) - : MemHandle{hProvider}, params(*params) { - - VALGRIND_DO_CREATE_MEMPOOL(this, 0, 0); - - // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. - // Powers of 2 and the value halfway between the powers of 2. - auto Size1 = this->params.MinBucketSize; - // MinBucketSize cannot be larger than CutOff. - Size1 = std::min(Size1, CutOff); - // Buckets sized smaller than the bucket default size- 8 aren't needed. - Size1 = std::max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); - // Calculate the exponent for MinBucketSize used for finding buckets. - MinBucketSizeExp = (size_t)log2Utils(Size1); - auto Size2 = Size1 + Size1 / 2; - for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { - Buckets.push_back(std::make_unique(Size1, *this)); - Buckets.push_back(std::make_unique(Size2, *this)); - } - Buckets.push_back(std::make_unique(CutOff, *this)); - - auto ret = umfMemoryProviderGetMinPageSize(hProvider, nullptr, - &ProviderMinPageSize); - if (ret != UMF_RESULT_SUCCESS) { - ProviderMinPageSize = 0; - } - } - - ~AllocImpl() { VALGRIND_DO_DESTROY_MEMPOOL(this); } - - void *allocate(size_t Size, size_t Alignment, bool &FromPool); - void *allocate(size_t Size, bool &FromPool); - void deallocate(void *Ptr, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle() { return MemHandle; } - - std::shared_timed_mutex &getKnownSlabsMapLock() { - return KnownSlabsMapLock; - } - std::unordered_multimap &getKnownSlabs() { - return KnownSlabs; - } - - size_t SlabMinSize() { return params.SlabMinSize; }; - - umf_disjoint_pool_params_t &getParams() { return params; } - - umf_disjoint_pool_shared_limits_t *getLimits() { - if (params.SharedLimits) { - return params.SharedLimits; - } else { - return &DefaultSharedLimits; - } - }; - - void printStats(bool &TitlePrinted, size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, const std::string &Label); - - private: - Bucket &findBucket(size_t Size); - std::size_t sizeToIdx(size_t Size); -}; - -static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, - size_t size, size_t alignment = 0) { - void *ptr; - auto ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); - if (ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } - annotate_memory_inaccessible(ptr, size); - return ptr; -} - -static void memoryProviderFree(umf_memory_provider_handle_t hProvider, - void *ptr) { - size_t size = 0; - - if (ptr) { - umf_alloc_info_t allocInfo = {NULL, 0, NULL}; - umf_result_t umf_result = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); - if (umf_result == UMF_RESULT_SUCCESS) { - size = allocInfo.baseSize; - } - } - - auto ret = umfMemoryProviderFree(hProvider, ptr, size); - if (ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } -} - -bool operator==(const Slab &Lhs, const Slab &Rhs) { - return Lhs.getPtr() == Rhs.getPtr(); -} - 
-std::ostream &operator<<(std::ostream &Os, const Slab &Slab) { - Os << "Slab<" << Slab.getPtr() << ", " << Slab.getEnd() << ", " - << Slab.getBucket().getSize() << ">"; - return Os; -} - -Slab::Slab(Bucket &Bkt) - : // In case bucket size is not a multiple of SlabMinSize, we would have - // some padding at the end of the slab. - Chunks(Bkt.SlabMinSize() / Bkt.getSize()), NumAllocated{0}, - bucket(Bkt), SlabListIter{}, FirstFreeChunkIdx{0} { - auto SlabSize = Bkt.SlabAllocSize(); - MemPtr = memoryProviderAlloc(Bkt.getMemHandle(), SlabSize); - regSlab(*this); -} - -Slab::~Slab() { - try { - unregSlab(*this); - } catch (std::exception &e) { - LOG_ERR("DisjointPool: unexpected error: %s", e.what()); - } - - try { - memoryProviderFree(bucket.getMemHandle(), MemPtr); - } catch (MemoryProviderError &e) { - LOG_ERR("DisjointPool: error from memory provider: %d", e.code); - - if (e.code == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { - const char *message = ""; - int error = 0; - - try { - umfMemoryProviderGetLastNativeError( - umfGetLastFailedMemoryProvider(), &message, &error); - LOG_ERR("Native error msg: %s, native error code: %d", message, - error); - } catch (...) { - // ignore any additional errors from logger - } - } - } -} - -// Return the index of the first available chunk, SIZE_MAX otherwise -size_t Slab::FindFirstAvailableChunkIdx() const { - // Use the first free chunk index as a hint for the search. - auto It = std::find_if(Chunks.begin() + FirstFreeChunkIdx, Chunks.end(), - [](auto x) { return !x; }); - if (It != Chunks.end()) { - return It - Chunks.begin(); - } - - return std::numeric_limits::max(); -} - -void *Slab::getChunk() { - // assert(NumAllocated != Chunks.size()); - - const size_t ChunkIdx = FindFirstAvailableChunkIdx(); - // Free chunk must exist, otherwise we would have allocated another slab - assert(ChunkIdx != (std::numeric_limits::max())); - - void *const FreeChunk = - (static_cast(getPtr())) + ChunkIdx * getChunkSize(); - Chunks[ChunkIdx] = true; - NumAllocated += 1; - - // Use the found index as the next hint - FirstFreeChunkIdx = ChunkIdx; - - return FreeChunk; -} - -void *Slab::getSlab() { return getPtr(); } - -Bucket &Slab::getBucket() { return bucket; } -const Bucket &Slab::getBucket() const { return bucket; } - -size_t Slab::getChunkSize() const { return bucket.getSize(); } - -void Slab::regSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - Map.insert({Addr, Slab}); -} - -void Slab::unregSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - - auto Slabs = Map.equal_range(Addr); - // At least the must get the current slab from the map. 
- assert(Slabs.first != Slabs.second && "Slab is not found"); - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - if (It->second == Slab) { - Map.erase(It); - return; - } - } - - assert(false && "Slab is not found"); -} - -void Slab::regSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - regSlabByAddr(StartAddr, Slab); - regSlabByAddr(EndAddr, Slab); -} - -void Slab::unregSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - unregSlabByAddr(StartAddr, Slab); - unregSlabByAddr(EndAddr, Slab); -} - -void Slab::freeChunk(void *Ptr) { - // This method should be called through bucket(since we might remove the slab - // as a result), therefore all locks are done on that level. - - // Make sure that we're in the right slab - assert(Ptr >= getPtr() && Ptr < getEnd()); - - // Even if the pointer p was previously aligned, it's still inside the - // corresponding chunk, so we get the correct index here. - auto ChunkIdx = (static_cast(Ptr) - static_cast(MemPtr)) / - getChunkSize(); - - // Make sure that the chunk was allocated - assert(Chunks[ChunkIdx] && "double free detected"); - - Chunks[ChunkIdx] = false; - NumAllocated -= 1; - - if (ChunkIdx < FirstFreeChunkIdx) { - FirstFreeChunkIdx = ChunkIdx; - } -} - -void *Slab::getEnd() const { - return static_cast(getPtr()) + bucket.SlabMinSize(); -} - -bool Slab::hasAvail() { return NumAllocated != getNumChunks(); } - -// If a slab was available in the pool then note that the current pooled -// size has reduced by the size of a slab in this bucket. -void Bucket::decrementPool(bool &FromPool) { - FromPool = true; - updateStats(1, -1); - OwnAllocCtx.getLimits()->TotalSize -= SlabAllocSize(); -} - -auto Bucket::getAvailFullSlab(bool &FromPool) - -> decltype(AvailableSlabs.begin()) { - // Return a slab that will be used for a single allocation. - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - FromPool = false; - updateStats(1, 0); - } else { - decrementPool(FromPool); - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getSlab(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailFullSlab(FromPool); - auto *FreeSlab = (*SlabIt)->getSlab(); - auto It = - UnavailableSlabs.insert(UnavailableSlabs.begin(), std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - return FreeSlab; -} - -void Bucket::freeSlab(Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - if (CanPool(ToPool)) { - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - (*It)->setIterator(It); - } else { - UnavailableSlabs.erase(SlabIter); - } -} - -auto Bucket::getAvailSlab(bool &FromPool) -> decltype(AvailableSlabs.begin()) { - - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - - updateStats(1, 0); - FromPool = false; - } else { - if ((*(AvailableSlabs.begin()))->getNumAllocated() == 0) { - // If this was an empty slab, it was in the pool. - // Now it is no longer in the pool, so update count. 
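At this point the old code treats any reuse of an already-available slab as a pool hit, and additionally drops the chunked-slab pool counter when the reused slab was completely empty (the decrement continues just below). The same classification could be kept as a small helper in the C rewrite; sketch_bucket_t, sketch_slab_t and their fields are illustrative stand-ins, not the structures introduced by this patch.

#include <stdbool.h>
#include <stddef.h>

typedef struct sketch_slab {
    size_t num_allocated;     // chunks currently handed out from this slab
    struct sketch_slab *next; // next slab with at least one free chunk
} sketch_slab_t;

typedef struct sketch_bucket {
    sketch_slab_t *available_slabs; // slabs with at least one free chunk
    size_t chunked_slabs_in_pool;   // completely empty slabs kept pooled
} sketch_bucket_t;

// Returns true when a chunk request can be served from an existing slab
// (counted as "from pool"); false means a fresh slab must first be
// allocated from the provider.
static bool sketch_chunk_from_pool(sketch_bucket_t *b) {
    if (b->available_slabs == NULL) {
        return false; // no slab available: provider allocation required
    }
    if (b->available_slabs->num_allocated == 0) {
        // The head slab was entirely empty, i.e. it was sitting in the
        // pool; it now leaves the pool, so the counter goes down.
        b->chunked_slabs_in_pool--;
    }
    return true; // any reuse of an existing slab counts as a pool hit
}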
- --chunkedSlabsInPool; - decrementPool(FromPool); - } else { - // Allocation from existing slab is treated as from pool for statistics. - FromPool = true; - } - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getChunk(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailSlab(FromPool); - auto *FreeChunk = (*SlabIt)->getChunk(); - - // If the slab is full, move it to unavailable slabs and update its iterator - if (!((*SlabIt)->hasAvail())) { - auto It = UnavailableSlabs.insert(UnavailableSlabs.begin(), - std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - } - - return FreeChunk; -} - -void Bucket::freeChunk(void *Ptr, Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - - Slab.freeChunk(Ptr); - - onFreeChunk(Slab, ToPool); -} - -// The lock must be acquired before calling this method -void Bucket::onFreeChunk(Slab &Slab, bool &ToPool) { - ToPool = true; - - // In case if the slab was previously full and now has 1 available - // chunk, it should be moved to the list of available slabs - if (Slab.getNumAllocated() == (Slab.getNumChunks() - 1)) { - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - - (*It)->setIterator(It); - } - - // Check if slab is empty, and pool it if we can. - if (Slab.getNumAllocated() == 0) { - // The slab is now empty. - // If pool has capacity then put the slab in the pool. - // The ToPool parameter indicates whether the Slab will be put in the - // pool or freed. - if (!CanPool(ToPool)) { - // Note: since the slab is stored as unique_ptr, just remove it from - // the list to destroy the object. - auto It = Slab.getIterator(); - assert(It != AvailableSlabs.end()); - AvailableSlabs.erase(It); - } - } -} - -bool Bucket::CanPool(bool &ToPool) { - size_t NewFreeSlabsInBucket; - // Check if this bucket is used in chunked form or as full slabs. - bool chunkedBucket = getSize() <= ChunkCutOff(); - if (chunkedBucket) { - NewFreeSlabsInBucket = chunkedSlabsInPool + 1; - } else { - NewFreeSlabsInBucket = AvailableSlabs.size() + 1; - } - if (Capacity() >= NewFreeSlabsInBucket) { - size_t PoolSize = OwnAllocCtx.getLimits()->TotalSize; - while (true) { - size_t NewPoolSize = PoolSize + SlabAllocSize(); - - if (OwnAllocCtx.getLimits()->MaxSize < NewPoolSize) { - break; - } - - if (OwnAllocCtx.getLimits()->TotalSize.compare_exchange_strong( - PoolSize, NewPoolSize)) { - if (chunkedBucket) { - ++chunkedSlabsInPool; - } - - updateStats(-1, 1); - ToPool = true; - return true; - } - } - } - - updateStats(-1, 0); - ToPool = false; - return false; -} - -umf_memory_provider_handle_t Bucket::getMemHandle() { - return OwnAllocCtx.getMemHandle(); -} - -size_t Bucket::SlabMinSize() { return OwnAllocCtx.getParams().SlabMinSize; } - -size_t Bucket::SlabAllocSize() { return std::max(getSize(), SlabMinSize()); } - -size_t Bucket::Capacity() { - // For buckets used in chunked mode, just one slab in pool is sufficient. - // For larger buckets, the capacity could be more and is adjustable. 
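The interesting part of CanPool above is the lock-free budget check: the shared TotalSize counter only grows by one slab if the result stays within MaxSize, via a compare-exchange retry loop. In C the same check can be expressed with C11 atomics (or the utils_* wrappers this patch extends further down); sketch_try_pool_slab and its parameters are illustrative names.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

// Try to account one more slab of 'slab_alloc_size' bytes against the shared
// pooling budget. Returns true if the slab may stay in the pool, false if
// pooling it would exceed 'max_size'.
static bool sketch_try_pool_slab(_Atomic size_t *pool_total_size,
                                 size_t max_size, size_t slab_alloc_size) {
    size_t current =
        atomic_load_explicit(pool_total_size, memory_order_acquire);
    while (true) {
        size_t proposed = current + slab_alloc_size;
        if (proposed > max_size) {
            return false; // over budget: return the slab to the provider
        }
        // On failure 'current' is refreshed with the value actually seen,
        // so the loop retries against up-to-date data.
        if (atomic_compare_exchange_strong_explicit(
                pool_total_size, &current, proposed, memory_order_acq_rel,
                memory_order_relaxed)) {
            return true; // slab retained in the pool
        }
    }
}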
- if (getSize() <= ChunkCutOff()) { - return 1; - } else { - return OwnAllocCtx.getParams().Capacity; - } -} - -size_t Bucket::MaxPoolableSize() { - return OwnAllocCtx.getParams().MaxPoolableSize; -} - -size_t Bucket::ChunkCutOff() { return SlabMinSize() / 2; } - -void Bucket::countAlloc(bool FromPool) { - ++allocCount; - if (FromPool) { - ++allocPoolCount; - } -} - -void Bucket::countFree() { ++freeCount; } - -void Bucket::updateStats(int InUse, int InPool) { - if (OwnAllocCtx.getParams().PoolTrace == 0) { - return; - } - currSlabsInUse += InUse; - maxSlabsInUse = std::max(currSlabsInUse, maxSlabsInUse); - currSlabsInPool += InPool; - maxSlabsInPool = std::max(currSlabsInPool, maxSlabsInPool); - // Increment or decrement current pool sizes based on whether - // slab was added to or removed from pool. - OwnAllocCtx.getParams().CurPoolSize += InPool * SlabAllocSize(); -} - -void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { - if (allocCount) { - if (!TitlePrinted) { - std::cout << Label << " memory statistics\n"; - std::cout << std::setw(14) << "Bucket Size" << std::setw(12) - << "Allocs" << std::setw(12) << "Frees" << std::setw(18) - << "Allocs from Pool" << std::setw(20) - << "Peak Slabs in Use" << std::setw(21) - << "Peak Slabs in Pool" << std::endl; - TitlePrinted = true; - } - std::cout << std::setw(14) << getSize() << std::setw(12) << allocCount - << std::setw(12) << freeCount << std::setw(18) - << allocPoolCount << std::setw(20) << maxSlabsInUse - << std::setw(21) << maxSlabsInPool << std::endl; - } -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - FromPool = false; - if (Size > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(Size); - - if (Size > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, Ptr, Size); - annotate_memory_undefined(Ptr, Bucket.getSize()); - - return Ptr; -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, - bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - if (Alignment <= 1) { - return allocate(Size, FromPool); - } - - size_t AlignedSize; - if (Alignment <= ProviderMinPageSize) { - // This allocation will be served from a Bucket which size is multiple - // of Alignment and Slab address is aligned to ProviderMinPageSize - // so the address will be properly aligned. - AlignedSize = (Size > 1) ? AlignUp(Size, Alignment) : Alignment; - } else { - // Slabs are only aligned to ProviderMinPageSize, we need to compensate - // for that in case the allocation is within pooling limit. - // TODO: consider creating properly-aligned Slabs on demand - AlignedSize = Size + Alignment - 1; - } - - // Check if requested allocation size is within pooling limit. - // If not, just request aligned pointer from the system. 
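The alignment handling just above reduces to one size computation: if the requested alignment is no stricter than the provider's minimum page size, rounding the size up to the alignment is enough, because bucket sizes are multiples of the alignment and slabs start page-aligned; otherwise the request is padded by alignment - 1 so the returned pointer can be aligned up afterwards. A hedged C sketch of that computation, assuming power-of-two alignments; the SKETCH_* names are illustrative:

#include <stddef.h>
#include <stdint.h>

// Power-of-two round-up, mirroring the usual ALIGN_UP idiom.
#define SKETCH_ALIGN_UP(value, align) (((value) + (align)-1) & ~((align)-1))

// Size actually requested from the bucket machinery for an aligned allocation.
static size_t sketch_aligned_request_size(size_t size, size_t alignment,
                                          size_t provider_min_page_size) {
    if (alignment <= provider_min_page_size) {
        // Bucket sizes are multiples of 'alignment' and slabs are at least
        // page-aligned, so the chunk address is already suitably aligned.
        return (size > 1) ? SKETCH_ALIGN_UP(size, alignment) : alignment;
    }
    // Slabs only guarantee page alignment: pad the request so that some
    // address inside the chunk satisfies the stricter alignment.
    return size + alignment - 1;
}

// The pointer handed back to the caller is then aligned up inside the chunk.
static void *sketch_align_ptr_up(void *ptr, size_t alignment) {
    return (void *)SKETCH_ALIGN_UP((uintptr_t)ptr, (uintptr_t)alignment);
}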
- FromPool = false; - if (AlignedSize > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size, Alignment); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(AlignedSize); - - if (AlignedSize > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, AlignPtrUp(Ptr, Alignment), Size); - annotate_memory_undefined(AlignPtrUp(Ptr, Alignment), Size); - return AlignPtrUp(Ptr, Alignment); -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) { - assert(Size <= CutOff && "Unexpected size"); - assert(Size > 0 && "Unexpected size"); - - size_t MinBucketSize = (size_t)1 << MinBucketSizeExp; - if (Size < MinBucketSize) { - return 0; - } - - // Get the position of the leftmost set bit. - size_t position = getLeftmostSetBitPos(Size); - - auto isPowerOf2 = 0 == (Size & (Size - 1)); - auto largerThanHalfwayBetweenPowersOf2 = - !isPowerOf2 && bool((Size - 1) & (uint64_t(1) << (position - 1))); - auto index = (position - MinBucketSizeExp) * 2 + (int)(!isPowerOf2) + - (int)largerThanHalfwayBetweenPowersOf2; - - return index; -} - -Bucket &DisjointPool::AllocImpl::findBucket(size_t Size) { - auto calculatedIdx = sizeToIdx(Size); - assert((*(Buckets[calculatedIdx])).getSize() >= Size); - if (calculatedIdx > 0) { - assert((*(Buckets[calculatedIdx - 1])).getSize() < Size); - } - - return *(Buckets[calculatedIdx]); -} - -void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { - auto *SlabPtr = AlignPtrDown(Ptr, SlabMinSize()); - - // Lock the map on read - std::shared_lock Lk(getKnownSlabsMapLock()); - - ToPool = false; - auto Slabs = getKnownSlabs().equal_range(SlabPtr); - if (Slabs.first == Slabs.second) { - Lk.unlock(); - memoryProviderFree(getMemHandle(), Ptr); - return; - } - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - // The slab object won't be deleted until it's removed from the map which is - // protected by the lock, so it's safe to access it here. - auto &Slab = It->second; - if (Ptr >= Slab.getPtr() && Ptr < Slab.getEnd()) { - // Unlock the map before freeing the chunk, it may be locked on write - // there - Lk.unlock(); - auto &Bucket = Slab.getBucket(); - - if (getParams().PoolTrace > 1) { - Bucket.countFree(); - } - - VALGRIND_DO_MEMPOOL_FREE(this, Ptr); - annotate_memory_inaccessible(Ptr, Bucket.getSize()); - if (Bucket.getSize() <= Bucket.ChunkCutOff()) { - Bucket.freeChunk(Ptr, Slab, ToPool); - } else { - Bucket.freeSlab(Slab, ToPool); - } - - return; - } - } - - Lk.unlock(); - // There is a rare case when we have a pointer from system allocation next - // to some slab with an entry in the map. So we find a slab - // but the range checks fail. 
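The lookup above hinges on how slabs were registered earlier: each slab is registered twice, once under its start address aligned down to SlabMinSize and once under that key plus SlabMinSize, so any pointer inside the slab aligns down to one of the two keys even though the slab's base address is not necessarily SlabMinSize-aligned. A small sketch of the key computation and of the range check that must still follow a hit (names are illustrative; the C rewrite appears to keep these registrations in a critnib rather than a multimap):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Key under which a slab containing 'ptr' was registered: 'ptr' aligned down
// to the slab minimum size (assumed to be a power of two).
static void *sketch_slab_lookup_key(void *ptr, size_t slab_min_size) {
    return (void *)((uintptr_t)ptr & ~((uintptr_t)slab_min_size - 1));
}

// A registration hit alone is not enough: the pointer must also fall inside
// the slab's real [start, end) range before its chunk is freed, which is the
// range check the old code performs after the multimap lookup.
static bool sketch_ptr_in_slab(void *ptr, void *slab_start,
                               size_t slab_min_size) {
    char *start = (char *)slab_start;
    return (char *)ptr >= start && (char *)ptr < start + slab_min_size;
}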
- memoryProviderFree(getMemHandle(), Ptr); -} - -void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, - size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, - const std::string &MTName) { - HighBucketSize = 0; - HighPeakSlabsInUse = 0; - for (auto &B : Buckets) { - (*B).printStats(TitlePrinted, MTName); - HighPeakSlabsInUse = std::max((*B).maxSlabsInUse, HighPeakSlabsInUse); - if ((*B).allocCount) { - HighBucketSize = std::max((*B).SlabAllocSize(), HighBucketSize); - } - } -} - -umf_result_t DisjointPool::initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_t *parameters) { - if (!provider) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - // MinBucketSize parameter must be a power of 2 for bucket sizes - // to generate correctly. - if (!parameters->MinBucketSize || - !((parameters->MinBucketSize & (parameters->MinBucketSize - 1)) == 0)) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - impl = std::make_unique(provider, parameters); - return UMF_RESULT_SUCCESS; -} - -void *DisjointPool::malloc(size_t size) { // For full-slab allocations indicates - // whether slab is from Pool. - bool FromPool; - auto Ptr = impl->allocate(size, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes from " << (FromPool ? "Pool" : "Provider") << " ->" - << Ptr << std::endl; - } - return Ptr; -} - -void *DisjointPool::calloc(size_t, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::realloc(void *, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::aligned_malloc(size_t size, size_t alignment) { - bool FromPool; - auto Ptr = impl->allocate(size, alignment, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes aligned at " << alignment << " from " - << (FromPool ? "Pool" : "Provider") << " ->" << Ptr - << std::endl; - } - return Ptr; -} - -size_t DisjointPool::malloc_usable_size(void *) { - // Not supported - return 0; -} - -umf_result_t DisjointPool::free(void *ptr) try { - bool ToPool; - impl->deallocate(ptr, ToPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Freed " << MT << " " << ptr << " to " - << (ToPool ? 
"Pool" : "Provider") - << ", Current total pool size " - << impl->getLimits()->TotalSize.load() - << ", Current pool size for " << MT << " " - << impl->getParams().CurPoolSize << "\n"; - } - return UMF_RESULT_SUCCESS; -} catch (MemoryProviderError &e) { - return e.code; -} - -umf_result_t DisjointPool::get_last_allocation_error() { - return umf::getPoolLastStatusRef(); -} - -DisjointPool::DisjointPool() {} - -// Define destructor for use with unique_ptr -DisjointPool::~DisjointPool() { - bool TitlePrinted = false; - size_t HighBucketSize; - size_t HighPeakSlabsInUse; - if (impl->getParams().PoolTrace > 1) { - auto name = impl->getParams().Name; - try { // cannot throw in destructor - impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, - name); - if (TitlePrinted) { - std::cout << "Current Pool Size " - << impl->getLimits()->TotalSize.load() << std::endl; - std::cout << "Suggested Setting=;" - << std::string(1, (char)tolower(name[0])) - << std::string(name + 1) << ":" << HighBucketSize - << "," << HighPeakSlabsInUse << ",64K" << std::endl; - } - } catch (...) { // ignore exceptions - } - } -} - -static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = - umf::poolMakeCOps(); - -umf_memory_pool_ops_t *umfDisjointPoolOps(void) { - return &UMF_DISJOINT_POOL_OPS; -} diff --git a/src/utils/utils_common.c b/src/utils/utils_common.c index 25169f6cf..611b277df 100644 --- a/src/utils/utils_common.c +++ b/src/utils/utils_common.c @@ -138,3 +138,6 @@ umf_result_t utils_translate_flags(unsigned in_flags, unsigned max, *out_flags = out_f; return UMF_RESULT_SUCCESS; } + +size_t utils_max(size_t a, size_t b) { return a > b ? a : b; } +size_t utils_min(size_t a, size_t b) { return a < b ? a : b; } diff --git a/src/utils/utils_common.h b/src/utils/utils_common.h index c25fda2ab..9b226246b 100644 --- a/src/utils/utils_common.h +++ b/src/utils/utils_common.h @@ -153,6 +153,10 @@ int utils_file_open_or_create(const char *path); int utils_fallocate(int fd, long offset, long len); +size_t utils_max(size_t a, size_t b); + +size_t utils_min(size_t a, size_t b); + #ifdef __cplusplus } #endif diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 155184cc4..861c621cb 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -61,11 +61,13 @@ int utils_mutex_unlock(utils_mutex_t *mutex); void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); #if defined(_WIN32) + static __inline unsigned char utils_lssb_index(long long value) { unsigned long ret; _BitScanForward64(&ret, value); return (unsigned char)ret; } + static __inline unsigned char utils_mssb_index(long long value) { unsigned long ret; _BitScanReverse64(&ret, value); @@ -81,15 +83,25 @@ static __inline unsigned char utils_mssb_index(long long value) { #define utils_atomic_store_release(object, desired) \ InterlockedExchange64((LONG64 volatile *)object, (LONG64)desired) + #define utils_atomic_increment(object) \ InterlockedIncrement64((LONG64 volatile *)object) + #define utils_atomic_decrement(object) \ InterlockedDecrement64((LONG64 volatile *)object) + #define utils_fetch_and_add64(ptr, value) \ InterlockedExchangeAdd64((LONG64 *)(ptr), value) -#else + +#define utils_compare_exchange(object, expected, desired) \ + InterlockedCompareExchangePointer((LONG64 volatile *)object, expected, \ + desired) + +#else // !defined(_WIN32) + #define utils_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) #define utils_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x))) + #define 
utils_atomic_load_acquire(object, dest) \ do { \ utils_annotate_acquire((void *)object); \ @@ -103,12 +115,19 @@ static __inline unsigned char utils_mssb_index(long long value) { } while (0) #define utils_atomic_increment(object) \ - __atomic_add_fetch(object, 1, __ATOMIC_ACQ_REL) + __atomic_add_fetch(object, 1, memory_order_acq_rel) + #define utils_atomic_decrement(object) \ - __atomic_sub_fetch(object, 1, __ATOMIC_ACQ_REL) -#define utils_fetch_and_add64 __sync_fetch_and_add + __atomic_sub_fetch(object, 1, memory_order_acq_rel) -#endif +#define utils_fetch_and_add64(object, value) \ + __atomic_fetch_add(object, value, memory_order_acq_rel) + +#define utils_compare_exchange(object, expected, desired) \ + __atomic_compare_exchange(object, expected, desired, 0 /* strong */, \ + memory_order_acq_rel, memory_order_relaxed) + +#endif // !defined(_WIN32) #ifdef __cplusplus }
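Finally, the new utils_compare_exchange() wrapper is what lets the C port reproduce the pooled-size compare-exchange loop without C++ std::atomic. Below is a hedged usage sketch written against the GCC/Clang expansion added above, which takes pointers to both the expected and the desired value; grow_pool_size and its parameters are illustrative, not part of this patch.

#include <stdbool.h>
#include <stddef.h>

#include "utils_concurrency.h"

// Grow the shared pooled-bytes counter by 'delta', but never past 'max_size'.
// Returns true when the growth was committed.
static bool grow_pool_size(size_t *total_size, size_t max_size, size_t delta) {
    size_t expected = *total_size; // initial snapshot; the CAS re-validates it
    while (true) {
        size_t desired = expected + delta;
        if (desired > max_size) {
            return false;
        }
        // On failure the builtin writes the value it observed back into
        // 'expected', so the next iteration retries with fresh data.
        if (utils_compare_exchange(total_size, &expected, &desired)) {
            return true;
        }
    }
}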