diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index bdd196b04..56ec58de9 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -14,7 +14,7 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT) add_umf_library( NAME disjoint_pool TYPE STATIC - SRCS pool_disjoint.cpp ${POOL_EXTRA_SRCS} + SRCS pool_disjoint.c ../critnib/critnib.c ${POOL_EXTRA_SRCS} LIBS ${POOL_EXTRA_LIBS}) target_compile_definitions(disjoint_pool diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c new file mode 100644 index 000000000..20e315656 --- /dev/null +++ b/src/pool/pool_disjoint.c @@ -0,0 +1,1259 @@ +/* + * Copyright (C) 2022-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "critnib/critnib.h" +#include "uthash/utlist.h" + +#include "base_alloc_global.h" +#include "provider_tracking.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" +#include "utils_math.h" +#include "utils_sanitizers.h" + +typedef struct bucket_t bucket_t; +typedef struct slab_t slab_t; +typedef struct slab_list_item_t slab_list_item_t; +typedef struct disjoint_pool_t disjoint_pool_t; + +slab_t *create_slab(bucket_t *bucket); +void destroy_slab(slab_t *slab); + +void *slab_get(const slab_t *slab); +void *slab_get_end(const slab_t *slab); +void *slab_get_chunk(slab_t *slab); + +bool slab_has_avail(const slab_t *slab); +void slab_free_chunk(slab_t *slab, void *ptr); + +void slab_reg(slab_t *slab); +void slab_reg_by_addr(void *addr, slab_t *slab); +void slab_unreg(slab_t *slab); +void slab_unreg_by_addr(void *addr, slab_t *slab); + +bucket_t *create_bucket(size_t sz, disjoint_pool_t *pool, + umf_disjoint_pool_shared_limits_t *shared_limits); +void destroy_bucket(bucket_t *bucket); + +void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); +bool bucket_can_pool(bucket_t *bucket, bool *to_pool); +void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *to_pool); +void bucket_decrement_pool(bucket_t *bucket, bool *from_pool); +void *bucket_get_chunk(bucket_t *bucket, bool *from_pool); +size_t bucket_chunk_cut_off(bucket_t *bucket); +size_t bucket_capacity(bucket_t *bucket); +void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, + bool *to_pool); +void bucket_count_alloc(bucket_t *bucket, bool from_pool); + +void *bucket_get_slab(bucket_t *bucket, bool *from_pool); +size_t bucket_slab_alloc_size(bucket_t *bucket); +size_t bucket_slab_min_size(bucket_t *bucket); +slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); +slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, bool *from_pool); +void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool); + +static __TLS umf_result_t TLS_last_allocation_error; + +// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is +// requested. The implementation distinguishes between allocations of size +// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. +// Allocation requests smaller than ChunkCutoff use chunks taken from a single +// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation +// size, and 8-byte allocations, only 1 in ~8000 requests results in a new +// coarse-grain allocation. Freeing results only in a chunk of a larger +// allocation to be marked as available and no real return to the system. 
An +// allocation is returned to the system only when all chunks in the larger +// allocation are freed by the program. Allocations larger than ChunkCutOff use +// a separate coarse-grain allocation for each request. These are subject to +// "pooling". That is, when such an allocation is freed by the program it is +// retained in a pool. The pool is available for future allocations, which means +// there are fewer actual coarse-grain allocations/deallocations. + +// The largest size which is allocated via the allocator. +// Allocations with size > CutOff bypass the pool and +// go directly to the provider. +static size_t CutOff = (size_t)1 << 31; // 2GB + +// Temporary solution for disabling memory poisoning. This is needed because +// AddressSanitizer does not support memory poisoning for GPU allocations. +// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 +#ifndef POISON_MEMORY +#define POISON_MEMORY 0 +#endif + +/*static */ void annotate_memory_inaccessible(void *ptr, size_t size) { + (void)ptr; + (void)size; +#if (POISON_MEMORY != 0) + utils_annotate_memory_inaccessible(ptr, size); +#endif +} + +/*static*/ void annotate_memory_undefined(void *ptr, size_t size) { + (void)ptr; + (void)size; +#if (POISON_MEMORY != 0) + utils_annotate_memory_undefined(ptr, size); +#endif +} + +void annotate_memory_inaccessible(void *ptr, size_t size); +void annotate_memory_undefined(void *ptr, size_t size); + +typedef struct slab_list_item_t slab_list_item_t; + +typedef struct bucket_t { + size_t size; + + // Linked list of slabs which have at least 1 available chunk. + slab_list_item_t *available_slabs; + + // Linked list of slabs with 0 available chunk. + slab_list_item_t *unavailable_slabs; + + // Protects the bucket and all the corresponding slabs + utils_mutex_t bucket_lock; + + // Reference to the allocator context, used access memory allocation + // routines, slab map and etc. + disjoint_pool_t *pool; + + umf_disjoint_pool_shared_limits_t *shared_limits; + + // For buckets used in chunked mode, a counter of slabs in the pool. + // For allocations that use an entire slab each, the entries in the Available + // list are entries in the pool.Each slab is available for a new + // allocation.The size of the Available list is the size of the pool. + // For allocations that use slabs in chunked mode, slabs will be in the + // Available list if any one or more of their chunks is free.The entire slab + // is not necessarily free, just some chunks in the slab are free. To + // implement pooling we will allow one slab in the Available list to be + // entirely empty. Normally such a slab would have been freed. But + // now we don't, and treat this slab as "in the pool". + // When a slab becomes entirely free we have to decide whether to return it + // to the provider or keep it allocated. A simple check for size of the + // Available list is not sufficient to check whether any slab has been + // pooled yet.We would have to traverse the entire Available listand check + // if any of them is entirely free. Instead we keep a counter of entirely + // empty slabs within the Available list to speed up the process of checking + // if a slab in this bucket is already pooled. 
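+    //
+    // Illustrative example (values assumed, not from this patch): a 64-byte
+    // bucket with a 64KB SlabMinSize runs in chunked mode, so
+    // bucket_capacity() is 1. While exactly one completely free slab sits in
+    // available_slabs, chunked_slabs_in_pool == 1; allocating a chunk from
+    // that slab (see bucket_get_avail_slab) drops the counter back to 0
+    // without traversing the list.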
+ size_t chunked_slabs_in_pool; + + // Statistics + size_t alloc_pool_count; + size_t free_count; + size_t curr_slabs_in_use; + size_t curr_slabs_in_pool; + size_t max_slabs_in_pool; + size_t alloc_count; + size_t max_slabs_in_use; +} bucket_t; + +// Represents the allocated memory block of size 'slab_min_size' +// Internally, it splits the memory block into chunks. The number of +// chunks depends of the size of a Bucket which created the Slab. +// Note: Bucket's methods are responsible for thread safety of Slab access, +// so no locking happens here. +typedef struct slab_t { + // Pointer to the allocated memory of slab_min_size bytes + void *mem_ptr; + size_t slab_size; + + // Represents the current state of each chunk: if the bit is set then the + // chunk is allocated, and if the chunk is free for allocation otherwise + bool *chunks; + size_t num_chunks; + + // Total number of allocated chunks at the moment. + size_t num_allocated; + + // The bucket which the slab belongs to + bucket_t *bucket; + + // Hints where to start search for free chunk in a slab + size_t first_free_chunk_idx; + + // Store iterator to the corresponding node in avail/unavail list + // to achieve O(1) removal + slab_list_item_t *iter; +} slab_t; + +typedef struct slab_list_item_t { + slab_t *val; + struct slab_list_item_t *prev, *next; +} slab_list_item_t; + +typedef struct umf_disjoint_pool_shared_limits_t { + size_t max_size; + size_t total_size; // requires atomic access +} umf_disjoint_pool_shared_limits_t; + +umf_disjoint_pool_shared_limits_t * +umfDisjointPoolSharedLimitsCreate(size_t max_size) { + umf_disjoint_pool_shared_limits_t *ptr = + umf_ba_global_alloc(sizeof(umf_disjoint_pool_shared_limits_t)); + ptr->max_size = max_size; + ptr->total_size = 0; + return ptr; +} + +void umfDisjointPoolSharedLimitsDestroy( + umf_disjoint_pool_shared_limits_t *limits) { + umf_ba_global_free(limits); +} + +typedef struct disjoint_pool_t { + // It's important for the map to be destroyed last after buckets and their + // slabs This is because slab's destructor removes the object from the map. + critnib *known_slabs; // (void *, slab_t *) + + // TODO: prev std::shared_timed_mutex - ok? + utils_mutex_t known_slabs_map_lock; + + // Handle to the memory provider + umf_memory_provider_handle_t provider; + + // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) + bucket_t **buckets; + size_t buckets_num; + + // Configuration for this instance + umf_disjoint_pool_params_t params; + + umf_disjoint_pool_shared_limits_t *default_shared_limits; + + // Used in algorithm for finding buckets + size_t min_bucket_size_exp; + + // Coarse-grain allocation min alignment + size_t provider_min_page_size; +} disjoint_pool_t; + +slab_t *create_slab(bucket_t *bucket) { + // In case bucket size is not a multiple of SlabMinSize, we would have + // some padding at the end of the slab. 
+ slab_t *slab = umf_ba_global_alloc(sizeof(slab_t)); + + // TODO check res and errors here and everywhere + // TODO use logger + slab->num_allocated = 0; + slab->first_free_chunk_idx = 0; + slab->bucket = bucket; + + slab->iter = + (slab_list_item_t *)umf_ba_global_alloc(sizeof(slab_list_item_t)); + slab->iter->val = slab; + slab->iter->prev = slab->iter->next = NULL; + + slab->num_chunks = bucket_slab_min_size(bucket) / bucket->size; + slab->chunks = umf_ba_global_alloc(sizeof(bool) * slab->num_chunks); + memset(slab->chunks, 0, sizeof(bool) * slab->num_chunks); + + slab->slab_size = bucket_slab_alloc_size(bucket); + + // NOTE: originally slabs memory were allocated without alignment + // with this registering a slab is simpler and doesn't require multimap + umf_memory_provider_handle_t provider = bucket->pool->provider; + umf_result_t res = + umfMemoryProviderAlloc(provider, slab->slab_size, + bucket_slab_min_size(bucket), &slab->mem_ptr); + + if (res == UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY) { + destroy_slab(slab); + return NULL; + } + + annotate_memory_inaccessible(slab->mem_ptr, slab->slab_size); + fprintf(stderr, "[DP create_slab] bucket: %p, slab_size: %zu\n", + (void *)bucket, slab->slab_size); + + return slab; +} + +void destroy_slab(slab_t *slab) { + fprintf(stderr, "[DP destroy_slab] bucket: %p, slab_size: %zu\n", + (void *)slab->bucket, slab->slab_size); + + umf_memory_provider_handle_t provider = slab->bucket->pool->provider; + umf_result_t res = + umfMemoryProviderFree(provider, slab->mem_ptr, slab->slab_size); + assert(res == UMF_RESULT_SUCCESS); + (void)res; + + umf_ba_global_free(slab->chunks); + umf_ba_global_free(slab->iter); + umf_ba_global_free(slab); +} + +// Return the index of the first available chunk, SIZE_MAX otherwise +size_t slab_find_first_available_chunk_idx(const slab_t *slab) { + // Use the first free chunk index as a hint for the search. + bool *chunk = slab->chunks + sizeof(bool) * slab->first_free_chunk_idx; + while (chunk != slab->chunks + sizeof(bool) * slab->num_chunks) { + // false means not used + if (*chunk == false) { + size_t idx = (chunk - slab->chunks) / sizeof(bool); + fprintf(stderr, + "[DP slab_find_first_available_chunk_idx] idx: %zu\n", idx); + return idx; + } + chunk++; + } + + fprintf(stderr, "[DP slab_find_first_available_chunk_idx] idx: SIZE_MAX\n"); + return SIZE_MAX; +} + +void *slab_get_chunk(slab_t *slab) { + // assert(slab->num_allocated != slab->num_chunks); + + const size_t chunk_idx = slab_find_first_available_chunk_idx(slab); + // Free chunk must exist, otherwise we would have allocated another slab + assert(chunk_idx != SIZE_MAX); + + void *free_chunk = + (uint8_t *)slab->mem_ptr + chunk_idx * slab->bucket->size; + // mark as used + slab->chunks[chunk_idx] = true; + slab->num_allocated += 1; + + // Use the found index as the next hint + slab->first_free_chunk_idx = chunk_idx; + + fprintf(stderr, "[DP slab_get_chunk] num_allocated: %zu\n", + slab->num_allocated); + + return free_chunk; +} + +// TODO remove? why need getter/setter? +void *slab_get(const slab_t *slab) { return slab->mem_ptr; } +void *slab_get_end(const slab_t *slab) { + return (uint8_t *)slab->mem_ptr + bucket_slab_min_size(slab->bucket); +} + +void slab_free_chunk(slab_t *slab, void *ptr) { + // This method should be called through bucket(since we might remove the + // slab as a result), therefore all locks are done on that level. 
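+    //
+    // Worked example (illustrative): for a 64-byte bucket, a pointer at
+    // offset 200 into the slab maps to chunk_idx = 200 / 64 = 3, i.e. the
+    // chunk spanning offsets [192, 256). Any address inside that chunk yields
+    // the same index, which is why previously aligned-up pointers are also
+    // handled correctly below.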
+ + // Make sure that we're in the right slab + assert(ptr >= slab_get(slab) && ptr < slab_get_end(slab)); + + // Even if the pointer p was previously aligned, it's still inside the + // corresponding chunk, so we get the correct index here. + size_t chunk_idx = + ((uint8_t *)ptr - (uint8_t *)slab->mem_ptr) / slab->bucket->size; + + // Make sure that the chunk was allocated + assert(slab->chunks[chunk_idx] && "double free detected"); + slab->chunks[chunk_idx] = false; + slab->num_allocated -= 1; + + if (chunk_idx < slab->first_free_chunk_idx) { + slab->first_free_chunk_idx = chunk_idx; + } + + fprintf(stderr, + "[DP slab_free_chunk] chunk_idx: %zu, num_allocated: %zu, " + "first_free_chunk_idx: %zu\n", + chunk_idx, slab->num_allocated, slab->first_free_chunk_idx); +} + +bool slab_has_avail(const slab_t *slab) { + return slab->num_allocated != slab->num_chunks; +} + +void slab_reg(slab_t *slab) { + bucket_t *bucket = slab->bucket; + // NOTE: changed vs original - slab is already aligned to bucket_slab_min_size + // I also decr end_addr by 1 + void *start_addr = (void *)ALIGN_DOWN((size_t)slab_get(slab), + bucket_slab_min_size(bucket)); + void *end_addr = (uint8_t *)(start_addr) + bucket_slab_min_size(bucket) - 1; + + fprintf(stderr, "[DP slab_reg] slab: %p, start: %p, end %p\n", (void *)slab, + start_addr, end_addr); + + slab_reg_by_addr(start_addr, slab); + slab_reg_by_addr(end_addr, slab); +} + +void slab_unreg(slab_t *slab) { + bucket_t *bucket = slab->bucket; + // NOTE: changed vs original - slab is already aligned to bucket_slab_min_size + // I also decr end_addr by 1 + void *start_addr = (void *)ALIGN_DOWN((size_t)slab_get(slab), + bucket_slab_min_size(bucket)); + void *end_addr = (uint8_t *)(start_addr) + bucket_slab_min_size(bucket) - 1; + + fprintf(stderr, "[DP slab_unreg] slab: %p, start: %p, end %p\n", + (void *)slab, start_addr, end_addr); + + slab_unreg_by_addr(start_addr, slab); + slab_unreg_by_addr(end_addr, slab); +} + +bucket_t *create_bucket(size_t Sz, disjoint_pool_t *pool, + umf_disjoint_pool_shared_limits_t *shared_limits) { + + bucket_t *bucket = (bucket_t *)umf_ba_global_alloc(sizeof(bucket_t)); + + bucket->size = Sz; + bucket->pool = pool; + bucket->available_slabs = NULL; + bucket->unavailable_slabs = NULL; + bucket->chunked_slabs_in_pool = 0; + bucket->alloc_pool_count = 0; + bucket->free_count = 0; + bucket->curr_slabs_in_use = 0; + bucket->curr_slabs_in_pool = 0; + bucket->max_slabs_in_pool = 0; + bucket->alloc_count = 0; + bucket->max_slabs_in_use = 0; + bucket->shared_limits = shared_limits; + + utils_mutex_init(&bucket->bucket_lock); + + return bucket; +} + +void destroy_bucket(bucket_t *bucket) { + // use an extra tmp to store the next iterator before destroying the slab + slab_list_item_t *it = NULL, *tmp = NULL; + LL_FOREACH_SAFE(bucket->available_slabs, it, tmp) { destroy_slab(it->val); } + LL_FOREACH_SAFE(bucket->unavailable_slabs, it, tmp) { + destroy_slab(it->val); + } + + utils_mutex_destroy_not_free(&bucket->bucket_lock); + umf_ba_global_free(bucket); +} + +// The lock must be acquired before calling this method +void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *to_pool) { + *to_pool = true; + + // In case if the slab was previously full and now has 1 available + // chunk, it should be moved to the list of available slabs + if (slab->num_allocated == (slab->num_chunks - 1)) { + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + DL_DELETE(bucket->unavailable_slabs, slab_it); + DL_PREPEND(bucket->available_slabs, 
slab_it); + } + + // Check if slab is empty, and pool it if we can. + if (slab->num_allocated == 0) { + // The slab is now empty. + // If pool has capacity then put the slab in the pool. + // The ToPool parameter indicates whether the Slab will be put in the + // pool or freed. + if (!bucket_can_pool(bucket, to_pool)) { + // Note: since the slab is stored as unique_ptr, just remove it from + // the list to destroy the object. + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + slab_unreg(slab_it->val); + DL_DELETE(bucket->available_slabs, slab_it); + destroy_slab(slab_it->val); + } + } +} + +void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, + bool *to_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_free_chunk(slab, ptr); + bucket_on_free_chunk(bucket, slab, to_pool); + + utils_mutex_unlock(&bucket->bucket_lock); +} + +void bucket_count_alloc(bucket_t *bucket, bool from_pool) { + ++bucket->alloc_count; + if (from_pool) { + ++bucket->alloc_pool_count; + } +} + +void *bucket_get_chunk(bucket_t *bucket, bool *from_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_list_item_t *slab_it = bucket_get_avail_slab(bucket, from_pool); + if (slab_it == NULL) { + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + + void *free_chunk = slab_get_chunk(slab_it->val); + + // If the slab is full, move it to unavailable slabs and update its iterator + if (!(slab_has_avail(slab_it->val))) { + DL_DELETE(bucket->available_slabs, slab_it); + DL_PREPEND(bucket->unavailable_slabs, slab_it); + } + + utils_mutex_unlock(&bucket->bucket_lock); + return free_chunk; +} + +size_t bucket_chunk_cut_off(bucket_t *bucket) { + return bucket_slab_min_size(bucket) / 2; +} + +size_t bucket_slab_alloc_size(bucket_t *bucket) { + return utils_max(bucket->size, bucket_slab_min_size(bucket)); +} + +size_t bucket_slab_min_size(bucket_t *bucket) { + return bucket->pool->params.SlabMinSize; +} + +slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, + bool *from_pool) { + // Return a slab that will be used for a single allocation. 
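+    //
+    // Illustrative example (SlabMinSize assumed to be 64KB): a 48KB request
+    // exceeds bucket_chunk_cut_off() == 32KB, so it is served through this
+    // path with one whole slab per allocation, whereas an 8KB request would
+    // go through bucket_get_avail_slab() and share a slab with other chunks.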
+ if (bucket->available_slabs == NULL) { + slab_t *slab = create_slab(bucket); + if (slab == NULL) { + //assert(0); + return NULL; + } + + slab_reg(slab); + DL_PREPEND(bucket->available_slabs, slab->iter); + *from_pool = false; + bucket_update_stats(bucket, 1, 0); + } else { + bucket_decrement_pool(bucket, from_pool); + } + + return bucket->available_slabs; +} + +void *bucket_get_slab(bucket_t *bucket, bool *from_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_list_item_t *slab_it = bucket_get_avail_full_slab(bucket, from_pool); + if (slab_it == NULL) { + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + slab_t *slab = slab_it->val; + void *ptr = slab_get(slab); + + DL_DELETE(bucket->available_slabs, slab_it); + slab_it->prev = NULL; + DL_PREPEND(bucket->unavailable_slabs, slab_it); + + utils_mutex_unlock(&bucket->bucket_lock); + return ptr; +} + +void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + if (bucket_can_pool(bucket, to_pool)) { + DL_DELETE(bucket->unavailable_slabs, slab_it); + slab_it->prev = NULL; + DL_PREPEND(bucket->available_slabs, slab_it); + } else { + slab_unreg(slab_it->val); + DL_DELETE(bucket->unavailable_slabs, slab_it); + destroy_slab(slab_it->val); + } + utils_mutex_unlock(&bucket->bucket_lock); +} + +slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool) { + if (bucket->available_slabs == NULL) { + slab_t *slab = create_slab(bucket); + if (slab == NULL) { + // TODO log + // TODO replace asserts + return NULL; + } + + slab_reg(slab); + DL_PREPEND(bucket->available_slabs, slab->iter); + bucket_update_stats(bucket, 1, 0); + *from_pool = false; + } else { + slab_t *slab = bucket->available_slabs->val; + if (slab->num_allocated == 0) { + // If this was an empty slab, it was in the pool. + // Now it is no longer in the pool, so update count. + --bucket->chunked_slabs_in_pool; + bucket_decrement_pool(bucket, from_pool); + } else { + // Allocation from existing slab is treated as from pool for statistics. + *from_pool = true; + } + } + + return bucket->available_slabs; +} + +size_t bucket_capacity(bucket_t *bucket) { + // For buckets used in chunked mode, just one slab in pool is sufficient. + // For larger buckets, the capacity could be more and is adjustable. + if (bucket->size <= bucket_chunk_cut_off(bucket)) { + return 1; + } else { + return bucket->pool->params.Capacity; + } +} + +void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { + if (bucket->pool->params.PoolTrace == 0) { + return; + } + + bucket->curr_slabs_in_use += in_use; + bucket->max_slabs_in_use = + utils_max(bucket->curr_slabs_in_use, bucket->max_slabs_in_use); + bucket->curr_slabs_in_pool += in_pool; + bucket->max_slabs_in_pool = + utils_max(bucket->curr_slabs_in_pool, bucket->max_slabs_in_pool); + + // Increment or decrement current pool sizes based on whether + // slab was added to or removed from pool. + bucket->pool->params.CurPoolSize += + in_pool * bucket_slab_alloc_size(bucket); +} + +// If a slab was available in the pool then note that the current pooled +// size has reduced by the size of a slab in this bucket. 
+void bucket_decrement_pool(bucket_t *bucket, bool *from_pool) { + *from_pool = true; + bucket_update_stats(bucket, 1, -1); + utils_fetch_and_add64(&bucket->shared_limits->total_size, + -(long long)bucket_slab_alloc_size(bucket)); +} + +bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { + size_t new_free_slabs_in_bucket; + + // Check if this bucket is used in chunked form or as full slabs. + bool chunkedBucket = bucket->size <= bucket_chunk_cut_off(bucket); + if (chunkedBucket) { + new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; + } else { + // TODO optimize + size_t avail_num = 0; + slab_list_item_t *it = NULL; + DL_FOREACH(bucket->available_slabs, it) { avail_num++; } + new_free_slabs_in_bucket = avail_num + 1; + } + + if (bucket_capacity(bucket) >= new_free_slabs_in_bucket) { + size_t pool_size = 0; + utils_atomic_load_acquire(&bucket->shared_limits->total_size, + &pool_size); + while (true) { + size_t new_pool_size = pool_size + bucket_slab_alloc_size(bucket); + + if (bucket->shared_limits->max_size < new_pool_size) { + break; + } + +// TODO!!! +#ifdef _WIN32 + if (bucket->shared_limits->total_size != new_pool_size) { + bucket->shared_limits->total_size = new_pool_size; +#else + if (utils_compare_exchange(&bucket->shared_limits->total_size, + &pool_size, &new_pool_size)) { +#endif + if (chunkedBucket) { + ++bucket->chunked_slabs_in_pool; + } + + bucket_update_stats(bucket, -1, 1); + *to_pool = true; + return true; + } + } + } + + bucket_update_stats(bucket, -1, 0); + *to_pool = false; + return false; +} + +utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket) { + return &bucket->pool->known_slabs_map_lock; +} + +void slab_reg_by_addr(void *addr, slab_t *slab) { + bucket_t *bucket = slab->bucket; + disjoint_pool_t *pool = bucket->pool; + utils_mutex_t *lock = &pool->known_slabs_map_lock; + critnib *slabs = pool->known_slabs; + + utils_mutex_lock(lock); + + // TODO multimap? + slab_t *t = (slab_t *)critnib_get(slabs, (uintptr_t)addr); + assert(t == NULL); + (void)t; + + fprintf(stderr, "[DP slab_reg_by_addr] addr: %p, slab: %p\n", addr, + (void *)slab); + critnib_insert(slabs, (uintptr_t)addr, slab, 0); + + utils_mutex_unlock(lock); +} + +void slab_unreg_by_addr(void *addr, slab_t *slab) { + bucket_t *bucket = slab->bucket; + disjoint_pool_t *pool = bucket->pool; + utils_mutex_t *lock = &pool->known_slabs_map_lock; + critnib *slabs = pool->known_slabs; + + utils_mutex_lock(lock); + + // debug only + // assume single-value per key + slab_t *known_slab = (slab_t *)critnib_get(slabs, (uintptr_t)addr); + assert(known_slab != NULL && "Slab is not found"); + assert(slab == known_slab); + (void)known_slab; + + fprintf(stderr, "[DP slab_unreg_by_addr] addr: %p, slab: %p\n", addr, + (void *)slab); + critnib_remove(slabs, (uintptr_t)addr); + + utils_mutex_unlock(lock); +} + +size_t AllocImpl_sizeToIdx(disjoint_pool_t *pool, size_t size) { + assert(size <= CutOff && "Unexpected size"); + assert(size > 0 && "Unexpected size"); + + size_t min_bucket_size = (size_t)1 << pool->min_bucket_size_exp; + if (size < min_bucket_size) { + return 0; + } + + // Get the position of the leftmost set bit. 
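+    // Worked example for the index math below (illustrative, assuming a
+    // 64-byte MinBucketSize, i.e. min_bucket_size_exp == 6):
+    //   size 64  -> position 6, power of 2           -> index 0 (bucket 64)
+    //   size 65  -> position 6, not above halfway     -> index 1 (bucket 96)
+    //   size 96  -> position 6, not above halfway     -> index 1 (bucket 96)
+    //   size 100 -> position 6, above halfway (> 96)  -> index 2 (bucket 128)
+    //   size 128 -> position 7, power of 2            -> index 2 (bucket 128)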
+ size_t position = getLeftmostSetBitPos(size); + + bool is_power_of_2 = 0 == (size & (size - 1)); + bool larger_than_halfway_between_powers_of_2 = + !is_power_of_2 && + (bool)((size - 1) & ((uint64_t)(1) << (position - 1))); + size_t index = (position - pool->min_bucket_size_exp) * 2 + + (int)(!is_power_of_2) + + (int)larger_than_halfway_between_powers_of_2; + + return index; +} + +umf_disjoint_pool_shared_limits_t *AllocImpl_getLimits(disjoint_pool_t *pool) { + if (pool->params.SharedLimits) { + return pool->params.SharedLimits; + } else { + return pool->default_shared_limits; + } +} + +bucket_t *AllocImpl_findBucket(disjoint_pool_t *pool, size_t Size) { + size_t calculatedIdx = AllocImpl_sizeToIdx(pool, Size); + bucket_t *bucket = pool->buckets[calculatedIdx]; + assert(bucket->size >= Size); + (void)bucket; + + if (calculatedIdx > 0) { + bucket_t *bucket_prev = pool->buckets[calculatedIdx - 1]; + assert(bucket_prev->size < Size); + (void)bucket_prev; + } + + return pool->buckets[calculatedIdx]; +} + +void AllocImpl_printStats(disjoint_pool_t *pool, bool *TitlePrinted, + size_t *HighBucketSize, size_t *HighPeakSlabsInUse, + const char *MTName) { + (void)TitlePrinted; // TODO + (void)MTName; // TODO + + *HighBucketSize = 0; + *HighPeakSlabsInUse = 0; + for (size_t i = 0; i < pool->buckets_num; i++) { + // TODO + //(*B).printStats(TitlePrinted, MTName); + bucket_t *bucket = pool->buckets[i]; + *HighPeakSlabsInUse = + utils_max(bucket->max_slabs_in_use, *HighPeakSlabsInUse); + if (bucket->alloc_count) { + *HighBucketSize = + utils_max(bucket_slab_alloc_size(bucket), *HighBucketSize); + } + } +} + +static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, + size_t size, size_t alignment) { + void *ptr; + umf_result_t ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + return NULL; + } + annotate_memory_inaccessible(ptr, size); + return ptr; +} + +static umf_result_t memoryProviderFree(umf_memory_provider_handle_t hProvider, + void *ptr) { + size_t size = 0; + + if (ptr) { + umf_alloc_info_t allocInfo = {NULL, 0, NULL}; + umf_result_t umf_result = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); + if (umf_result == UMF_RESULT_SUCCESS) { + size = allocInfo.baseSize; + } + } + + umf_result_t ret = umfMemoryProviderFree(hProvider, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { + + TLS_last_allocation_error = ret; + // throw MemoryProviderError{ret}; + return ret; + } + return UMF_RESULT_SUCCESS; +} + +void *AllocImpl_allocate(disjoint_pool_t *pool, size_t Size, bool *FromPool) { + void *Ptr; + + if (Size == 0) { + return NULL; + } + + *FromPool = false; + if (Size > pool->params.MaxPoolableSize) { + Ptr = memoryProviderAlloc(pool->provider, Size, 0); + + if (Ptr == NULL) { + // TODO get code from func + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + return NULL; + } + + annotate_memory_undefined(Ptr, Size); + return Ptr; + } + + bucket_t *bucket = AllocImpl_findBucket(pool, Size); + + if (Size > bucket_chunk_cut_off(bucket)) { + Ptr = bucket_get_slab(bucket, FromPool); + } else { + Ptr = bucket_get_chunk(bucket, FromPool); + } + + if (Ptr == NULL) { + // TODO get code from func + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + return NULL; + } + + if (pool->params.PoolTrace > 1) { + bucket_count_alloc(bucket, FromPool); + } + + VALGRIND_DO_MEMPOOL_ALLOC(pool, Ptr, Size); + annotate_memory_undefined(Ptr, bucket->size); + + return Ptr; +} + +void 
*AllocImpl_allocate_align(disjoint_pool_t *pool, size_t Size, + size_t Alignment, bool *FromPool) { + void *Ptr; + + if (Size == 0) { + return NULL; + } + + if (Alignment <= 1) { + return AllocImpl_allocate(pool, Size, FromPool); + } + + size_t AlignedSize; + if (Alignment <= pool->provider_min_page_size) { + // This allocation will be served from a Bucket which size is multiple + // of Alignment and Slab address is aligned to provider_min_page_size + // so the address will be properly aligned. + AlignedSize = (Size > 1) ? ALIGN_UP(Size, Alignment) : Alignment; + } else { + // Slabs are only aligned to provider_min_page_size, we need to compensate + // for that in case the allocation is within pooling limit. + // TODO: consider creating properly-aligned Slabs on demand + AlignedSize = Size + Alignment - 1; + } + + // Check if requested allocation size is within pooling limit. + // If not, just request aligned pointer from the system. + *FromPool = false; + if (AlignedSize > pool->params.MaxPoolableSize) { + Ptr = memoryProviderAlloc(pool->provider, Size, Alignment); + assert(Ptr); + annotate_memory_undefined(Ptr, Size); + return Ptr; + } + + bucket_t *bucket = AllocImpl_findBucket(pool, AlignedSize); + + if (AlignedSize > bucket_chunk_cut_off(bucket)) { + Ptr = bucket_get_slab(bucket, FromPool); + } else { + Ptr = bucket_get_chunk(bucket, FromPool); + } + + assert(Ptr); + if (pool->params.PoolTrace > 1) { + bucket_count_alloc(bucket, FromPool); + } + + VALGRIND_DO_MEMPOOL_ALLOC(pool, ALIGN_UP((size_t)Ptr, Alignment), Size); + annotate_memory_undefined((void *)ALIGN_UP((size_t)Ptr, Alignment), Size); + return (void *)ALIGN_UP((size_t)Ptr, Alignment); +} + +umf_result_t AllocImpl_deallocate(disjoint_pool_t *pool, void *Ptr, + bool *to_pool) { + if (Ptr == NULL) { + return UMF_RESULT_SUCCESS; + } + + void *SlabPtr = (void *)ALIGN_DOWN((size_t)Ptr, pool->params.SlabMinSize); + + // Lock the map on read + utils_mutex_lock(&pool->known_slabs_map_lock); + + *to_pool = false; + + slab_t *slab = (slab_t *)critnib_get(pool->known_slabs, (uintptr_t)SlabPtr); + //auto Slabs = getKnownSlabs().equal_range(SlabPtr); + if (slab == NULL) { + utils_mutex_unlock(&pool->known_slabs_map_lock); + umf_result_t ret = memoryProviderFree(pool->provider, Ptr); + return ret; + } + + // TODO - no multimap + // for (auto It = Slabs.first; It != Slabs.second; ++It) { + + // The slab object won't be deleted until it's removed from the map which is + // protected by the lock, so it's safe to access it here. + if (Ptr >= slab_get(slab) && Ptr < slab_get_end(slab)) { + // Unlock the map before freeing the chunk, it may be locked on write + // there + utils_mutex_unlock(&pool->known_slabs_map_lock); + bucket_t *bucket = slab->bucket; + + if (pool->params.PoolTrace > 1) { + bucket->free_count++; + } + + VALGRIND_DO_MEMPOOL_FREE(pool, Ptr); + annotate_memory_inaccessible(Ptr, bucket->size); + if (bucket->size <= bucket_chunk_cut_off(bucket)) { + bucket_free_chunk(bucket, Ptr, slab, to_pool); + } else { + bucket_free_slab(bucket, slab, to_pool); + } + + return UMF_RESULT_SUCCESS; + } + //} // for multimap + + utils_mutex_unlock(&pool->known_slabs_map_lock); + // There is a rare case when we have a pointer from system allocation next + // to some slab with an entry in the map. So we find a slab + // but the range checks fail. + memoryProviderFree(pool->provider, Ptr); + return UMF_RESULT_SUCCESS; +} + +/* +// TODO? 
+std::ostream &operator<<(std::ostream &Os, slab_t &Slab) { + Os << "Slab<" << slab_get(&Slab) << ", " << slab_get_end(&Slab) << ", " + << slab->bucket->getSize() << ">"; + return Os; +} +*/ + +/* +// TODO move +void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { + if (alloc_count) { + if (!TitlePrinted) { + std::cout << Label << " memory statistics\n"; + std::cout << std::setw(14) << "Bucket Size" << std::setw(12) + << "Allocs" << std::setw(12) << "Frees" << std::setw(18) + << "Allocs from Pool" << std::setw(20) + << "Peak Slabs in Use" << std::setw(21) + << "Peak Slabs in Pool" << std::endl; + TitlePrinted = true; + } + std::cout << std::setw(14) << getSize() << std::setw(12) << alloc_count + << std::setw(12) << free_count << std::setw(18) + << allocPoolCount << std::setw(20) << max_slabs_in_use + << std::setw(21) << max_slabs_in_pool << std::endl; + } +} +*/ + +umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, + void *params, void **ppPool) { + if (!provider) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + disjoint_pool_t *disjoint_pool = + (disjoint_pool_t *)umf_ba_global_alloc(sizeof(struct disjoint_pool_t)); + if (!disjoint_pool) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + umf_disjoint_pool_params_t *dp_params = + (umf_disjoint_pool_params_t *)params; + + // MinBucketSize parameter must be a power of 2 for bucket sizes + // to generate correctly. + if (!dp_params->MinBucketSize || + !((dp_params->MinBucketSize & (dp_params->MinBucketSize - 1)) == 0)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + VALGRIND_DO_CREATE_MEMPOOL(disjoint_pool, 0, 0); + + disjoint_pool->provider = provider; + disjoint_pool->params = *dp_params; + + utils_mutex_init(&disjoint_pool->known_slabs_map_lock); + disjoint_pool->known_slabs = critnib_new(); + + // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. + // Powers of 2 and the value halfway between the powers of 2. + size_t Size1 = disjoint_pool->params.MinBucketSize; + + // MinBucketSize cannot be larger than CutOff. + Size1 = utils_min(Size1, CutOff); + + // Buckets sized smaller than the bucket default size- 8 aren't needed. + Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); + + // Calculate the exponent for MinBucketSize used for finding buckets. 
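+    //
+    // Worked example (illustrative): with Size1 == 64 the generated bucket
+    // sizes are 64, 96, 128, 192, 256, 384, ... up to CutOff (2GB), i.e.
+    // each power of two plus the value halfway to the next one, giving
+    // 51 buckets in total, and min_bucket_size_exp computed below is 6.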
+ disjoint_pool->min_bucket_size_exp = (size_t)log2Utils(Size1); + disjoint_pool->default_shared_limits = + umfDisjointPoolSharedLimitsCreate(SIZE_MAX); + + // count number of buckets, start from 1 + disjoint_pool->buckets_num = 1; + size_t Size2 = Size1 + Size1 / 2; + size_t ts2 = Size2, ts1 = Size1; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { + disjoint_pool->buckets_num += 2; + } + disjoint_pool->buckets = (bucket_t **)umf_ba_global_alloc( + sizeof(bucket_t *) * disjoint_pool->buckets_num); + + int i = 0; + Size1 = ts1; + Size2 = ts2; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2, i += 2) { + disjoint_pool->buckets[i] = create_bucket( + Size1, disjoint_pool, AllocImpl_getLimits(disjoint_pool)); + disjoint_pool->buckets[i + 1] = create_bucket( + Size2, disjoint_pool, AllocImpl_getLimits(disjoint_pool)); + } + disjoint_pool->buckets[i] = create_bucket( + CutOff, disjoint_pool, AllocImpl_getLimits(disjoint_pool)); + + umf_result_t ret = umfMemoryProviderGetMinPageSize( + provider, NULL, &disjoint_pool->provider_min_page_size); + if (ret != UMF_RESULT_SUCCESS) { + disjoint_pool->provider_min_page_size = 0; + } + + *ppPool = (void *)disjoint_pool; + + return UMF_RESULT_SUCCESS; +} + +void *disjoint_pool_malloc(void *pool, size_t size) { + // For full-slab allocations indicates + // whether slab is from Pool. + + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + bool FromPool; + void *Ptr = AllocImpl_allocate(hPool, size, &FromPool); + + if (hPool->params.PoolTrace > 2) { + const char *MT = hPool->params.Name; + (void)MT; + //std::cout << "Allocated " << std::setw(8) << size << " " << MT + // << " bytes from " << (FromPool ? "Pool" : "Provider") << " ->" + // << Ptr << std::endl; + } + return Ptr; +} + +void *disjoint_pool_calloc(void *pool, size_t num, size_t size) { + (void)pool; + (void)num; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_realloc(void *pool, void *ptr, size_t size) { + (void)pool; + (void)ptr; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + bool FromPool; + void *Ptr = AllocImpl_allocate_align(hPool, size, alignment, &FromPool); + + if (hPool->params.PoolTrace > 2) { + const char *MT = hPool->params.Name; + (void)MT; + //std::cout << "Allocated " << std::setw(8) << size << " " << MT + // << " bytes aligned at " << alignment << " from " + // << (FromPool ? "Pool" : "Provider") << " ->" << Ptr + // << std::endl; + } + + return Ptr; +} + +size_t disjoint_pool_malloc_usable_size(void *pool, void *ptr) { + (void)pool; + (void)ptr; + + // Not supported + return 0; +} + +umf_result_t disjoint_pool_free(void *pool, void *ptr) { + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + bool ToPool; + umf_result_t ret = AllocImpl_deallocate(hPool, ptr, &ToPool); + /* + if (ret == UMF_RESULT_SUCCESS) { + + if (impl->getParams().PoolTrace > 2) { + auto MT = impl->getParams().Name; + std::cout << "Freed " << MT << " " << ptr << " to " + << (ToPool ? 
"Pool" : "Provider") + << ", Current total pool size " + << impl->getLimits()->TotalSize.load() + << ", Current pool size for " << MT << " " + << impl->getParams().CurPoolSize << "\n"; + } + }*/ + return ret; +} + +umf_result_t disjoint_pool_get_last_allocation_error(void *pool) { + (void)pool; + + return TLS_last_allocation_error; +} + +// Define destructor for use with unique_ptr +void disjoint_pool_finalize(void *pool) { + + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + for (size_t i = 0; i < hPool->buckets_num; i++) { + destroy_bucket(hPool->buckets[i]); + } + + VALGRIND_DO_DESTROY_MEMPOOL(hPool); + + umfDisjointPoolSharedLimitsDestroy(hPool->default_shared_limits); + critnib_delete(hPool->known_slabs); + + utils_mutex_destroy_not_free(&hPool->known_slabs_map_lock); + + umf_ba_global_free(hPool); + + /* + if (impl->getParams().PoolTrace > 1) { + bool TitlePrinted = false; + size_t HighBucketSize; + size_t HighPeakSlabsInUse; + auto name = impl->getParams().Name; + //try { // cannot throw in destructor + impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, + name); + if (TitlePrinted) { + std::cout << "Current Pool Size " + << impl->getLimits()->TotalSize.load() << std::endl; + std::cout << "Suggested Setting=;" + << std::string(1, (char)tolower(name[0])) + << std::string(name + 1) << ":" << HighBucketSize << "," + << HighPeakSlabsInUse << ",64K" << std::endl; + } + //} catch (...) { // ignore exceptions + // } + } + */ +} + +static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = disjoint_pool_initialize, + .finalize = disjoint_pool_finalize, + .malloc = disjoint_pool_malloc, + .calloc = disjoint_pool_calloc, + .realloc = disjoint_pool_realloc, + .aligned_malloc = disjoint_pool_aligned_malloc, + .malloc_usable_size = disjoint_pool_malloc_usable_size, + .free = disjoint_pool_free, + .get_last_allocation_error = disjoint_pool_get_last_allocation_error, +}; + +umf_memory_pool_ops_t *umfDisjointPoolOps(void) { + return &UMF_DISJOINT_POOL_OPS; +} diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp deleted file mode 100644 index 2cf8df7a4..000000000 --- a/src/pool/pool_disjoint.cpp +++ /dev/null @@ -1,1131 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// TODO: replace with logger? -#include - -#include "provider/provider_tracking.h" - -#include "../cpp_helpers.hpp" -#include "pool_disjoint.h" -#include "umf.h" -#include "utils_log.h" -#include "utils_math.h" -#include "utils_sanitizers.h" - -// Temporary solution for disabling memory poisoning. This is needed because -// AddressSanitizer does not support memory poisoning for GPU allocations. 
-// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 -#ifndef POISON_MEMORY -#define POISON_MEMORY 0 -#endif - -static inline void annotate_memory_inaccessible([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_inaccessible(ptr, size); -#endif -} - -static inline void annotate_memory_undefined([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_undefined(ptr, size); -#endif -} - -typedef struct umf_disjoint_pool_shared_limits_t { - size_t MaxSize; - std::atomic TotalSize; -} umf_disjoint_pool_shared_limits_t; - -class DisjointPool { - public: - class AllocImpl; - using Config = umf_disjoint_pool_params_t; - - umf_result_t initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_t *parameters); - void *malloc(size_t size); - void *calloc(size_t, size_t); - void *realloc(void *, size_t); - void *aligned_malloc(size_t size, size_t alignment); - size_t malloc_usable_size(void *); - umf_result_t free(void *ptr); - umf_result_t get_last_allocation_error(); - - DisjointPool(); - ~DisjointPool(); - - private: - std::unique_ptr impl; -}; - -umf_disjoint_pool_shared_limits_t * -umfDisjointPoolSharedLimitsCreate(size_t MaxSize) { - return new umf_disjoint_pool_shared_limits_t{MaxSize, 0}; -} - -void umfDisjointPoolSharedLimitsDestroy( - umf_disjoint_pool_shared_limits_t *limits) { - delete limits; -} - -// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is -// requested. The implementation distinguishes between allocations of size -// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. -// Allocation requests smaller than ChunkCutoff use chunks taken from a single -// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation -// size, and 8-byte allocations, only 1 in ~8000 requests results in a new -// coarse-grain allocation. Freeing results only in a chunk of a larger -// allocation to be marked as available and no real return to the system. An -// allocation is returned to the system only when all chunks in the larger -// allocation are freed by the program. Allocations larger than ChunkCutOff use -// a separate coarse-grain allocation for each request. These are subject to -// "pooling". That is, when such an allocation is freed by the program it is -// retained in a pool. The pool is available for future allocations, which means -// there are fewer actual coarse-grain allocations/deallocations. - -// The largest size which is allocated via the allocator. -// Allocations with size > CutOff bypass the pool and -// go directly to the provider. -static constexpr size_t CutOff = (size_t)1 << 31; // 2GB - -// Aligns the pointer down to the specified alignment -// (e.g. returns 8 for Size = 13, Alignment = 8) -static void *AlignPtrDown(void *Ptr, const size_t Alignment) { - return reinterpret_cast((reinterpret_cast(Ptr)) & - (~(Alignment - 1))); -} - -// Aligns the pointer up to the specified alignment -// (e.g. returns 16 for Size = 13, Alignment = 8) -static void *AlignPtrUp(void *Ptr, const size_t Alignment) { - void *AlignedPtr = AlignPtrDown(Ptr, Alignment); - // Special case when the pointer is already aligned - if (Ptr == AlignedPtr) { - return Ptr; - } - return static_cast(AlignedPtr) + Alignment; -} - -// Aligns the value up to the specified alignment -// (e.g. 
returns 16 for Size = 13, Alignment = 8) -static size_t AlignUp(size_t Val, size_t Alignment) { - assert(Alignment > 0); - return (Val + Alignment - 1) & (~(Alignment - 1)); -} - -typedef struct MemoryProviderError { - umf_result_t code; -} MemoryProviderError_t; - -class Bucket; - -// Represents the allocated memory block of size 'SlabMinSize' -// Internally, it splits the memory block into chunks. The number of -// chunks depends of the size of a Bucket which created the Slab. -// Note: Bucket's methods are responsible for thread safety of Slab access, -// so no locking happens here. -class Slab { - - // Pointer to the allocated memory of SlabMinSize bytes - void *MemPtr; - - // Represents the current state of each chunk: - // if the bit is set then the chunk is allocated - // the chunk is free for allocation otherwise - std::vector Chunks; - - // Total number of allocated chunks at the moment. - size_t NumAllocated = 0; - - // The bucket which the slab belongs to - Bucket &bucket; - - using ListIter = std::list>::iterator; - - // Store iterator to the corresponding node in avail/unavail list - // to achieve O(1) removal - ListIter SlabListIter; - - // Hints where to start search for free chunk in a slab - size_t FirstFreeChunkIdx = 0; - - // Return the index of the first available chunk, SIZE_MAX otherwise - size_t FindFirstAvailableChunkIdx() const; - - // Register/Unregister the slab in the global slab address map. - void regSlab(Slab &); - void unregSlab(Slab &); - static void regSlabByAddr(void *, Slab &); - static void unregSlabByAddr(void *, Slab &); - - public: - Slab(Bucket &); - ~Slab(); - - void setIterator(ListIter It) { SlabListIter = It; } - ListIter getIterator() const { return SlabListIter; } - - size_t getNumAllocated() const { return NumAllocated; } - - // Get pointer to allocation that is one piece of this slab. - void *getChunk(); - - // Get pointer to allocation that is this entire slab. - void *getSlab(); - - void *getPtr() const { return MemPtr; } - void *getEnd() const; - - size_t getChunkSize() const; - size_t getNumChunks() const { return Chunks.size(); } - - bool hasAvail(); - - Bucket &getBucket(); - const Bucket &getBucket() const; - - void freeChunk(void *Ptr); -}; - -class Bucket { - const size_t Size; - - // List of slabs which have at least 1 available chunk. - std::list> AvailableSlabs; - - // List of slabs with 0 available chunk. - std::list> UnavailableSlabs; - - // Protects the bucket and all the corresponding slabs - std::mutex BucketLock; - - // Reference to the allocator context, used access memory allocation - // routines, slab map and etc. - DisjointPool::AllocImpl &OwnAllocCtx; - - // For buckets used in chunked mode, a counter of slabs in the pool. - // For allocations that use an entire slab each, the entries in the Available - // list are entries in the pool.Each slab is available for a new - // allocation.The size of the Available list is the size of the pool. - // For allocations that use slabs in chunked mode, slabs will be in the - // Available list if any one or more of their chunks is free.The entire slab - // is not necessarily free, just some chunks in the slab are free. To - // implement pooling we will allow one slab in the Available list to be - // entirely empty. Normally such a slab would have been freed. But - // now we don't, and treat this slab as "in the pool". - // When a slab becomes entirely free we have to decide whether to return it - // to the provider or keep it allocated. 
A simple check for size of the - // Available list is not sufficient to check whether any slab has been - // pooled yet. We would have to traverse the entire Available list and check - // if any of them is entirely free. Instead we keep a counter of entirely - // empty slabs within the Available list to speed up the process of checking - // if a slab in this bucket is already pooled. - size_t chunkedSlabsInPool; - - // Statistics - size_t allocPoolCount; - size_t freeCount; - size_t currSlabsInUse; - size_t currSlabsInPool; - size_t maxSlabsInPool; - - public: - // Statistics - size_t allocCount; - size_t maxSlabsInUse; - - Bucket(size_t Sz, DisjointPool::AllocImpl &AllocCtx) - : Size{Sz}, OwnAllocCtx{AllocCtx}, chunkedSlabsInPool(0), - allocPoolCount(0), freeCount(0), currSlabsInUse(0), - currSlabsInPool(0), maxSlabsInPool(0), allocCount(0), - maxSlabsInUse(0) {} - - // Get pointer to allocation that is one piece of an available slab in this - // bucket. - void *getChunk(bool &FromPool); - - // Get pointer to allocation that is a full slab in this bucket. - void *getSlab(bool &FromPool); - - // Return the allocation size of this bucket. - size_t getSize() const { return Size; } - - // Free an allocation that is one piece of a slab in this bucket. - void freeChunk(void *Ptr, Slab &Slab, bool &ToPool); - - // Free an allocation that is a full slab in this bucket. - void freeSlab(Slab &Slab, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle(); - - DisjointPool::AllocImpl &getAllocCtx() { return OwnAllocCtx; } - - // Check whether an allocation to be freed can be placed in the pool. - bool CanPool(bool &ToPool); - - // The minimum allocation size for any slab. - size_t SlabMinSize(); - - // The allocation size for a slab in this bucket. - size_t SlabAllocSize(); - - // The minimum size of a chunk from this bucket's slabs. - size_t ChunkCutOff(); - - // The number of slabs in this bucket that can be in the pool. - size_t Capacity(); - - // The maximum allocation size subject to pooling. - size_t MaxPoolableSize(); - - // Update allocation count - void countAlloc(bool FromPool); - - // Update free count - void countFree(); - - // Update statistics of Available/Unavailable - void updateStats(int InUse, int InPool); - - // Print bucket statistics - void printStats(bool &TitlePrinted, const std::string &Label); - - private: - void onFreeChunk(Slab &, bool &ToPool); - - // Update statistics of pool usage, and indicate that an allocation was made - // from the pool. - void decrementPool(bool &FromPool); - - // Get a slab to be used for chunked allocations. - decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromPool); - - // Get a slab that will be used as a whole for a single allocation. - decltype(AvailableSlabs.begin()) getAvailFullSlab(bool &FromPool); -}; - -class DisjointPool::AllocImpl { - // It's important for the map to be destroyed last after buckets and their - // slabs This is because slab's destructor removes the object from the map. 
- std::unordered_multimap KnownSlabs; - std::shared_timed_mutex KnownSlabsMapLock; - - // Handle to the memory provider - umf_memory_provider_handle_t MemHandle; - - // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) - std::vector> Buckets; - - // Configuration for this instance - umf_disjoint_pool_params_t params; - - umf_disjoint_pool_shared_limits_t DefaultSharedLimits = { - (std::numeric_limits::max)(), 0}; - - // Used in algorithm for finding buckets - std::size_t MinBucketSizeExp; - - // Coarse-grain allocation min alignment - size_t ProviderMinPageSize; - - public: - AllocImpl(umf_memory_provider_handle_t hProvider, - umf_disjoint_pool_params_t *params) - : MemHandle{hProvider}, params(*params) { - - VALGRIND_DO_CREATE_MEMPOOL(this, 0, 0); - - // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. - // Powers of 2 and the value halfway between the powers of 2. - auto Size1 = this->params.MinBucketSize; - // MinBucketSize cannot be larger than CutOff. - Size1 = std::min(Size1, CutOff); - // Buckets sized smaller than the bucket default size- 8 aren't needed. - Size1 = std::max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); - // Calculate the exponent for MinBucketSize used for finding buckets. - MinBucketSizeExp = (size_t)log2Utils(Size1); - auto Size2 = Size1 + Size1 / 2; - for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { - Buckets.push_back(std::make_unique(Size1, *this)); - Buckets.push_back(std::make_unique(Size2, *this)); - } - Buckets.push_back(std::make_unique(CutOff, *this)); - - auto ret = umfMemoryProviderGetMinPageSize(hProvider, nullptr, - &ProviderMinPageSize); - if (ret != UMF_RESULT_SUCCESS) { - ProviderMinPageSize = 0; - } - } - - ~AllocImpl() { VALGRIND_DO_DESTROY_MEMPOOL(this); } - - void *allocate(size_t Size, size_t Alignment, bool &FromPool); - void *allocate(size_t Size, bool &FromPool); - void deallocate(void *Ptr, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle() { return MemHandle; } - - std::shared_timed_mutex &getKnownSlabsMapLock() { - return KnownSlabsMapLock; - } - std::unordered_multimap &getKnownSlabs() { - return KnownSlabs; - } - - size_t SlabMinSize() { return params.SlabMinSize; }; - - umf_disjoint_pool_params_t &getParams() { return params; } - - umf_disjoint_pool_shared_limits_t *getLimits() { - if (params.SharedLimits) { - return params.SharedLimits; - } else { - return &DefaultSharedLimits; - } - }; - - void printStats(bool &TitlePrinted, size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, const std::string &Label); - - private: - Bucket &findBucket(size_t Size); - std::size_t sizeToIdx(size_t Size); -}; - -static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, - size_t size, size_t alignment = 0) { - void *ptr; - auto ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); - if (ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } - annotate_memory_inaccessible(ptr, size); - return ptr; -} - -static void memoryProviderFree(umf_memory_provider_handle_t hProvider, - void *ptr) { - size_t size = 0; - - if (ptr) { - umf_alloc_info_t allocInfo = {NULL, 0, NULL}; - umf_result_t umf_result = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); - if (umf_result == UMF_RESULT_SUCCESS) { - size = allocInfo.baseSize; - } - } - - auto ret = umfMemoryProviderFree(hProvider, ptr, size); - if (ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } -} - -bool operator==(const Slab &Lhs, const Slab &Rhs) { - return Lhs.getPtr() == Rhs.getPtr(); -} - 
-std::ostream &operator<<(std::ostream &Os, const Slab &Slab) { - Os << "Slab<" << Slab.getPtr() << ", " << Slab.getEnd() << ", " - << Slab.getBucket().getSize() << ">"; - return Os; -} - -Slab::Slab(Bucket &Bkt) - : // In case bucket size is not a multiple of SlabMinSize, we would have - // some padding at the end of the slab. - Chunks(Bkt.SlabMinSize() / Bkt.getSize()), NumAllocated{0}, - bucket(Bkt), SlabListIter{}, FirstFreeChunkIdx{0} { - auto SlabSize = Bkt.SlabAllocSize(); - MemPtr = memoryProviderAlloc(Bkt.getMemHandle(), SlabSize); - regSlab(*this); -} - -Slab::~Slab() { - try { - unregSlab(*this); - } catch (std::exception &e) { - LOG_ERR("DisjointPool: unexpected error: %s", e.what()); - } - - try { - memoryProviderFree(bucket.getMemHandle(), MemPtr); - } catch (MemoryProviderError &e) { - LOG_ERR("DisjointPool: error from memory provider: %d", e.code); - - if (e.code == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { - const char *message = ""; - int error = 0; - - try { - umfMemoryProviderGetLastNativeError( - umfGetLastFailedMemoryProvider(), &message, &error); - LOG_ERR("Native error msg: %s, native error code: %d", message, - error); - } catch (...) { - // ignore any additional errors from logger - } - } - } -} - -// Return the index of the first available chunk, SIZE_MAX otherwise -size_t Slab::FindFirstAvailableChunkIdx() const { - // Use the first free chunk index as a hint for the search. - auto It = std::find_if(Chunks.begin() + FirstFreeChunkIdx, Chunks.end(), - [](auto x) { return !x; }); - if (It != Chunks.end()) { - return It - Chunks.begin(); - } - - return std::numeric_limits::max(); -} - -void *Slab::getChunk() { - // assert(NumAllocated != Chunks.size()); - - const size_t ChunkIdx = FindFirstAvailableChunkIdx(); - // Free chunk must exist, otherwise we would have allocated another slab - assert(ChunkIdx != (std::numeric_limits::max())); - - void *const FreeChunk = - (static_cast(getPtr())) + ChunkIdx * getChunkSize(); - Chunks[ChunkIdx] = true; - NumAllocated += 1; - - // Use the found index as the next hint - FirstFreeChunkIdx = ChunkIdx; - - return FreeChunk; -} - -void *Slab::getSlab() { return getPtr(); } - -Bucket &Slab::getBucket() { return bucket; } -const Bucket &Slab::getBucket() const { return bucket; } - -size_t Slab::getChunkSize() const { return bucket.getSize(); } - -void Slab::regSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - Map.insert({Addr, Slab}); -} - -void Slab::unregSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - - auto Slabs = Map.equal_range(Addr); - // At least the must get the current slab from the map. 
- assert(Slabs.first != Slabs.second && "Slab is not found"); - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - if (It->second == Slab) { - Map.erase(It); - return; - } - } - - assert(false && "Slab is not found"); -} - -void Slab::regSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - regSlabByAddr(StartAddr, Slab); - regSlabByAddr(EndAddr, Slab); -} - -void Slab::unregSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - unregSlabByAddr(StartAddr, Slab); - unregSlabByAddr(EndAddr, Slab); -} - -void Slab::freeChunk(void *Ptr) { - // This method should be called through bucket(since we might remove the slab - // as a result), therefore all locks are done on that level. - - // Make sure that we're in the right slab - assert(Ptr >= getPtr() && Ptr < getEnd()); - - // Even if the pointer p was previously aligned, it's still inside the - // corresponding chunk, so we get the correct index here. - auto ChunkIdx = (static_cast(Ptr) - static_cast(MemPtr)) / - getChunkSize(); - - // Make sure that the chunk was allocated - assert(Chunks[ChunkIdx] && "double free detected"); - - Chunks[ChunkIdx] = false; - NumAllocated -= 1; - - if (ChunkIdx < FirstFreeChunkIdx) { - FirstFreeChunkIdx = ChunkIdx; - } -} - -void *Slab::getEnd() const { - return static_cast(getPtr()) + bucket.SlabMinSize(); -} - -bool Slab::hasAvail() { return NumAllocated != getNumChunks(); } - -// If a slab was available in the pool then note that the current pooled -// size has reduced by the size of a slab in this bucket. -void Bucket::decrementPool(bool &FromPool) { - FromPool = true; - updateStats(1, -1); - OwnAllocCtx.getLimits()->TotalSize -= SlabAllocSize(); -} - -auto Bucket::getAvailFullSlab(bool &FromPool) - -> decltype(AvailableSlabs.begin()) { - // Return a slab that will be used for a single allocation. - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - FromPool = false; - updateStats(1, 0); - } else { - decrementPool(FromPool); - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getSlab(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailFullSlab(FromPool); - auto *FreeSlab = (*SlabIt)->getSlab(); - auto It = - UnavailableSlabs.insert(UnavailableSlabs.begin(), std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - return FreeSlab; -} - -void Bucket::freeSlab(Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - if (CanPool(ToPool)) { - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - (*It)->setIterator(It); - } else { - UnavailableSlabs.erase(SlabIter); - } -} - -auto Bucket::getAvailSlab(bool &FromPool) -> decltype(AvailableSlabs.begin()) { - - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - - updateStats(1, 0); - FromPool = false; - } else { - if ((*(AvailableSlabs.begin()))->getNumAllocated() == 0) { - // If this was an empty slab, it was in the pool. - // Now it is no longer in the pool, so update count. 
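At this point the old code treats any reuse of an already-available slab as a pool hit, and additionally drops the chunked-slab pool counter when the reused slab was completely empty (the decrement continues just below). The same classification could be kept as a small helper in the C rewrite; sketch_bucket_t, sketch_slab_t and their fields are illustrative stand-ins, not the structures introduced by this patch.

#include <stdbool.h>
#include <stddef.h>

typedef struct sketch_slab {
    size_t num_allocated;     // chunks currently handed out from this slab
    struct sketch_slab *next; // next slab with at least one free chunk
} sketch_slab_t;

typedef struct sketch_bucket {
    sketch_slab_t *available_slabs; // slabs with at least one free chunk
    size_t chunked_slabs_in_pool;   // completely empty slabs kept pooled
} sketch_bucket_t;

// Returns true when a chunk request can be served from an existing slab
// (counted as "from pool"); false means a fresh slab must first be
// allocated from the provider.
static bool sketch_chunk_from_pool(sketch_bucket_t *b) {
    if (b->available_slabs == NULL) {
        return false; // no slab available: provider allocation required
    }
    if (b->available_slabs->num_allocated == 0) {
        // The head slab was entirely empty, i.e. it was sitting in the
        // pool; it now leaves the pool, so the counter goes down.
        b->chunked_slabs_in_pool--;
    }
    return true; // any reuse of an existing slab counts as a pool hit
}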
- --chunkedSlabsInPool; - decrementPool(FromPool); - } else { - // Allocation from existing slab is treated as from pool for statistics. - FromPool = true; - } - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getChunk(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailSlab(FromPool); - auto *FreeChunk = (*SlabIt)->getChunk(); - - // If the slab is full, move it to unavailable slabs and update its iterator - if (!((*SlabIt)->hasAvail())) { - auto It = UnavailableSlabs.insert(UnavailableSlabs.begin(), - std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - } - - return FreeChunk; -} - -void Bucket::freeChunk(void *Ptr, Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - - Slab.freeChunk(Ptr); - - onFreeChunk(Slab, ToPool); -} - -// The lock must be acquired before calling this method -void Bucket::onFreeChunk(Slab &Slab, bool &ToPool) { - ToPool = true; - - // In case if the slab was previously full and now has 1 available - // chunk, it should be moved to the list of available slabs - if (Slab.getNumAllocated() == (Slab.getNumChunks() - 1)) { - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - - (*It)->setIterator(It); - } - - // Check if slab is empty, and pool it if we can. - if (Slab.getNumAllocated() == 0) { - // The slab is now empty. - // If pool has capacity then put the slab in the pool. - // The ToPool parameter indicates whether the Slab will be put in the - // pool or freed. - if (!CanPool(ToPool)) { - // Note: since the slab is stored as unique_ptr, just remove it from - // the list to destroy the object. - auto It = Slab.getIterator(); - assert(It != AvailableSlabs.end()); - AvailableSlabs.erase(It); - } - } -} - -bool Bucket::CanPool(bool &ToPool) { - size_t NewFreeSlabsInBucket; - // Check if this bucket is used in chunked form or as full slabs. - bool chunkedBucket = getSize() <= ChunkCutOff(); - if (chunkedBucket) { - NewFreeSlabsInBucket = chunkedSlabsInPool + 1; - } else { - NewFreeSlabsInBucket = AvailableSlabs.size() + 1; - } - if (Capacity() >= NewFreeSlabsInBucket) { - size_t PoolSize = OwnAllocCtx.getLimits()->TotalSize; - while (true) { - size_t NewPoolSize = PoolSize + SlabAllocSize(); - - if (OwnAllocCtx.getLimits()->MaxSize < NewPoolSize) { - break; - } - - if (OwnAllocCtx.getLimits()->TotalSize.compare_exchange_strong( - PoolSize, NewPoolSize)) { - if (chunkedBucket) { - ++chunkedSlabsInPool; - } - - updateStats(-1, 1); - ToPool = true; - return true; - } - } - } - - updateStats(-1, 0); - ToPool = false; - return false; -} - -umf_memory_provider_handle_t Bucket::getMemHandle() { - return OwnAllocCtx.getMemHandle(); -} - -size_t Bucket::SlabMinSize() { return OwnAllocCtx.getParams().SlabMinSize; } - -size_t Bucket::SlabAllocSize() { return std::max(getSize(), SlabMinSize()); } - -size_t Bucket::Capacity() { - // For buckets used in chunked mode, just one slab in pool is sufficient. - // For larger buckets, the capacity could be more and is adjustable. 
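The interesting part of CanPool above is the lock-free budget check: the shared TotalSize counter only grows by one slab if the result stays within MaxSize, via a compare-exchange retry loop. In C the same check can be expressed with C11 atomics (or the utils_* wrappers this patch extends further down); sketch_try_pool_slab and its parameters are illustrative names.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

// Try to account one more slab of 'slab_alloc_size' bytes against the shared
// pooling budget. Returns true if the slab may stay in the pool, false if
// pooling it would exceed 'max_size'.
static bool sketch_try_pool_slab(_Atomic size_t *pool_total_size,
                                 size_t max_size, size_t slab_alloc_size) {
    size_t current =
        atomic_load_explicit(pool_total_size, memory_order_acquire);
    while (true) {
        size_t proposed = current + slab_alloc_size;
        if (proposed > max_size) {
            return false; // over budget: return the slab to the provider
        }
        // On failure 'current' is refreshed with the value actually seen,
        // so the loop retries against up-to-date data.
        if (atomic_compare_exchange_strong_explicit(
                pool_total_size, &current, proposed, memory_order_acq_rel,
                memory_order_relaxed)) {
            return true; // slab retained in the pool
        }
    }
}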
- if (getSize() <= ChunkCutOff()) { - return 1; - } else { - return OwnAllocCtx.getParams().Capacity; - } -} - -size_t Bucket::MaxPoolableSize() { - return OwnAllocCtx.getParams().MaxPoolableSize; -} - -size_t Bucket::ChunkCutOff() { return SlabMinSize() / 2; } - -void Bucket::countAlloc(bool FromPool) { - ++allocCount; - if (FromPool) { - ++allocPoolCount; - } -} - -void Bucket::countFree() { ++freeCount; } - -void Bucket::updateStats(int InUse, int InPool) { - if (OwnAllocCtx.getParams().PoolTrace == 0) { - return; - } - currSlabsInUse += InUse; - maxSlabsInUse = std::max(currSlabsInUse, maxSlabsInUse); - currSlabsInPool += InPool; - maxSlabsInPool = std::max(currSlabsInPool, maxSlabsInPool); - // Increment or decrement current pool sizes based on whether - // slab was added to or removed from pool. - OwnAllocCtx.getParams().CurPoolSize += InPool * SlabAllocSize(); -} - -void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { - if (allocCount) { - if (!TitlePrinted) { - std::cout << Label << " memory statistics\n"; - std::cout << std::setw(14) << "Bucket Size" << std::setw(12) - << "Allocs" << std::setw(12) << "Frees" << std::setw(18) - << "Allocs from Pool" << std::setw(20) - << "Peak Slabs in Use" << std::setw(21) - << "Peak Slabs in Pool" << std::endl; - TitlePrinted = true; - } - std::cout << std::setw(14) << getSize() << std::setw(12) << allocCount - << std::setw(12) << freeCount << std::setw(18) - << allocPoolCount << std::setw(20) << maxSlabsInUse - << std::setw(21) << maxSlabsInPool << std::endl; - } -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - FromPool = false; - if (Size > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(Size); - - if (Size > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, Ptr, Size); - annotate_memory_undefined(Ptr, Bucket.getSize()); - - return Ptr; -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, - bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - if (Alignment <= 1) { - return allocate(Size, FromPool); - } - - size_t AlignedSize; - if (Alignment <= ProviderMinPageSize) { - // This allocation will be served from a Bucket which size is multiple - // of Alignment and Slab address is aligned to ProviderMinPageSize - // so the address will be properly aligned. - AlignedSize = (Size > 1) ? AlignUp(Size, Alignment) : Alignment; - } else { - // Slabs are only aligned to ProviderMinPageSize, we need to compensate - // for that in case the allocation is within pooling limit. - // TODO: consider creating properly-aligned Slabs on demand - AlignedSize = Size + Alignment - 1; - } - - // Check if requested allocation size is within pooling limit. - // If not, just request aligned pointer from the system. 
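The alignment handling just above reduces to one size computation: if the requested alignment is no stricter than the provider's minimum page size, rounding the size up to the alignment is enough, because bucket sizes are multiples of the alignment and slabs start page-aligned; otherwise the request is padded by alignment - 1 so the returned pointer can be aligned up afterwards. A hedged C sketch of that computation, assuming power-of-two alignments; the SKETCH_* names are illustrative:

#include <stddef.h>
#include <stdint.h>

// Power-of-two round-up, mirroring the usual ALIGN_UP idiom.
#define SKETCH_ALIGN_UP(value, align) (((value) + (align)-1) & ~((align)-1))

// Size actually requested from the bucket machinery for an aligned allocation.
static size_t sketch_aligned_request_size(size_t size, size_t alignment,
                                          size_t provider_min_page_size) {
    if (alignment <= provider_min_page_size) {
        // Bucket sizes are multiples of 'alignment' and slabs are at least
        // page-aligned, so the chunk address is already suitably aligned.
        return (size > 1) ? SKETCH_ALIGN_UP(size, alignment) : alignment;
    }
    // Slabs only guarantee page alignment: pad the request so that some
    // address inside the chunk satisfies the stricter alignment.
    return size + alignment - 1;
}

// The pointer handed back to the caller is then aligned up inside the chunk.
static void *sketch_align_ptr_up(void *ptr, size_t alignment) {
    return (void *)SKETCH_ALIGN_UP((uintptr_t)ptr, (uintptr_t)alignment);
}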
- FromPool = false; - if (AlignedSize > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size, Alignment); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(AlignedSize); - - if (AlignedSize > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, AlignPtrUp(Ptr, Alignment), Size); - annotate_memory_undefined(AlignPtrUp(Ptr, Alignment), Size); - return AlignPtrUp(Ptr, Alignment); -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) { - assert(Size <= CutOff && "Unexpected size"); - assert(Size > 0 && "Unexpected size"); - - size_t MinBucketSize = (size_t)1 << MinBucketSizeExp; - if (Size < MinBucketSize) { - return 0; - } - - // Get the position of the leftmost set bit. - size_t position = getLeftmostSetBitPos(Size); - - auto isPowerOf2 = 0 == (Size & (Size - 1)); - auto largerThanHalfwayBetweenPowersOf2 = - !isPowerOf2 && bool((Size - 1) & (uint64_t(1) << (position - 1))); - auto index = (position - MinBucketSizeExp) * 2 + (int)(!isPowerOf2) + - (int)largerThanHalfwayBetweenPowersOf2; - - return index; -} - -Bucket &DisjointPool::AllocImpl::findBucket(size_t Size) { - auto calculatedIdx = sizeToIdx(Size); - assert((*(Buckets[calculatedIdx])).getSize() >= Size); - if (calculatedIdx > 0) { - assert((*(Buckets[calculatedIdx - 1])).getSize() < Size); - } - - return *(Buckets[calculatedIdx]); -} - -void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { - auto *SlabPtr = AlignPtrDown(Ptr, SlabMinSize()); - - // Lock the map on read - std::shared_lock Lk(getKnownSlabsMapLock()); - - ToPool = false; - auto Slabs = getKnownSlabs().equal_range(SlabPtr); - if (Slabs.first == Slabs.second) { - Lk.unlock(); - memoryProviderFree(getMemHandle(), Ptr); - return; - } - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - // The slab object won't be deleted until it's removed from the map which is - // protected by the lock, so it's safe to access it here. - auto &Slab = It->second; - if (Ptr >= Slab.getPtr() && Ptr < Slab.getEnd()) { - // Unlock the map before freeing the chunk, it may be locked on write - // there - Lk.unlock(); - auto &Bucket = Slab.getBucket(); - - if (getParams().PoolTrace > 1) { - Bucket.countFree(); - } - - VALGRIND_DO_MEMPOOL_FREE(this, Ptr); - annotate_memory_inaccessible(Ptr, Bucket.getSize()); - if (Bucket.getSize() <= Bucket.ChunkCutOff()) { - Bucket.freeChunk(Ptr, Slab, ToPool); - } else { - Bucket.freeSlab(Slab, ToPool); - } - - return; - } - } - - Lk.unlock(); - // There is a rare case when we have a pointer from system allocation next - // to some slab with an entry in the map. So we find a slab - // but the range checks fail. 
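The lookup above hinges on how slabs were registered earlier: each slab is registered twice, once under its start address aligned down to SlabMinSize and once under that key plus SlabMinSize, so any pointer inside the slab aligns down to one of the two keys even though the slab's base address is not necessarily SlabMinSize-aligned. A small sketch of the key computation and of the range check that must still follow a hit (names are illustrative; the C rewrite appears to keep these registrations in a critnib rather than a multimap):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Key under which a slab containing 'ptr' was registered: 'ptr' aligned down
// to the slab minimum size (assumed to be a power of two).
static void *sketch_slab_lookup_key(void *ptr, size_t slab_min_size) {
    return (void *)((uintptr_t)ptr & ~((uintptr_t)slab_min_size - 1));
}

// A registration hit alone is not enough: the pointer must also fall inside
// the slab's real [start, end) range before its chunk is freed, which is the
// range check the old code performs after the multimap lookup.
static bool sketch_ptr_in_slab(void *ptr, void *slab_start,
                               size_t slab_min_size) {
    char *start = (char *)slab_start;
    return (char *)ptr >= start && (char *)ptr < start + slab_min_size;
}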
- memoryProviderFree(getMemHandle(), Ptr); -} - -void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, - size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, - const std::string &MTName) { - HighBucketSize = 0; - HighPeakSlabsInUse = 0; - for (auto &B : Buckets) { - (*B).printStats(TitlePrinted, MTName); - HighPeakSlabsInUse = std::max((*B).maxSlabsInUse, HighPeakSlabsInUse); - if ((*B).allocCount) { - HighBucketSize = std::max((*B).SlabAllocSize(), HighBucketSize); - } - } -} - -umf_result_t DisjointPool::initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_t *parameters) { - if (!provider) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - // MinBucketSize parameter must be a power of 2 for bucket sizes - // to generate correctly. - if (!parameters->MinBucketSize || - !((parameters->MinBucketSize & (parameters->MinBucketSize - 1)) == 0)) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - impl = std::make_unique(provider, parameters); - return UMF_RESULT_SUCCESS; -} - -void *DisjointPool::malloc(size_t size) { // For full-slab allocations indicates - // whether slab is from Pool. - bool FromPool; - auto Ptr = impl->allocate(size, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes from " << (FromPool ? "Pool" : "Provider") << " ->" - << Ptr << std::endl; - } - return Ptr; -} - -void *DisjointPool::calloc(size_t, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::realloc(void *, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::aligned_malloc(size_t size, size_t alignment) { - bool FromPool; - auto Ptr = impl->allocate(size, alignment, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes aligned at " << alignment << " from " - << (FromPool ? "Pool" : "Provider") << " ->" << Ptr - << std::endl; - } - return Ptr; -} - -size_t DisjointPool::malloc_usable_size(void *) { - // Not supported - return 0; -} - -umf_result_t DisjointPool::free(void *ptr) try { - bool ToPool; - impl->deallocate(ptr, ToPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Freed " << MT << " " << ptr << " to " - << (ToPool ? 
"Pool" : "Provider") - << ", Current total pool size " - << impl->getLimits()->TotalSize.load() - << ", Current pool size for " << MT << " " - << impl->getParams().CurPoolSize << "\n"; - } - return UMF_RESULT_SUCCESS; -} catch (MemoryProviderError &e) { - return e.code; -} - -umf_result_t DisjointPool::get_last_allocation_error() { - return umf::getPoolLastStatusRef(); -} - -DisjointPool::DisjointPool() {} - -// Define destructor for use with unique_ptr -DisjointPool::~DisjointPool() { - bool TitlePrinted = false; - size_t HighBucketSize; - size_t HighPeakSlabsInUse; - if (impl->getParams().PoolTrace > 1) { - auto name = impl->getParams().Name; - try { // cannot throw in destructor - impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, - name); - if (TitlePrinted) { - std::cout << "Current Pool Size " - << impl->getLimits()->TotalSize.load() << std::endl; - std::cout << "Suggested Setting=;" - << std::string(1, (char)tolower(name[0])) - << std::string(name + 1) << ":" << HighBucketSize - << "," << HighPeakSlabsInUse << ",64K" << std::endl; - } - } catch (...) { // ignore exceptions - } - } -} - -static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = - umf::poolMakeCOps(); - -umf_memory_pool_ops_t *umfDisjointPoolOps(void) { - return &UMF_DISJOINT_POOL_OPS; -} diff --git a/src/utils/utils_common.c b/src/utils/utils_common.c index 25169f6cf..611b277df 100644 --- a/src/utils/utils_common.c +++ b/src/utils/utils_common.c @@ -138,3 +138,6 @@ umf_result_t utils_translate_flags(unsigned in_flags, unsigned max, *out_flags = out_f; return UMF_RESULT_SUCCESS; } + +size_t utils_max(size_t a, size_t b) { return a > b ? a : b; } +size_t utils_min(size_t a, size_t b) { return a < b ? a : b; } diff --git a/src/utils/utils_common.h b/src/utils/utils_common.h index c25fda2ab..9b226246b 100644 --- a/src/utils/utils_common.h +++ b/src/utils/utils_common.h @@ -153,6 +153,10 @@ int utils_file_open_or_create(const char *path); int utils_fallocate(int fd, long offset, long len); +size_t utils_max(size_t a, size_t b); + +size_t utils_min(size_t a, size_t b); + #ifdef __cplusplus } #endif diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 155184cc4..861c621cb 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -61,11 +61,13 @@ int utils_mutex_unlock(utils_mutex_t *mutex); void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); #if defined(_WIN32) + static __inline unsigned char utils_lssb_index(long long value) { unsigned long ret; _BitScanForward64(&ret, value); return (unsigned char)ret; } + static __inline unsigned char utils_mssb_index(long long value) { unsigned long ret; _BitScanReverse64(&ret, value); @@ -81,15 +83,25 @@ static __inline unsigned char utils_mssb_index(long long value) { #define utils_atomic_store_release(object, desired) \ InterlockedExchange64((LONG64 volatile *)object, (LONG64)desired) + #define utils_atomic_increment(object) \ InterlockedIncrement64((LONG64 volatile *)object) + #define utils_atomic_decrement(object) \ InterlockedDecrement64((LONG64 volatile *)object) + #define utils_fetch_and_add64(ptr, value) \ InterlockedExchangeAdd64((LONG64 *)(ptr), value) -#else + +#define utils_compare_exchange(object, expected, desired) \ + InterlockedCompareExchangePointer((LONG64 volatile *)object, expected, \ + desired) + +#else // !defined(_WIN32) + #define utils_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) #define utils_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x))) + #define 
utils_atomic_load_acquire(object, dest) \ do { \ utils_annotate_acquire((void *)object); \ @@ -103,12 +115,19 @@ static __inline unsigned char utils_mssb_index(long long value) { } while (0) #define utils_atomic_increment(object) \ - __atomic_add_fetch(object, 1, __ATOMIC_ACQ_REL) + __atomic_add_fetch(object, 1, memory_order_acq_rel) + #define utils_atomic_decrement(object) \ - __atomic_sub_fetch(object, 1, __ATOMIC_ACQ_REL) -#define utils_fetch_and_add64 __sync_fetch_and_add + __atomic_sub_fetch(object, 1, memory_order_acq_rel) -#endif +#define utils_fetch_and_add64(object, value) \ + __atomic_fetch_add(object, value, memory_order_acq_rel) + +#define utils_compare_exchange(object, expected, desired) \ + __atomic_compare_exchange(object, expected, desired, 0 /* strong */, \ + memory_order_acq_rel, memory_order_relaxed) + +#endif // !defined(_WIN32) #ifdef __cplusplus }
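Finally, the new utils_compare_exchange() wrapper is what lets the C port reproduce the pooled-size compare-exchange loop without C++ std::atomic. Below is a hedged usage sketch written against the GCC/Clang expansion added above, which takes pointers to both the expected and the desired value; grow_pool_size and its parameters are illustrative, not part of this patch.

#include <stdbool.h>
#include <stddef.h>

#include "utils_concurrency.h"

// Grow the shared pooled-bytes counter by 'delta', but never past 'max_size'.
// Returns true when the growth was committed.
static bool grow_pool_size(size_t *total_size, size_t max_size, size_t delta) {
    size_t expected = *total_size; // initial snapshot; the CAS re-validates it
    while (true) {
        size_t desired = expected + delta;
        if (desired > max_size) {
            return false;
        }
        // On failure the builtin writes the value it observed back into
        // 'expected', so the next iteration retries with fresh data.
        if (utils_compare_exchange(total_size, &expected, &desired)) {
            return true;
        }
    }
}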