From 267b155de5c3536dca8ddff4dacf3866d5379c03 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 6 Nov 2024 14:25:03 +0100 Subject: [PATCH 01/26] make slab a C structure --- src/pool/CMakeLists.txt | 2 +- src/pool/pool_disjoint.c | 202 +++++++++++++++ src/pool/pool_disjoint.cpp | 453 +++++++++++++--------------------- src/pool/pool_disjoint_temp.h | 61 +++++ 4 files changed, 431 insertions(+), 287 deletions(-) create mode 100644 src/pool/pool_disjoint.c create mode 100644 src/pool/pool_disjoint_temp.h diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index bdd196b04..d87ddbcc2 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -14,7 +14,7 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT) add_umf_library( NAME disjoint_pool TYPE STATIC - SRCS pool_disjoint.cpp ${POOL_EXTRA_SRCS} + SRCS pool_disjoint.cpp pool_disjoint.c ${POOL_EXTRA_SRCS} LIBS ${POOL_EXTRA_LIBS}) target_compile_definitions(disjoint_pool diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c new file mode 100644 index 000000000..15207fbf5 --- /dev/null +++ b/src/pool/pool_disjoint.c @@ -0,0 +1,202 @@ +/* + * Copyright (C) 2022-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "base_alloc_global.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" +#include "utils_sanitizers.h" + +#include "pool_disjoint_temp.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Temporary solution for disabling memory poisoning. This is needed because +// AddressSanitizer does not support memory poisoning for GPU allocations. +// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 +#ifndef POISON_MEMORY +#define POISON_MEMORY 0 +#endif + +/*static */ void annotate_memory_inaccessible(void *ptr, size_t size) { + (void)ptr; + (void)size; +#if (POISON_MEMORY != 0) + utils_annotate_memory_inaccessible(ptr, size); +#endif +} + +/*static*/ void annotate_memory_undefined(void *ptr, size_t size) { + (void)ptr; + (void)size; +#if (POISON_MEMORY != 0) + utils_annotate_memory_undefined(ptr, size); +#endif +} + +size_t bucket_get_slab_min_size(const bucket_t bucket); +size_t bucket_get_slab_alloc_size(const bucket_t bucket); +size_t bucket_get_size(const bucket_t bucket); +umf_memory_provider_handle_t bucket_get_provider(const bucket_t bucket); + +void slab_reg(slab_t *slab); +void slab_unreg(slab_t *slab); + +slab_t *create_slab(bucket_t bucket, size_t iter_size) { + // In case bucket size is not a multiple of SlabMinSize, we would have + // some padding at the end of the slab. 
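    // Illustrative arithmetic (assumed numbers, not from the patch): with
    // SlabMinSize = 65536 and a 48000-byte bucket, num_chunks below becomes
    // 65536 / 48000 == 1, so the trailing 17536 bytes of the slab are exactly
    // that padding and are never handed out as a chunk.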
+ slab_t *slab = umf_ba_global_alloc(sizeof(slab_t)); + // TODO check res and errors here and everywhere + // TODO use logger + slab->num_allocated = 0; + slab->first_free_chunk_idx = 0; + slab->bucket = bucket; + slab->slab_list_iter = umf_ba_global_alloc(iter_size); + slab->slab_list_iter_size = iter_size; + memset(slab->slab_list_iter, 0, iter_size); + + slab->num_chunks = + bucket_get_slab_min_size(bucket) / bucket_get_size(bucket); + slab->chunks = umf_ba_global_alloc(sizeof(bool) * slab->num_chunks); + memset(slab->chunks, 0, sizeof(bool) * slab->num_chunks); + + slab->slab_size = bucket_get_slab_alloc_size(bucket); + umf_result_t res = umfMemoryProviderAlloc( + bucket_get_provider(bucket), slab->slab_size, 0, &slab->mem_ptr); + + if (res == UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY) { + destroy_slab(slab); + return NULL; + } + + annotate_memory_inaccessible(slab->mem_ptr, slab->slab_size); + fprintf(stderr, "[DP create_slab] bucket: %p, slab_size: %zu\n", bucket, + slab->slab_size); + + return slab; +} + +void destroy_slab(slab_t *slab) { + fprintf(stderr, "[DP destroy_slab] bucket: %p, slab_size: %zu\n", + slab->bucket, slab->slab_size); + + umf_result_t res = umfMemoryProviderFree(bucket_get_provider(slab->bucket), + slab->mem_ptr, slab->slab_size); + assert(res == UMF_RESULT_SUCCESS); + umf_ba_global_free(slab->chunks); + umf_ba_global_free(slab->slab_list_iter); + umf_ba_global_free(slab); +} + +size_t slab_get_num_allocated(const slab_t *slab) { + return slab->num_allocated; +} + +size_t slab_get_num_chunks(const slab_t *slab) { return slab->num_chunks; } + +// Return the index of the first available chunk, SIZE_MAX otherwise +size_t slab_find_first_available_chunk_idx(const slab_t *slab) { + // Use the first free chunk index as a hint for the search. + bool *chunk = slab->chunks + sizeof(bool) * slab->first_free_chunk_idx; + while (chunk != slab->chunks + sizeof(bool) * slab->num_chunks) { + // false means not used + if (*chunk == false) { + size_t idx = (chunk - slab->chunks) / sizeof(bool); + fprintf(stderr, + "[DP slab_find_first_available_chunk_idx] idx: %zu\n", idx); + return idx; + } + chunk++; + } + + fprintf(stderr, "[DP slab_find_first_available_chunk_idx] idx: SIZE_MAX\n"); + return SIZE_MAX; +} + +void *slab_get_chunk(slab_t *slab) { + // assert(slab->num_allocated != slab->num_chunks); + + const size_t chunk_idx = slab_find_first_available_chunk_idx(slab); + // Free chunk must exist, otherwise we would have allocated another slab + assert(chunk_idx != SIZE_MAX); + + void *free_chunk = + (uint8_t *)slab->mem_ptr + chunk_idx * slab_get_chunk_size(slab); + // mark as used + slab->chunks[chunk_idx] = true; + slab->num_allocated += 1; + + // Use the found index as the next hint + slab->first_free_chunk_idx = chunk_idx; + + fprintf(stderr, "[DP slab_get_chunk] num_allocated: %zu\n", + slab->num_allocated); + + return free_chunk; +} + +void *slab_get_end(const slab_t *slab) { + return (uint8_t *)slab->mem_ptr + bucket_get_slab_min_size(slab->bucket); +} + +// TODO remove? why need getter/setter? +void *slab_get(const slab_t *slab) { return slab->mem_ptr; } +bucket_t slab_get_bucket(const slab_t *slab) { return slab->bucket; } +size_t slab_get_chunk_size(const slab_t *slab) { + return bucket_get_size(slab->bucket); +} + +void slab_free_chunk(slab_t *slab, void *ptr) { + // This method should be called through bucket(since we might remove the + // slab as a result), therefore all locks are done on that level. 
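    // At this point in the series the caller is Bucket::freeChunk() in
    // pool_disjoint.cpp: it takes BucketLock, calls slab_free_chunk(), and
    // then Bucket::onFreeChunk() decides whether the slab should be moved,
    // pooled, or destroyed.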
+ + // Make sure that we're in the right slab + assert(ptr >= slab_get(slab) && ptr < slab_get_end(slab)); + + // Even if the pointer p was previously aligned, it's still inside the + // corresponding chunk, so we get the correct index here. + size_t chunk_idx = (ptr - slab->mem_ptr) / slab_get_chunk_size(slab); + + // Make sure that the chunk was allocated + assert(slab->chunks[chunk_idx] && "double free detected"); + slab->chunks[chunk_idx] = false; + slab->num_allocated -= 1; + + if (chunk_idx < slab->first_free_chunk_idx) { + slab->first_free_chunk_idx = chunk_idx; + } + + fprintf(stderr, + "[DP slab_free_chunk] chunk_idx: %zu, num_allocated: %zu, " + "first_free_chunk_idx: %zu\n", + chunk_idx, slab->num_allocated, slab->first_free_chunk_idx); +} + +bool slab_has_avail(const slab_t *slab) { + return slab->num_allocated != slab->num_chunks; +} + +void *slab_get_iterator(const slab_t *slab) { return slab->slab_list_iter; } +void slab_set_iterator(slab_t *slab, void *it) { + memcpy(slab->slab_list_iter, it, slab->slab_list_iter_size); +} + +#ifdef __cplusplus +} +#endif diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index 2cf8df7a4..944b5be7f 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -31,26 +31,20 @@ #include "utils_math.h" #include "utils_sanitizers.h" -// Temporary solution for disabling memory poisoning. This is needed because -// AddressSanitizer does not support memory poisoning for GPU allocations. -// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 -#ifndef POISON_MEMORY -#define POISON_MEMORY 0 +// TODO remove +#ifdef __cplusplus +extern "C" { #endif -static inline void annotate_memory_inaccessible([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_inaccessible(ptr, size); -#endif -} +#include "pool_disjoint_temp.h" -static inline void annotate_memory_undefined([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_undefined(ptr, size); -#endif +class Bucket; +struct slab_t; + +#ifdef __cplusplus } +#endif +// end TODO remove typedef struct umf_disjoint_pool_shared_limits_t { size_t MaxSize; @@ -138,84 +132,14 @@ typedef struct MemoryProviderError { umf_result_t code; } MemoryProviderError_t; -class Bucket; - -// Represents the allocated memory block of size 'SlabMinSize' -// Internally, it splits the memory block into chunks. The number of -// chunks depends of the size of a Bucket which created the Slab. -// Note: Bucket's methods are responsible for thread safety of Slab access, -// so no locking happens here. -class Slab { - - // Pointer to the allocated memory of SlabMinSize bytes - void *MemPtr; - - // Represents the current state of each chunk: - // if the bit is set then the chunk is allocated - // the chunk is free for allocation otherwise - std::vector Chunks; - - // Total number of allocated chunks at the moment. - size_t NumAllocated = 0; - - // The bucket which the slab belongs to - Bucket &bucket; - - using ListIter = std::list>::iterator; - - // Store iterator to the corresponding node in avail/unavail list - // to achieve O(1) removal - ListIter SlabListIter; - - // Hints where to start search for free chunk in a slab - size_t FirstFreeChunkIdx = 0; - - // Return the index of the first available chunk, SIZE_MAX otherwise - size_t FindFirstAvailableChunkIdx() const; - - // Register/Unregister the slab in the global slab address map. 
- void regSlab(Slab &); - void unregSlab(Slab &); - static void regSlabByAddr(void *, Slab &); - static void unregSlabByAddr(void *, Slab &); - - public: - Slab(Bucket &); - ~Slab(); - - void setIterator(ListIter It) { SlabListIter = It; } - ListIter getIterator() const { return SlabListIter; } - - size_t getNumAllocated() const { return NumAllocated; } - - // Get pointer to allocation that is one piece of this slab. - void *getChunk(); - - // Get pointer to allocation that is this entire slab. - void *getSlab(); - - void *getPtr() const { return MemPtr; } - void *getEnd() const; - - size_t getChunkSize() const; - size_t getNumChunks() const { return Chunks.size(); } - - bool hasAvail(); - - Bucket &getBucket(); - const Bucket &getBucket() const; - - void freeChunk(void *Ptr); -}; - class Bucket { const size_t Size; // List of slabs which have at least 1 available chunk. - std::list> AvailableSlabs; + std::list AvailableSlabs; // List of slabs with 0 available chunk. - std::list> UnavailableSlabs; + std::list UnavailableSlabs; // Protects the bucket and all the corresponding slabs std::mutex BucketLock; @@ -261,6 +185,18 @@ class Bucket { currSlabsInPool(0), maxSlabsInPool(0), allocCount(0), maxSlabsInUse(0) {} + ~Bucket() { + for (auto it = AvailableSlabs.begin(); it != AvailableSlabs.end(); + it++) { + destroy_slab(*it); + } + + for (auto it = UnavailableSlabs.begin(); it != UnavailableSlabs.end(); + it++) { + destroy_slab(*it); + } + } + // Get pointer to allocation that is one piece of an available slab in this // bucket. void *getChunk(bool &FromPool); @@ -272,10 +208,10 @@ class Bucket { size_t getSize() const { return Size; } // Free an allocation that is one piece of a slab in this bucket. - void freeChunk(void *Ptr, Slab &Slab, bool &ToPool); + void freeChunk(void *Ptr, slab_t *Slab, bool &ToPool); // Free an allocation that is a full slab in this bucket. - void freeSlab(Slab &Slab, bool &ToPool); + void freeSlab(slab_t *Slab, bool &ToPool); umf_memory_provider_handle_t getMemHandle(); @@ -312,7 +248,7 @@ class Bucket { void printStats(bool &TitlePrinted, const std::string &Label); private: - void onFreeChunk(Slab &, bool &ToPool); + void onFreeChunk(slab_t *, bool &ToPool); // Update statistics of pool usage, and indicate that an allocation was made // from the pool. @@ -328,7 +264,7 @@ class Bucket { class DisjointPool::AllocImpl { // It's important for the map to be destroyed last after buckets and their // slabs This is because slab's destructor removes the object from the map. 
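    // (C++ destroys non-static members in reverse declaration order, so
    // declaring KnownSlabs before Buckets is what guarantees the map outlives
    // the buckets and their slabs.)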
- std::unordered_multimap KnownSlabs; + std::unordered_multimap KnownSlabs; std::shared_timed_mutex KnownSlabsMapLock; // Handle to the memory provider @@ -390,7 +326,8 @@ class DisjointPool::AllocImpl { std::shared_timed_mutex &getKnownSlabsMapLock() { return KnownSlabsMapLock; } - std::unordered_multimap &getKnownSlabs() { + + std::unordered_multimap &getKnownSlabs() { return KnownSlabs; } @@ -443,164 +380,16 @@ static void memoryProviderFree(umf_memory_provider_handle_t hProvider, } } -bool operator==(const Slab &Lhs, const Slab &Rhs) { - return Lhs.getPtr() == Rhs.getPtr(); +bool operator==(const slab_t &Lhs, const slab_t &Rhs) { + return slab_get(&Lhs) == slab_get(&Rhs); } -std::ostream &operator<<(std::ostream &Os, const Slab &Slab) { - Os << "Slab<" << Slab.getPtr() << ", " << Slab.getEnd() << ", " - << Slab.getBucket().getSize() << ">"; +std::ostream &operator<<(std::ostream &Os, const slab_t &Slab) { + Os << "Slab<" << slab_get(&Slab) << ", " << slab_get_end(&Slab) << ", " + << (*(Bucket *)slab_get_bucket(&Slab)).getSize() << ">"; return Os; } -Slab::Slab(Bucket &Bkt) - : // In case bucket size is not a multiple of SlabMinSize, we would have - // some padding at the end of the slab. - Chunks(Bkt.SlabMinSize() / Bkt.getSize()), NumAllocated{0}, - bucket(Bkt), SlabListIter{}, FirstFreeChunkIdx{0} { - auto SlabSize = Bkt.SlabAllocSize(); - MemPtr = memoryProviderAlloc(Bkt.getMemHandle(), SlabSize); - regSlab(*this); -} - -Slab::~Slab() { - try { - unregSlab(*this); - } catch (std::exception &e) { - LOG_ERR("DisjointPool: unexpected error: %s", e.what()); - } - - try { - memoryProviderFree(bucket.getMemHandle(), MemPtr); - } catch (MemoryProviderError &e) { - LOG_ERR("DisjointPool: error from memory provider: %d", e.code); - - if (e.code == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { - const char *message = ""; - int error = 0; - - try { - umfMemoryProviderGetLastNativeError( - umfGetLastFailedMemoryProvider(), &message, &error); - LOG_ERR("Native error msg: %s, native error code: %d", message, - error); - } catch (...) { - // ignore any additional errors from logger - } - } - } -} - -// Return the index of the first available chunk, SIZE_MAX otherwise -size_t Slab::FindFirstAvailableChunkIdx() const { - // Use the first free chunk index as a hint for the search. 
- auto It = std::find_if(Chunks.begin() + FirstFreeChunkIdx, Chunks.end(), - [](auto x) { return !x; }); - if (It != Chunks.end()) { - return It - Chunks.begin(); - } - - return std::numeric_limits::max(); -} - -void *Slab::getChunk() { - // assert(NumAllocated != Chunks.size()); - - const size_t ChunkIdx = FindFirstAvailableChunkIdx(); - // Free chunk must exist, otherwise we would have allocated another slab - assert(ChunkIdx != (std::numeric_limits::max())); - - void *const FreeChunk = - (static_cast(getPtr())) + ChunkIdx * getChunkSize(); - Chunks[ChunkIdx] = true; - NumAllocated += 1; - - // Use the found index as the next hint - FirstFreeChunkIdx = ChunkIdx; - - return FreeChunk; -} - -void *Slab::getSlab() { return getPtr(); } - -Bucket &Slab::getBucket() { return bucket; } -const Bucket &Slab::getBucket() const { return bucket; } - -size_t Slab::getChunkSize() const { return bucket.getSize(); } - -void Slab::regSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - Map.insert({Addr, Slab}); -} - -void Slab::unregSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - - auto Slabs = Map.equal_range(Addr); - // At least the must get the current slab from the map. - assert(Slabs.first != Slabs.second && "Slab is not found"); - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - if (It->second == Slab) { - Map.erase(It); - return; - } - } - - assert(false && "Slab is not found"); -} - -void Slab::regSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - regSlabByAddr(StartAddr, Slab); - regSlabByAddr(EndAddr, Slab); -} - -void Slab::unregSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - unregSlabByAddr(StartAddr, Slab); - unregSlabByAddr(EndAddr, Slab); -} - -void Slab::freeChunk(void *Ptr) { - // This method should be called through bucket(since we might remove the slab - // as a result), therefore all locks are done on that level. - - // Make sure that we're in the right slab - assert(Ptr >= getPtr() && Ptr < getEnd()); - - // Even if the pointer p was previously aligned, it's still inside the - // corresponding chunk, so we get the correct index here. - auto ChunkIdx = (static_cast(Ptr) - static_cast(MemPtr)) / - getChunkSize(); - - // Make sure that the chunk was allocated - assert(Chunks[ChunkIdx] && "double free detected"); - - Chunks[ChunkIdx] = false; - NumAllocated -= 1; - - if (ChunkIdx < FirstFreeChunkIdx) { - FirstFreeChunkIdx = ChunkIdx; - } -} - -void *Slab::getEnd() const { - return static_cast(getPtr()) + bucket.SlabMinSize(); -} - -bool Slab::hasAvail() { return NumAllocated != getNumChunks(); } - // If a slab was available in the pool then note that the current pooled // size has reduced by the size of a slab in this bucket. 
void Bucket::decrementPool(bool &FromPool) { @@ -609,13 +398,19 @@ void Bucket::decrementPool(bool &FromPool) { OwnAllocCtx.getLimits()->TotalSize -= SlabAllocSize(); } -auto Bucket::getAvailFullSlab(bool &FromPool) - -> decltype(AvailableSlabs.begin()) { +std::list::iterator Bucket::getAvailFullSlab(bool &FromPool) { // Return a slab that will be used for a single allocation. if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); + slab_t *slab = create_slab((bucket_t *)this, + sizeof(std::list::iterator)); + if (slab == NULL) { + throw MemoryProviderError{UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY}; + } + + slab_reg(slab); + auto It = AvailableSlabs.insert(AvailableSlabs.begin(), slab); + slab_set_iterator(slab, &It); + FromPool = false; updateStats(1, 0); } else { @@ -629,24 +424,28 @@ void *Bucket::getSlab(bool &FromPool) { std::lock_guard Lg(BucketLock); auto SlabIt = getAvailFullSlab(FromPool); - auto *FreeSlab = (*SlabIt)->getSlab(); - auto It = - UnavailableSlabs.insert(UnavailableSlabs.begin(), std::move(*SlabIt)); + slab_t *slab = *SlabIt; + + void *ptr = slab_get(slab); + auto It = UnavailableSlabs.insert(UnavailableSlabs.begin(), slab); AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - return FreeSlab; + slab_set_iterator(slab, &It); + return ptr; } -void Bucket::freeSlab(Slab &Slab, bool &ToPool) { +void Bucket::freeSlab(slab_t *slab, bool &ToPool) { std::lock_guard Lg(BucketLock); - auto SlabIter = Slab.getIterator(); + + auto SlabIter = *(std::list::iterator *)slab_get_iterator(slab); assert(SlabIter != UnavailableSlabs.end()); if (CanPool(ToPool)) { auto It = AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); UnavailableSlabs.erase(SlabIter); - (*It)->setIterator(It); + slab_set_iterator(*It, &It); } else { + slab_unreg(*SlabIter); + destroy_slab(*SlabIter); UnavailableSlabs.erase(SlabIter); } } @@ -654,14 +453,20 @@ void Bucket::freeSlab(Slab &Slab, bool &ToPool) { auto Bucket::getAvailSlab(bool &FromPool) -> decltype(AvailableSlabs.begin()) { if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); + slab_t *slab = create_slab((bucket_t *)this, + sizeof(std::list::iterator)); + if (slab == NULL) { + throw MemoryProviderError{UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY}; + } + + slab_reg(slab); + auto It = AvailableSlabs.insert(AvailableSlabs.begin(), slab); + slab_set_iterator(slab, &It); updateStats(1, 0); FromPool = false; } else { - if ((*(AvailableSlabs.begin()))->getNumAllocated() == 0) { + if (slab_get_num_allocated(*(AvailableSlabs.begin())) == 0) { // If this was an empty slab, it was in the pool. // Now it is no longer in the pool, so update count. 
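            // (chunkedSlabsInPool only counts completely empty slabs kept on
            // the Available list, so handing out a chunk from such a slab has
            // to decrement it.)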
--chunkedSlabsInPool; @@ -679,46 +484,45 @@ void *Bucket::getChunk(bool &FromPool) { std::lock_guard Lg(BucketLock); auto SlabIt = getAvailSlab(FromPool); - auto *FreeChunk = (*SlabIt)->getChunk(); + auto *FreeChunk = slab_get_chunk((*SlabIt)); // If the slab is full, move it to unavailable slabs and update its iterator - if (!((*SlabIt)->hasAvail())) { - auto It = UnavailableSlabs.insert(UnavailableSlabs.begin(), - std::move(*SlabIt)); + if (!(slab_has_avail(*SlabIt))) { + auto It = UnavailableSlabs.insert(UnavailableSlabs.begin(), *SlabIt); AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); + slab_set_iterator(*It, &It); } return FreeChunk; } -void Bucket::freeChunk(void *Ptr, Slab &Slab, bool &ToPool) { +void Bucket::freeChunk(void *ptr, slab_t *Slab, bool &ToPool) { std::lock_guard Lg(BucketLock); - Slab.freeChunk(Ptr); + slab_free_chunk(Slab, ptr); onFreeChunk(Slab, ToPool); } // The lock must be acquired before calling this method -void Bucket::onFreeChunk(Slab &Slab, bool &ToPool) { +void Bucket::onFreeChunk(slab_t *slab, bool &ToPool) { ToPool = true; // In case if the slab was previously full and now has 1 available // chunk, it should be moved to the list of available slabs - if (Slab.getNumAllocated() == (Slab.getNumChunks() - 1)) { - auto SlabIter = Slab.getIterator(); + if (slab_get_num_allocated(slab) == (slab_get_num_chunks(slab) - 1)) { + auto SlabIter = + *(std::list::iterator *)slab_get_iterator(slab); assert(SlabIter != UnavailableSlabs.end()); - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); + slab_t *slab = *SlabIter; + auto It = AvailableSlabs.insert(AvailableSlabs.begin(), slab); UnavailableSlabs.erase(SlabIter); - - (*It)->setIterator(It); + slab_set_iterator(slab, &It); } // Check if slab is empty, and pool it if we can. - if (Slab.getNumAllocated() == 0) { + if (slab_get_num_allocated(slab) == 0) { // The slab is now empty. // If pool has capacity then put the slab in the pool. // The ToPool parameter indicates whether the Slab will be put in the @@ -726,8 +530,10 @@ void Bucket::onFreeChunk(Slab &Slab, bool &ToPool) { if (!CanPool(ToPool)) { // Note: since the slab is stored as unique_ptr, just remove it from // the list to destroy the object. - auto It = Slab.getIterator(); + auto It = *(std::list::iterator *)slab_get_iterator(slab); assert(It != AvailableSlabs.end()); + slab_unreg(*It); + destroy_slab(*It); AvailableSlabs.erase(It); } } @@ -971,22 +777,22 @@ void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { // The slab object won't be deleted until it's removed from the map which is // protected by the lock, so it's safe to access it here. 
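        // The lookup above works because regSlab() registers every slab under
        // two keys: its start address aligned down to SlabMinSize, and that
        // address plus SlabMinSize. Aligning Ptr down therefore always lands
        // on one of the registered keys, and the range check below selects the
        // slab that actually contains Ptr.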
auto &Slab = It->second; - if (Ptr >= Slab.getPtr() && Ptr < Slab.getEnd()) { + if (Ptr >= slab_get(Slab) && Ptr < slab_get_end(Slab)) { // Unlock the map before freeing the chunk, it may be locked on write // there Lk.unlock(); - auto &Bucket = Slab.getBucket(); + auto bucket = (Bucket *)slab_get_bucket(Slab); if (getParams().PoolTrace > 1) { - Bucket.countFree(); + bucket->countFree(); } VALGRIND_DO_MEMPOOL_FREE(this, Ptr); - annotate_memory_inaccessible(Ptr, Bucket.getSize()); - if (Bucket.getSize() <= Bucket.ChunkCutOff()) { - Bucket.freeChunk(Ptr, Slab, ToPool); + annotate_memory_inaccessible(Ptr, bucket->getSize()); + if (bucket->getSize() <= bucket->ChunkCutOff()) { + bucket->freeChunk(Ptr, Slab, ToPool); } else { - Bucket.freeSlab(Slab, ToPool); + bucket->freeSlab(Slab, ToPool); } return; @@ -1129,3 +935,78 @@ static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = umf_memory_pool_ops_t *umfDisjointPoolOps(void) { return &UMF_DISJOINT_POOL_OPS; } + +// TODO remove +#ifdef __cplusplus +extern "C" { +#endif +size_t bucket_get_slab_min_size(const bucket_t bucket) { + return ((Bucket *)bucket)->SlabMinSize(); +} + +size_t bucket_get_slab_alloc_size(const bucket_t bucket) { + return ((Bucket *)bucket)->SlabAllocSize(); +} + +size_t bucket_get_size(const bucket_t bucket) { + return ((Bucket *)bucket)->getSize(); +} + +umf_memory_provider_handle_t bucket_get_provider(const bucket_t bucket) { + return ((Bucket *)bucket)->getMemHandle(); +} + +void slab_reg_by_addr(void *addr, slab_t *slab) { + auto &Lock = + ((Bucket *)slab_get_bucket(slab))->getAllocCtx().getKnownSlabsMapLock(); + auto &Map = + ((Bucket *)slab_get_bucket(slab))->getAllocCtx().getKnownSlabs(); + + std::lock_guard Lg(Lock); + Map.insert({addr, slab}); +} + +void slab_unreg_by_addr(void *addr, slab_t *slab) { + auto &Lock = + ((Bucket *)slab_get_bucket(slab))->getAllocCtx().getKnownSlabsMapLock(); + auto &Map = + ((Bucket *)slab_get_bucket(slab))->getAllocCtx().getKnownSlabs(); + + std::lock_guard Lg(Lock); + + auto Slabs = Map.equal_range(addr); + // At least the must get the current slab from the map. + assert(Slabs.first != Slabs.second && "Slab is not found"); + + for (auto It = Slabs.first; It != Slabs.second; ++It) { + if (It->second == slab) { + Map.erase(It); + return; + } + } + + assert(false && "Slab is not found"); +} + +void slab_reg(slab_t *slab) { + Bucket *bucket = (Bucket *)slab_get_bucket(slab); + void *start_addr = AlignPtrDown(slab_get(slab), bucket->SlabMinSize()); + void *end_addr = static_cast(start_addr) + bucket->SlabMinSize(); + + slab_reg_by_addr(start_addr, slab); + slab_reg_by_addr(end_addr, slab); +} + +void slab_unreg(slab_t *slab) { + Bucket *bucket = (Bucket *)slab_get_bucket(slab); + void *start_addr = AlignPtrDown(slab_get(slab), bucket->SlabMinSize()); + void *end_addr = static_cast(start_addr) + bucket->SlabMinSize(); + + slab_unreg_by_addr(start_addr, slab); + slab_unreg_by_addr(end_addr, slab); +} + +#ifdef __cplusplus +} +#endif +// end TODO remove diff --git a/src/pool/pool_disjoint_temp.h b/src/pool/pool_disjoint_temp.h new file mode 100644 index 000000000..6ec1b7a9b --- /dev/null +++ b/src/pool/pool_disjoint_temp.h @@ -0,0 +1,61 @@ + + +#ifndef TEMP_H +#define TEMP_H 1 + +void annotate_memory_inaccessible(void *ptr, size_t size); +void annotate_memory_undefined(void *ptr, size_t size); + +typedef void *bucket_t; + +// Represents the allocated memory block of size 'slab_min_size' +// Internally, it splits the memory block into chunks. 
The number of +// chunks depends of the size of a Bucket which created the Slab. +// Note: Bucket's methods are responsible for thread safety of Slab access, +// so no locking happens here. +typedef struct slab_t { + // Pointer to the allocated memory of slab_min_size bytes + void *mem_ptr; + size_t slab_size; + + // Represents the current state of each chunk: if the bit is set then the + // chunk is allocated, and if the chunk is free for allocation otherwise + bool *chunks; + size_t num_chunks; + + // Total number of allocated chunks at the moment. + size_t num_allocated; + + // The bucket which the slab belongs to + bucket_t bucket; + + // Hints where to start search for free chunk in a slab + size_t first_free_chunk_idx; + + // Store iterator to the corresponding node in avail/unavail list + // to achieve O(1) removal + void *slab_list_iter; + size_t slab_list_iter_size; +} slab_t; + +slab_t *create_slab(bucket_t bucket, size_t iter_size); +void destroy_slab(slab_t *slab); + +void *slab_get(const slab_t *slab); +void *slab_get_end(const slab_t *slab); +bucket_t slab_get_bucket(const slab_t *slab); +void *slab_get_chunk(slab_t *slab); +size_t slab_get_num_chunks(const slab_t *slab); +size_t slab_get_chunk_size(const slab_t *slab); +size_t slab_get_num_allocated(const slab_t *slab); + +void *slab_get_iterator(const slab_t *slab); +void slab_set_iterator(slab_t *slab, void *it); + +bool slab_has_avail(const slab_t *slab); +void slab_free_chunk(slab_t *slab, void *ptr); + +void slab_reg(slab_t *slab); +void slab_unreg(slab_t *slab); + +#endif // TEMP_H \ No newline at end of file From 7f7dbb5a12c984e743b506204daef6110c2b8b2e Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Sat, 9 Nov 2024 14:23:47 +0100 Subject: [PATCH 02/26] use align* macros from utils --- src/pool/pool_disjoint.cpp | 46 ++++++++++++-------------------------- 1 file changed, 14 insertions(+), 32 deletions(-) diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index 944b5be7f..d71889da4 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -27,6 +27,7 @@ #include "../cpp_helpers.hpp" #include "pool_disjoint.h" #include "umf.h" +#include "utils_common.h" #include "utils_log.h" #include "utils_math.h" #include "utils_sanitizers.h" @@ -103,31 +104,6 @@ void umfDisjointPoolSharedLimitsDestroy( // go directly to the provider. static constexpr size_t CutOff = (size_t)1 << 31; // 2GB -// Aligns the pointer down to the specified alignment -// (e.g. returns 8 for Size = 13, Alignment = 8) -static void *AlignPtrDown(void *Ptr, const size_t Alignment) { - return reinterpret_cast((reinterpret_cast(Ptr)) & - (~(Alignment - 1))); -} - -// Aligns the pointer up to the specified alignment -// (e.g. returns 16 for Size = 13, Alignment = 8) -static void *AlignPtrUp(void *Ptr, const size_t Alignment) { - void *AlignedPtr = AlignPtrDown(Ptr, Alignment); - // Special case when the pointer is already aligned - if (Ptr == AlignedPtr) { - return Ptr; - } - return static_cast(AlignedPtr) + Alignment; -} - -// Aligns the value up to the specified alignment -// (e.g. 
returns 16 for Size = 13, Alignment = 8) -static size_t AlignUp(size_t Val, size_t Alignment) { - assert(Alignment > 0); - return (Val + Alignment - 1) & (~(Alignment - 1)); -} - typedef struct MemoryProviderError { umf_result_t code; } MemoryProviderError_t; @@ -691,7 +667,7 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, // This allocation will be served from a Bucket which size is multiple // of Alignment and Slab address is aligned to ProviderMinPageSize // so the address will be properly aligned. - AlignedSize = (Size > 1) ? AlignUp(Size, Alignment) : Alignment; + AlignedSize = (Size > 1) ? ALIGN_UP(Size, Alignment) : Alignment; } else { // Slabs are only aligned to ProviderMinPageSize, we need to compensate // for that in case the allocation is within pooling limit. @@ -720,9 +696,9 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, Bucket.countAlloc(FromPool); } - VALGRIND_DO_MEMPOOL_ALLOC(this, AlignPtrUp(Ptr, Alignment), Size); - annotate_memory_undefined(AlignPtrUp(Ptr, Alignment), Size); - return AlignPtrUp(Ptr, Alignment); + VALGRIND_DO_MEMPOOL_ALLOC(this, ALIGN_UP((size_t)Ptr, Alignment), Size); + annotate_memory_undefined((void *)ALIGN_UP((size_t)Ptr, Alignment), Size); + return (void *)ALIGN_UP((size_t)Ptr, Alignment); } catch (MemoryProviderError &e) { umf::getPoolLastStatusRef() = e.code; return nullptr; @@ -760,7 +736,11 @@ Bucket &DisjointPool::AllocImpl::findBucket(size_t Size) { } void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { - auto *SlabPtr = AlignPtrDown(Ptr, SlabMinSize()); + if (Ptr == nullptr) { + return; + } + + auto *SlabPtr = (void *)ALIGN_DOWN((size_t)Ptr, SlabMinSize()); // Lock the map on read std::shared_lock Lk(getKnownSlabsMapLock()); @@ -990,7 +970,8 @@ void slab_unreg_by_addr(void *addr, slab_t *slab) { void slab_reg(slab_t *slab) { Bucket *bucket = (Bucket *)slab_get_bucket(slab); - void *start_addr = AlignPtrDown(slab_get(slab), bucket->SlabMinSize()); + void *start_addr = + (void *)ALIGN_DOWN((size_t)slab_get(slab), bucket->SlabMinSize()); void *end_addr = static_cast(start_addr) + bucket->SlabMinSize(); slab_reg_by_addr(start_addr, slab); @@ -999,7 +980,8 @@ void slab_reg(slab_t *slab) { void slab_unreg(slab_t *slab) { Bucket *bucket = (Bucket *)slab_get_bucket(slab); - void *start_addr = AlignPtrDown(slab_get(slab), bucket->SlabMinSize()); + void *start_addr = + (void *)ALIGN_DOWN((size_t)slab_get(slab), bucket->SlabMinSize()); void *end_addr = static_cast(start_addr) + bucket->SlabMinSize(); slab_unreg_by_addr(start_addr, slab); From 97df9e50b55382afe4b616676d761aeaf9b035f5 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Sat, 9 Nov 2024 15:00:41 +0100 Subject: [PATCH 03/26] use list implementation from utlist --- src/pool/pool_disjoint.c | 20 +++-- src/pool/pool_disjoint.cpp | 135 ++++++++++++++++------------------ src/pool/pool_disjoint_temp.h | 14 ++-- 3 files changed, 82 insertions(+), 87 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 15207fbf5..abbe505ff 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -58,7 +58,7 @@ umf_memory_provider_handle_t bucket_get_provider(const bucket_t bucket); void slab_reg(slab_t *slab); void slab_unreg(slab_t *slab); -slab_t *create_slab(bucket_t bucket, size_t iter_size) { +slab_t *create_slab(bucket_t bucket) { // In case bucket size is not a multiple of SlabMinSize, we would have // some padding at the end of the slab. 
slab_t *slab = umf_ba_global_alloc(sizeof(slab_t)); @@ -67,9 +67,11 @@ slab_t *create_slab(bucket_t bucket, size_t iter_size) { slab->num_allocated = 0; slab->first_free_chunk_idx = 0; slab->bucket = bucket; - slab->slab_list_iter = umf_ba_global_alloc(iter_size); - slab->slab_list_iter_size = iter_size; - memset(slab->slab_list_iter, 0, iter_size); + + slab->iter = + (slab_list_item_t *)umf_ba_global_alloc(sizeof(slab_list_item_t)); + slab->iter->val = slab; + slab->iter->prev = slab->iter->next = NULL; slab->num_chunks = bucket_get_slab_min_size(bucket) / bucket_get_size(bucket); @@ -100,7 +102,7 @@ void destroy_slab(slab_t *slab) { slab->mem_ptr, slab->slab_size); assert(res == UMF_RESULT_SUCCESS); umf_ba_global_free(slab->chunks); - umf_ba_global_free(slab->slab_list_iter); + umf_ba_global_free(slab->iter); umf_ba_global_free(slab); } @@ -171,7 +173,8 @@ void slab_free_chunk(slab_t *slab, void *ptr) { // Even if the pointer p was previously aligned, it's still inside the // corresponding chunk, so we get the correct index here. - size_t chunk_idx = (ptr - slab->mem_ptr) / slab_get_chunk_size(slab); + size_t chunk_idx = + ((uint8_t *)ptr - (uint8_t *)slab->mem_ptr) / slab_get_chunk_size(slab); // Make sure that the chunk was allocated assert(slab->chunks[chunk_idx] && "double free detected"); @@ -192,11 +195,6 @@ bool slab_has_avail(const slab_t *slab) { return slab->num_allocated != slab->num_chunks; } -void *slab_get_iterator(const slab_t *slab) { return slab->slab_list_iter; } -void slab_set_iterator(slab_t *slab, void *it) { - memcpy(slab->slab_list_iter, it, slab->slab_list_iter_size); -} - #ifdef __cplusplus } #endif diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index d71889da4..26276fc85 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -24,6 +24,8 @@ #include "provider/provider_tracking.h" +#include "uthash/utlist.h" + #include "../cpp_helpers.hpp" #include "pool_disjoint.h" #include "umf.h" @@ -112,10 +114,10 @@ class Bucket { const size_t Size; // List of slabs which have at least 1 available chunk. - std::list AvailableSlabs; + slab_list_item_t *AvailableSlabs; // List of slabs with 0 available chunk. 
- std::list UnavailableSlabs; + slab_list_item_t *UnavailableSlabs; // Protects the bucket and all the corresponding slabs std::mutex BucketLock; @@ -156,21 +158,25 @@ class Bucket { size_t maxSlabsInUse; Bucket(size_t Sz, DisjointPool::AllocImpl &AllocCtx) - : Size{Sz}, OwnAllocCtx{AllocCtx}, chunkedSlabsInPool(0), - allocPoolCount(0), freeCount(0), currSlabsInUse(0), - currSlabsInPool(0), maxSlabsInPool(0), allocCount(0), - maxSlabsInUse(0) {} + : Size{Sz}, OwnAllocCtx{AllocCtx} { + AvailableSlabs = NULL; + UnavailableSlabs = NULL; + chunkedSlabsInPool = 0; + allocPoolCount = 0; + freeCount = 0; + currSlabsInUse = 0; + currSlabsInPool = 0; + maxSlabsInPool = 0; + allocCount = 0; + maxSlabsInUse = 0; + } ~Bucket() { - for (auto it = AvailableSlabs.begin(); it != AvailableSlabs.end(); - it++) { - destroy_slab(*it); - } - - for (auto it = UnavailableSlabs.begin(); it != UnavailableSlabs.end(); - it++) { - destroy_slab(*it); - } + slab_list_item_t *it = NULL, *tmp = NULL; + // TODO check eng + // use extra tmp to store next iterator before the slab is destroyed + LL_FOREACH_SAFE(AvailableSlabs, it, tmp) { destroy_slab(it->val); } + LL_FOREACH_SAFE(UnavailableSlabs, it, tmp) { destroy_slab(it->val); } } // Get pointer to allocation that is one piece of an available slab in this @@ -231,10 +237,10 @@ class Bucket { void decrementPool(bool &FromPool); // Get a slab to be used for chunked allocations. - decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromPool); + slab_list_item_t *getAvailSlab(bool &FromPool); // Get a slab that will be used as a whole for a single allocation. - decltype(AvailableSlabs.begin()) getAvailFullSlab(bool &FromPool); + slab_list_item_t *getAvailFullSlab(bool &FromPool); }; class DisjointPool::AllocImpl { @@ -374,75 +380,65 @@ void Bucket::decrementPool(bool &FromPool) { OwnAllocCtx.getLimits()->TotalSize -= SlabAllocSize(); } -std::list::iterator Bucket::getAvailFullSlab(bool &FromPool) { +slab_list_item_t *Bucket::getAvailFullSlab(bool &FromPool) { // Return a slab that will be used for a single allocation. 
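    // utlist sketch: slab->iter is a doubly-linked node whose val field points
    // back at its slab, so a slab can be moved between the bucket lists in
    // O(1), e.g.
    //   DL_PREPEND(AvailableSlabs, slab->iter);  // link in at the front
    //   DL_DELETE(AvailableSlabs, slab->iter);   // unlink by node
    // This replaces the std::list iterators that were stored in each slab
    // before this patch.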
- if (AvailableSlabs.size() == 0) { - slab_t *slab = create_slab((bucket_t *)this, - sizeof(std::list::iterator)); + if (AvailableSlabs == NULL) { + slab_t *slab = create_slab((bucket_t *)this); if (slab == NULL) { throw MemoryProviderError{UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY}; } slab_reg(slab); - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), slab); - slab_set_iterator(slab, &It); - + DL_PREPEND(AvailableSlabs, slab->iter); FromPool = false; updateStats(1, 0); } else { decrementPool(FromPool); } - return AvailableSlabs.begin(); + return AvailableSlabs; } void *Bucket::getSlab(bool &FromPool) { std::lock_guard Lg(BucketLock); - auto SlabIt = getAvailFullSlab(FromPool); - slab_t *slab = *SlabIt; - + slab_list_item_t *slab_it = getAvailFullSlab(FromPool); + slab_t *slab = slab_it->val; void *ptr = slab_get(slab); - auto It = UnavailableSlabs.insert(UnavailableSlabs.begin(), slab); - AvailableSlabs.erase(SlabIt); - slab_set_iterator(slab, &It); + + DL_DELETE(AvailableSlabs, slab_it); + DL_PREPEND(UnavailableSlabs, slab_it); return ptr; } void Bucket::freeSlab(slab_t *slab, bool &ToPool) { std::lock_guard Lg(BucketLock); - auto SlabIter = *(std::list::iterator *)slab_get_iterator(slab); - assert(SlabIter != UnavailableSlabs.end()); + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); if (CanPool(ToPool)) { - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - slab_set_iterator(*It, &It); + DL_DELETE(UnavailableSlabs, slab_it); + DL_PREPEND(AvailableSlabs, slab_it); } else { - slab_unreg(*SlabIter); - destroy_slab(*SlabIter); - UnavailableSlabs.erase(SlabIter); + slab_unreg(slab_it->val); + DL_DELETE(UnavailableSlabs, slab_it); + destroy_slab(slab_it->val); } } -auto Bucket::getAvailSlab(bool &FromPool) -> decltype(AvailableSlabs.begin()) { - - if (AvailableSlabs.size() == 0) { - slab_t *slab = create_slab((bucket_t *)this, - sizeof(std::list::iterator)); +slab_list_item_t *Bucket::getAvailSlab(bool &FromPool) { + if (AvailableSlabs == NULL) { + slab_t *slab = create_slab((bucket_t *)this); if (slab == NULL) { throw MemoryProviderError{UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY}; } slab_reg(slab); - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), slab); - slab_set_iterator(slab, &It); - + DL_PREPEND(AvailableSlabs, slab->iter); updateStats(1, 0); FromPool = false; } else { - if (slab_get_num_allocated(*(AvailableSlabs.begin())) == 0) { + if (slab_get_num_allocated(AvailableSlabs->val) == 0) { // If this was an empty slab, it was in the pool. // Now it is no longer in the pool, so update count. 
--chunkedSlabsInPool; @@ -453,20 +449,19 @@ auto Bucket::getAvailSlab(bool &FromPool) -> decltype(AvailableSlabs.begin()) { } } - return AvailableSlabs.begin(); + return AvailableSlabs; } void *Bucket::getChunk(bool &FromPool) { std::lock_guard Lg(BucketLock); - auto SlabIt = getAvailSlab(FromPool); - auto *FreeChunk = slab_get_chunk((*SlabIt)); + slab_list_item_t *slab_it = getAvailSlab(FromPool); + auto *FreeChunk = slab_get_chunk(slab_it->val); // If the slab is full, move it to unavailable slabs and update its iterator - if (!(slab_has_avail(*SlabIt))) { - auto It = UnavailableSlabs.insert(UnavailableSlabs.begin(), *SlabIt); - AvailableSlabs.erase(SlabIt); - slab_set_iterator(*It, &It); + if (!(slab_has_avail(slab_it->val))) { + DL_DELETE(AvailableSlabs, slab_it); + DL_PREPEND(UnavailableSlabs, slab_it); } return FreeChunk; @@ -487,14 +482,10 @@ void Bucket::onFreeChunk(slab_t *slab, bool &ToPool) { // In case if the slab was previously full and now has 1 available // chunk, it should be moved to the list of available slabs if (slab_get_num_allocated(slab) == (slab_get_num_chunks(slab) - 1)) { - auto SlabIter = - *(std::list::iterator *)slab_get_iterator(slab); - assert(SlabIter != UnavailableSlabs.end()); - - slab_t *slab = *SlabIter; - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), slab); - UnavailableSlabs.erase(SlabIter); - slab_set_iterator(slab, &It); + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + DL_DELETE(UnavailableSlabs, slab_it); + DL_PREPEND(AvailableSlabs, slab_it); } // Check if slab is empty, and pool it if we can. @@ -506,11 +497,11 @@ void Bucket::onFreeChunk(slab_t *slab, bool &ToPool) { if (!CanPool(ToPool)) { // Note: since the slab is stored as unique_ptr, just remove it from // the list to destroy the object. - auto It = *(std::list::iterator *)slab_get_iterator(slab); - assert(It != AvailableSlabs.end()); - slab_unreg(*It); - destroy_slab(*It); - AvailableSlabs.erase(It); + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + slab_unreg(slab_it->val); + DL_DELETE(AvailableSlabs, slab_it); + destroy_slab(slab_it->val); } } } @@ -522,7 +513,11 @@ bool Bucket::CanPool(bool &ToPool) { if (chunkedBucket) { NewFreeSlabsInBucket = chunkedSlabsInPool + 1; } else { - NewFreeSlabsInBucket = AvailableSlabs.size() + 1; + // TODO optimize + size_t avail_num = 0; + slab_list_item_t *it = NULL; + DL_FOREACH(AvailableSlabs, it) { avail_num++; } + NewFreeSlabsInBucket = avail_num + 1; } if (Capacity() >= NewFreeSlabsInBucket) { size_t PoolSize = OwnAllocCtx.getLimits()->TotalSize; diff --git a/src/pool/pool_disjoint_temp.h b/src/pool/pool_disjoint_temp.h index 6ec1b7a9b..31b8023a0 100644 --- a/src/pool/pool_disjoint_temp.h +++ b/src/pool/pool_disjoint_temp.h @@ -7,6 +7,7 @@ void annotate_memory_inaccessible(void *ptr, size_t size); void annotate_memory_undefined(void *ptr, size_t size); typedef void *bucket_t; +typedef struct slab_list_item_t slab_list_item_t; // Represents the allocated memory block of size 'slab_min_size' // Internally, it splits the memory block into chunks. 
The number of @@ -34,11 +35,15 @@ typedef struct slab_t { // Store iterator to the corresponding node in avail/unavail list // to achieve O(1) removal - void *slab_list_iter; - size_t slab_list_iter_size; + slab_list_item_t *iter; } slab_t; -slab_t *create_slab(bucket_t bucket, size_t iter_size); +typedef struct slab_list_item_t { + slab_t *val; + struct slab_list_item_t *prev, *next; +} slab_list_item_t; + +slab_t *create_slab(bucket_t bucket); void destroy_slab(slab_t *slab); void *slab_get(const slab_t *slab); @@ -49,9 +54,6 @@ size_t slab_get_num_chunks(const slab_t *slab); size_t slab_get_chunk_size(const slab_t *slab); size_t slab_get_num_allocated(const slab_t *slab); -void *slab_get_iterator(const slab_t *slab); -void slab_set_iterator(slab_t *slab, void *it); - bool slab_has_avail(const slab_t *slab); void slab_free_chunk(slab_t *slab, void *ptr); From 50842f881c815b17fb2e2d4cc163fe68336cc56a Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Sat, 9 Nov 2024 18:18:16 +0100 Subject: [PATCH 04/26] use utils lock in bucket --- src/pool/pool_disjoint.c | 135 ++++++++- src/pool/pool_disjoint.cpp | 555 ++++++++++++---------------------- src/pool/pool_disjoint_temp.h | 82 ++++- 3 files changed, 396 insertions(+), 376 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index abbe505ff..06cdeabc9 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -16,6 +16,7 @@ #include #include "base_alloc_global.h" +#include "uthash/utlist.h" #include "utils_common.h" #include "utils_concurrency.h" #include "utils_log.h" @@ -50,15 +51,12 @@ extern "C" { #endif } -size_t bucket_get_slab_min_size(const bucket_t bucket); -size_t bucket_get_slab_alloc_size(const bucket_t bucket); -size_t bucket_get_size(const bucket_t bucket); -umf_memory_provider_handle_t bucket_get_provider(const bucket_t bucket); +size_t bucket_get_size(bucket_t *bucket); void slab_reg(slab_t *slab); void slab_unreg(slab_t *slab); -slab_t *create_slab(bucket_t bucket) { +slab_t *create_slab(bucket_t *bucket) { // In case bucket size is not a multiple of SlabMinSize, we would have // some padding at the end of the slab. 
slab_t *slab = umf_ba_global_alloc(sizeof(slab_t)); @@ -73,14 +71,13 @@ slab_t *create_slab(bucket_t bucket) { slab->iter->val = slab; slab->iter->prev = slab->iter->next = NULL; - slab->num_chunks = - bucket_get_slab_min_size(bucket) / bucket_get_size(bucket); + slab->num_chunks = bucket_slab_min_size(bucket) / bucket_get_size(bucket); slab->chunks = umf_ba_global_alloc(sizeof(bool) * slab->num_chunks); memset(slab->chunks, 0, sizeof(bool) * slab->num_chunks); - slab->slab_size = bucket_get_slab_alloc_size(bucket); + slab->slab_size = bucket_slab_alloc_size(bucket); umf_result_t res = umfMemoryProviderAlloc( - bucket_get_provider(bucket), slab->slab_size, 0, &slab->mem_ptr); + bucket_get_mem_handle(bucket), slab->slab_size, 0, &slab->mem_ptr); if (res == UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY) { destroy_slab(slab); @@ -88,18 +85,18 @@ slab_t *create_slab(bucket_t bucket) { } annotate_memory_inaccessible(slab->mem_ptr, slab->slab_size); - fprintf(stderr, "[DP create_slab] bucket: %p, slab_size: %zu\n", bucket, - slab->slab_size); + fprintf(stderr, "[DP create_slab] bucket: %p, slab_size: %zu\n", + (void *)bucket, slab->slab_size); return slab; } void destroy_slab(slab_t *slab) { fprintf(stderr, "[DP destroy_slab] bucket: %p, slab_size: %zu\n", - slab->bucket, slab->slab_size); + (void *)slab->bucket, slab->slab_size); - umf_result_t res = umfMemoryProviderFree(bucket_get_provider(slab->bucket), - slab->mem_ptr, slab->slab_size); + umf_result_t res = umfMemoryProviderFree( + bucket_get_mem_handle(slab->bucket), slab->mem_ptr, slab->slab_size); assert(res == UMF_RESULT_SUCCESS); umf_ba_global_free(slab->chunks); umf_ba_global_free(slab->iter); @@ -154,12 +151,12 @@ void *slab_get_chunk(slab_t *slab) { } void *slab_get_end(const slab_t *slab) { - return (uint8_t *)slab->mem_ptr + bucket_get_slab_min_size(slab->bucket); + return (uint8_t *)slab->mem_ptr + bucket_slab_min_size(slab->bucket); } // TODO remove? why need getter/setter? 
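// (A likely reason to keep the accessors for now: pool_disjoint.cpp still
// calls slab_get()/slab_get_end()/slab_get_bucket() where the old Slab methods
// were used, so they keep the C++ side compiling while the port is in
// progress.)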
void *slab_get(const slab_t *slab) { return slab->mem_ptr; } -bucket_t slab_get_bucket(const slab_t *slab) { return slab->bucket; } +bucket_t *slab_get_bucket(slab_t *slab) { return slab->bucket; } size_t slab_get_chunk_size(const slab_t *slab) { return bucket_get_size(slab->bucket); } @@ -195,6 +192,112 @@ bool slab_has_avail(const slab_t *slab) { return slab->num_allocated != slab->num_chunks; } +void slab_reg(slab_t *slab) { + bucket_t *bucket = slab_get_bucket(slab); + void *start_addr = (void *)ALIGN_DOWN((size_t)slab_get(slab), + bucket_slab_min_size(bucket)); + void *end_addr = (uint8_t *)(start_addr) + bucket_slab_min_size(bucket); + + slab_reg_by_addr(start_addr, slab); + slab_reg_by_addr(end_addr, slab); +} + +void slab_unreg(slab_t *slab) { + bucket_t *bucket = slab_get_bucket(slab); + void *start_addr = (void *)ALIGN_DOWN((size_t)slab_get(slab), + bucket_slab_min_size(bucket)); + void *end_addr = (uint8_t *)(start_addr) + bucket_slab_min_size(bucket); + + slab_unreg_by_addr(start_addr, slab); + slab_unreg_by_addr(end_addr, slab); +} + +// The lock must be acquired before calling this method +void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *ToPool) { + *ToPool = true; + + // In case if the slab was previously full and now has 1 available + // chunk, it should be moved to the list of available slabs + if (slab_get_num_allocated(slab) == (slab_get_num_chunks(slab) - 1)) { + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + DL_DELETE(bucket->UnavailableSlabs, slab_it); + DL_PREPEND(bucket->AvailableSlabs, slab_it); + } + + // Check if slab is empty, and pool it if we can. + if (slab_get_num_allocated(slab) == 0) { + // The slab is now empty. + // If pool has capacity then put the slab in the pool. + // The ToPool parameter indicates whether the Slab will be put in the + // pool or freed. + if (!bucket_can_pool(bucket, ToPool)) { + // Note: since the slab is stored as unique_ptr, just remove it from + // the list to destroy the object. + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + slab_unreg(slab_it->val); + DL_DELETE(bucket->AvailableSlabs, slab_it); + destroy_slab(slab_it->val); + } + } +} + +// Return the allocation size of this bucket. 
+size_t bucket_get_size(bucket_t *bucket) { return bucket->Size; } + +void *bucket_get_alloc_ctx(bucket_t *bucket) { return bucket->OwnAllocCtx; } + +void bucket_count_free(bucket_t *bucket) { ++bucket->freeCount; } + +void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *Slab, + bool *ToPool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_free_chunk(Slab, ptr); + bucket_on_free_chunk(bucket, Slab, ToPool); + + utils_mutex_unlock(&bucket->bucket_lock); +} + +void bucket_count_alloc(bucket_t *bucket, bool FromPool) { + ++bucket->allocCount; + if (FromPool) { + ++bucket->allocPoolCount; + } +} + +void *bucket_get_chunk(bucket_t *bucket, bool *FromPool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_list_item_t *slab_it = bucket_get_avail_slab(bucket, FromPool); + void *free_chunk = slab_get_chunk(slab_it->val); + + // If the slab is full, move it to unavailable slabs and update its iterator + if (!(slab_has_avail(slab_it->val))) { + DL_DELETE(bucket->AvailableSlabs, slab_it); + DL_PREPEND(bucket->UnavailableSlabs, slab_it); + } + + utils_mutex_unlock(&bucket->bucket_lock); + return free_chunk; +} + +size_t bucket_chunk_cut_off(bucket_t *bucket) { + return bucket_slab_min_size(bucket) / 2; +} + +size_t bucket_slab_alloc_size(bucket_t *bucket) { + // return max + return (bucket_get_size(bucket) > bucket_slab_min_size(bucket)) + ? bucket_get_size(bucket) + : bucket_slab_min_size(bucket); +} + +size_t bucket_slab_min_size(bucket_t *bucket) { + return bucket_get_params(bucket)->SlabMinSize; +} + #ifdef __cplusplus } #endif diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index 26276fc85..74aeb9487 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -27,6 +27,7 @@ #include "uthash/utlist.h" #include "../cpp_helpers.hpp" +#include "base_alloc_global.h" #include "pool_disjoint.h" #include "umf.h" #include "utils_common.h" @@ -41,7 +42,6 @@ extern "C" { #include "pool_disjoint_temp.h" -class Bucket; struct slab_t; #ifdef __cplusplus @@ -110,138 +110,40 @@ typedef struct MemoryProviderError { umf_result_t code; } MemoryProviderError_t; -class Bucket { - const size_t Size; - - // List of slabs which have at least 1 available chunk. - slab_list_item_t *AvailableSlabs; - - // List of slabs with 0 available chunk. - slab_list_item_t *UnavailableSlabs; - - // Protects the bucket and all the corresponding slabs - std::mutex BucketLock; - - // Reference to the allocator context, used access memory allocation - // routines, slab map and etc. - DisjointPool::AllocImpl &OwnAllocCtx; - - // For buckets used in chunked mode, a counter of slabs in the pool. - // For allocations that use an entire slab each, the entries in the Available - // list are entries in the pool.Each slab is available for a new - // allocation.The size of the Available list is the size of the pool. - // For allocations that use slabs in chunked mode, slabs will be in the - // Available list if any one or more of their chunks is free.The entire slab - // is not necessarily free, just some chunks in the slab are free. To - // implement pooling we will allow one slab in the Available list to be - // entirely empty. Normally such a slab would have been freed. But - // now we don't, and treat this slab as "in the pool". - // When a slab becomes entirely free we have to decide whether to return it - // to the provider or keep it allocated. A simple check for size of the - // Available list is not sufficient to check whether any slab has been - // pooled yet. 
We would have to traverse the entire Available list and check - // if any of them is entirely free. Instead we keep a counter of entirely - // empty slabs within the Available list to speed up the process of checking - // if a slab in this bucket is already pooled. - size_t chunkedSlabsInPool; - - // Statistics - size_t allocPoolCount; - size_t freeCount; - size_t currSlabsInUse; - size_t currSlabsInPool; - size_t maxSlabsInPool; +bucket_t *create_bucket(size_t Sz, DisjointPool::AllocImpl *AllocCtx) { + bucket_t *bucket = (bucket_t *)umf_ba_global_alloc(sizeof(bucket_t)); - public: - // Statistics - size_t allocCount; - size_t maxSlabsInUse; - - Bucket(size_t Sz, DisjointPool::AllocImpl &AllocCtx) - : Size{Sz}, OwnAllocCtx{AllocCtx} { - AvailableSlabs = NULL; - UnavailableSlabs = NULL; - chunkedSlabsInPool = 0; - allocPoolCount = 0; - freeCount = 0; - currSlabsInUse = 0; - currSlabsInPool = 0; - maxSlabsInPool = 0; - allocCount = 0; - maxSlabsInUse = 0; - } - - ~Bucket() { - slab_list_item_t *it = NULL, *tmp = NULL; - // TODO check eng - // use extra tmp to store next iterator before the slab is destroyed - LL_FOREACH_SAFE(AvailableSlabs, it, tmp) { destroy_slab(it->val); } - LL_FOREACH_SAFE(UnavailableSlabs, it, tmp) { destroy_slab(it->val); } - } - - // Get pointer to allocation that is one piece of an available slab in this - // bucket. - void *getChunk(bool &FromPool); - - // Get pointer to allocation that is a full slab in this bucket. - void *getSlab(bool &FromPool); - - // Return the allocation size of this bucket. - size_t getSize() const { return Size; } - - // Free an allocation that is one piece of a slab in this bucket. - void freeChunk(void *Ptr, slab_t *Slab, bool &ToPool); - - // Free an allocation that is a full slab in this bucket. - void freeSlab(slab_t *Slab, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle(); - - DisjointPool::AllocImpl &getAllocCtx() { return OwnAllocCtx; } - - // Check whether an allocation to be freed can be placed in the pool. - bool CanPool(bool &ToPool); - - // The minimum allocation size for any slab. - size_t SlabMinSize(); + bucket->Size = Sz; + bucket->OwnAllocCtx = AllocCtx; + bucket->AvailableSlabs = NULL; + bucket->UnavailableSlabs = NULL; + bucket->chunkedSlabsInPool = 0; + bucket->allocPoolCount = 0; + bucket->freeCount = 0; + bucket->currSlabsInUse = 0; + bucket->currSlabsInPool = 0; + bucket->maxSlabsInPool = 0; + bucket->allocCount = 0; + bucket->maxSlabsInUse = 0; - // The allocation size for a slab in this bucket. - size_t SlabAllocSize(); + utils_mutex_init(&bucket->bucket_lock); - // The minimum size of a chunk from this bucket's slabs. - size_t ChunkCutOff(); - - // The number of slabs in this bucket that can be in the pool. - size_t Capacity(); - - // The maximum allocation size subject to pooling. - size_t MaxPoolableSize(); - - // Update allocation count - void countAlloc(bool FromPool); - - // Update free count - void countFree(); - - // Update statistics of Available/Unavailable - void updateStats(int InUse, int InPool); - - // Print bucket statistics - void printStats(bool &TitlePrinted, const std::string &Label); - - private: - void onFreeChunk(slab_t *, bool &ToPool); + return bucket; +} - // Update statistics of pool usage, and indicate that an allocation was made - // from the pool. 
- void decrementPool(bool &FromPool); +void destroy_bucket(bucket_t *bucket) { + slab_list_item_t *it = NULL, *tmp = NULL; + // TODO check eng + // use extra tmp to store next iterator before the slab is destroyed + LL_FOREACH_SAFE(bucket->AvailableSlabs, it, tmp) { destroy_slab(it->val); } + LL_FOREACH_SAFE(bucket->UnavailableSlabs, it, tmp) { + destroy_slab(it->val); + } - // Get a slab to be used for chunked allocations. - slab_list_item_t *getAvailSlab(bool &FromPool); + utils_mutex_destroy_not_free(&bucket->bucket_lock); - // Get a slab that will be used as a whole for a single allocation. - slab_list_item_t *getAvailFullSlab(bool &FromPool); -}; + umf_ba_global_free(bucket); +} class DisjointPool::AllocImpl { // It's important for the map to be destroyed last after buckets and their @@ -253,7 +155,7 @@ class DisjointPool::AllocImpl { umf_memory_provider_handle_t MemHandle; // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) - std::vector> Buckets; + std::vector Buckets; // Configuration for this instance umf_disjoint_pool_params_t params; @@ -285,10 +187,11 @@ class DisjointPool::AllocImpl { MinBucketSizeExp = (size_t)log2Utils(Size1); auto Size2 = Size1 + Size1 / 2; for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { - Buckets.push_back(std::make_unique(Size1, *this)); - Buckets.push_back(std::make_unique(Size2, *this)); + // TODO copy allocimpl + Buckets.push_back(create_bucket(Size1, this)); + Buckets.push_back(create_bucket(Size2, this)); } - Buckets.push_back(std::make_unique(CutOff, *this)); + Buckets.push_back(create_bucket(CutOff, this)); auto ret = umfMemoryProviderGetMinPageSize(hProvider, nullptr, &ProviderMinPageSize); @@ -297,7 +200,14 @@ class DisjointPool::AllocImpl { } } - ~AllocImpl() { VALGRIND_DO_DESTROY_MEMPOOL(this); } + ~AllocImpl() { + + for (auto it = Buckets.begin(); it != Buckets.end(); it++) { + destroy_bucket(*it); + } + + VALGRIND_DO_DESTROY_MEMPOOL(this); + } void *allocate(size_t Size, size_t Alignment, bool &FromPool); void *allocate(size_t Size, bool &FromPool); @@ -329,7 +239,7 @@ class DisjointPool::AllocImpl { size_t &HighPeakSlabsInUse, const std::string &Label); private: - Bucket &findBucket(size_t Size); + bucket_t *findBucket(size_t Size); std::size_t sizeToIdx(size_t Size); }; @@ -366,232 +276,129 @@ bool operator==(const slab_t &Lhs, const slab_t &Rhs) { return slab_get(&Lhs) == slab_get(&Rhs); } -std::ostream &operator<<(std::ostream &Os, const slab_t &Slab) { +/* +std::ostream &operator<<(std::ostream &Os, slab_t &Slab) { Os << "Slab<" << slab_get(&Slab) << ", " << slab_get_end(&Slab) << ", " - << (*(Bucket *)slab_get_bucket(&Slab)).getSize() << ">"; + << slab_get_bucket(&Slab)->getSize() << ">"; return Os; } +*/ // If a slab was available in the pool then note that the current pooled // size has reduced by the size of a slab in this bucket. -void Bucket::decrementPool(bool &FromPool) { - FromPool = true; - updateStats(1, -1); - OwnAllocCtx.getLimits()->TotalSize -= SlabAllocSize(); +void bucket_decrement_pool(bucket_t *bucket, bool *FromPool) { + *FromPool = true; + bucket_update_stats(bucket, 1, -1); + bucket_get_limits(bucket)->TotalSize -= bucket_slab_alloc_size(bucket); } -slab_list_item_t *Bucket::getAvailFullSlab(bool &FromPool) { +slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, bool *FromPool) { // Return a slab that will be used for a single allocation. 
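    // Locking note: the caller, bucket_get_slab(), already holds
    // bucket->bucket_lock here, which is why the out-of-memory path below
    // releases the lock before throwing.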
- if (AvailableSlabs == NULL) { - slab_t *slab = create_slab((bucket_t *)this); + if (bucket->AvailableSlabs == NULL) { + slab_t *slab = create_slab(bucket); if (slab == NULL) { + utils_mutex_unlock(&bucket->bucket_lock); throw MemoryProviderError{UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY}; } slab_reg(slab); - DL_PREPEND(AvailableSlabs, slab->iter); - FromPool = false; - updateStats(1, 0); + DL_PREPEND(bucket->AvailableSlabs, slab->iter); + *FromPool = false; + bucket_update_stats(bucket, 1, 0); } else { - decrementPool(FromPool); + bucket_decrement_pool(bucket, FromPool); } - return AvailableSlabs; + return bucket->AvailableSlabs; } -void *Bucket::getSlab(bool &FromPool) { - std::lock_guard Lg(BucketLock); +void *bucket_get_slab(bucket_t *bucket, bool *FromPool) { + utils_mutex_lock(&bucket->bucket_lock); - slab_list_item_t *slab_it = getAvailFullSlab(FromPool); + slab_list_item_t *slab_it = bucket_get_avail_full_slab(bucket, FromPool); slab_t *slab = slab_it->val; void *ptr = slab_get(slab); - DL_DELETE(AvailableSlabs, slab_it); - DL_PREPEND(UnavailableSlabs, slab_it); + DL_DELETE(bucket->AvailableSlabs, slab_it); + DL_PREPEND(bucket->UnavailableSlabs, slab_it); + + utils_mutex_unlock(&bucket->bucket_lock); return ptr; } -void Bucket::freeSlab(slab_t *slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); +void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *ToPool) { + utils_mutex_lock(&bucket->bucket_lock); slab_list_item_t *slab_it = slab->iter; assert(slab_it->val != NULL); - if (CanPool(ToPool)) { - DL_DELETE(UnavailableSlabs, slab_it); - DL_PREPEND(AvailableSlabs, slab_it); + if (bucket_can_pool(bucket, ToPool)) { + DL_DELETE(bucket->UnavailableSlabs, slab_it); + DL_PREPEND(bucket->AvailableSlabs, slab_it); } else { slab_unreg(slab_it->val); - DL_DELETE(UnavailableSlabs, slab_it); + DL_DELETE(bucket->UnavailableSlabs, slab_it); destroy_slab(slab_it->val); } + utils_mutex_unlock(&bucket->bucket_lock); } -slab_list_item_t *Bucket::getAvailSlab(bool &FromPool) { - if (AvailableSlabs == NULL) { - slab_t *slab = create_slab((bucket_t *)this); +slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *FromPool) { + if (bucket->AvailableSlabs == NULL) { + slab_t *slab = create_slab(bucket); if (slab == NULL) { throw MemoryProviderError{UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY}; } slab_reg(slab); - DL_PREPEND(AvailableSlabs, slab->iter); - updateStats(1, 0); - FromPool = false; + DL_PREPEND(bucket->AvailableSlabs, slab->iter); + bucket_update_stats(bucket, 1, 0); + *FromPool = false; } else { - if (slab_get_num_allocated(AvailableSlabs->val) == 0) { + if (slab_get_num_allocated(bucket->AvailableSlabs->val) == 0) { // If this was an empty slab, it was in the pool. // Now it is no longer in the pool, so update count. - --chunkedSlabsInPool; - decrementPool(FromPool); + --bucket->chunkedSlabsInPool; + bucket_decrement_pool(bucket, FromPool); } else { // Allocation from existing slab is treated as from pool for statistics. 
- FromPool = true; - } - } - - return AvailableSlabs; -} - -void *Bucket::getChunk(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - slab_list_item_t *slab_it = getAvailSlab(FromPool); - auto *FreeChunk = slab_get_chunk(slab_it->val); - - // If the slab is full, move it to unavailable slabs and update its iterator - if (!(slab_has_avail(slab_it->val))) { - DL_DELETE(AvailableSlabs, slab_it); - DL_PREPEND(UnavailableSlabs, slab_it); - } - - return FreeChunk; -} - -void Bucket::freeChunk(void *ptr, slab_t *Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - - slab_free_chunk(Slab, ptr); - - onFreeChunk(Slab, ToPool); -} - -// The lock must be acquired before calling this method -void Bucket::onFreeChunk(slab_t *slab, bool &ToPool) { - ToPool = true; - - // In case if the slab was previously full and now has 1 available - // chunk, it should be moved to the list of available slabs - if (slab_get_num_allocated(slab) == (slab_get_num_chunks(slab) - 1)) { - slab_list_item_t *slab_it = slab->iter; - assert(slab_it->val != NULL); - DL_DELETE(UnavailableSlabs, slab_it); - DL_PREPEND(AvailableSlabs, slab_it); - } - - // Check if slab is empty, and pool it if we can. - if (slab_get_num_allocated(slab) == 0) { - // The slab is now empty. - // If pool has capacity then put the slab in the pool. - // The ToPool parameter indicates whether the Slab will be put in the - // pool or freed. - if (!CanPool(ToPool)) { - // Note: since the slab is stored as unique_ptr, just remove it from - // the list to destroy the object. - slab_list_item_t *slab_it = slab->iter; - assert(slab_it->val != NULL); - slab_unreg(slab_it->val); - DL_DELETE(AvailableSlabs, slab_it); - destroy_slab(slab_it->val); - } - } -} - -bool Bucket::CanPool(bool &ToPool) { - size_t NewFreeSlabsInBucket; - // Check if this bucket is used in chunked form or as full slabs. - bool chunkedBucket = getSize() <= ChunkCutOff(); - if (chunkedBucket) { - NewFreeSlabsInBucket = chunkedSlabsInPool + 1; - } else { - // TODO optimize - size_t avail_num = 0; - slab_list_item_t *it = NULL; - DL_FOREACH(AvailableSlabs, it) { avail_num++; } - NewFreeSlabsInBucket = avail_num + 1; - } - if (Capacity() >= NewFreeSlabsInBucket) { - size_t PoolSize = OwnAllocCtx.getLimits()->TotalSize; - while (true) { - size_t NewPoolSize = PoolSize + SlabAllocSize(); - - if (OwnAllocCtx.getLimits()->MaxSize < NewPoolSize) { - break; - } - - if (OwnAllocCtx.getLimits()->TotalSize.compare_exchange_strong( - PoolSize, NewPoolSize)) { - if (chunkedBucket) { - ++chunkedSlabsInPool; - } - - updateStats(-1, 1); - ToPool = true; - return true; - } + *FromPool = true; } } - updateStats(-1, 0); - ToPool = false; - return false; -} - -umf_memory_provider_handle_t Bucket::getMemHandle() { - return OwnAllocCtx.getMemHandle(); + return bucket->AvailableSlabs; } -size_t Bucket::SlabMinSize() { return OwnAllocCtx.getParams().SlabMinSize; } - -size_t Bucket::SlabAllocSize() { return std::max(getSize(), SlabMinSize()); } - -size_t Bucket::Capacity() { +size_t bucket_capacity(bucket_t *bucket) { // For buckets used in chunked mode, just one slab in pool is sufficient. // For larger buckets, the capacity could be more and is adjustable. 
- if (getSize() <= ChunkCutOff()) { + if (bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket)) { return 1; } else { - return OwnAllocCtx.getParams().Capacity; + return bucket_get_params(bucket)->Capacity; } } -size_t Bucket::MaxPoolableSize() { - return OwnAllocCtx.getParams().MaxPoolableSize; -} - -size_t Bucket::ChunkCutOff() { return SlabMinSize() / 2; } - -void Bucket::countAlloc(bool FromPool) { - ++allocCount; - if (FromPool) { - ++allocPoolCount; - } +size_t bucket_max_poolable_size(bucket_t *bucket) { + return bucket_get_params(bucket)->MaxPoolableSize; } -void Bucket::countFree() { ++freeCount; } - -void Bucket::updateStats(int InUse, int InPool) { - if (OwnAllocCtx.getParams().PoolTrace == 0) { +void bucket_update_stats(bucket_t *bucket, int InUse, int InPool) { + if (bucket_get_params(bucket)->PoolTrace == 0) { return; } - currSlabsInUse += InUse; - maxSlabsInUse = std::max(currSlabsInUse, maxSlabsInUse); - currSlabsInPool += InPool; - maxSlabsInPool = std::max(currSlabsInPool, maxSlabsInPool); + bucket->currSlabsInUse += InUse; + bucket->maxSlabsInUse = + std::max(bucket->currSlabsInUse, bucket->maxSlabsInUse); + bucket->currSlabsInPool += InPool; + bucket->maxSlabsInPool = + std::max(bucket->currSlabsInPool, bucket->maxSlabsInPool); // Increment or decrement current pool sizes based on whether // slab was added to or removed from pool. - OwnAllocCtx.getParams().CurPoolSize += InPool * SlabAllocSize(); + bucket_get_params(bucket)->CurPoolSize += + InPool * bucket_slab_alloc_size(bucket); } +/* void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { if (allocCount) { if (!TitlePrinted) { @@ -609,6 +416,7 @@ void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { << std::setw(21) << maxSlabsInPool << std::endl; } } +*/ void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) try { void *Ptr; @@ -624,20 +432,20 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) try { return Ptr; } - auto &Bucket = findBucket(Size); + bucket_t *bucket = findBucket(Size); - if (Size > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); + if (Size > bucket_chunk_cut_off(bucket)) { + Ptr = bucket_get_slab(bucket, &FromPool); } else { - Ptr = Bucket.getChunk(FromPool); + Ptr = bucket_get_chunk(bucket, &FromPool); } if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); + bucket_count_alloc(bucket, FromPool); } VALGRIND_DO_MEMPOOL_ALLOC(this, Ptr, Size); - annotate_memory_undefined(Ptr, Bucket.getSize()); + annotate_memory_undefined(Ptr, bucket_get_size(bucket)); return Ptr; } catch (MemoryProviderError &e) { @@ -679,16 +487,16 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, return Ptr; } - auto &Bucket = findBucket(AlignedSize); + bucket_t *bucket = findBucket(AlignedSize); - if (AlignedSize > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); + if (AlignedSize > bucket_chunk_cut_off(bucket)) { + Ptr = bucket_get_slab(bucket, &FromPool); } else { - Ptr = Bucket.getChunk(FromPool); + Ptr = bucket_get_chunk(bucket, &FromPool); } if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); + bucket_count_alloc(bucket, FromPool); } VALGRIND_DO_MEMPOOL_ALLOC(this, ALIGN_UP((size_t)Ptr, Alignment), Size); @@ -720,14 +528,16 @@ std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) { return index; } -Bucket &DisjointPool::AllocImpl::findBucket(size_t Size) { +bucket_t *DisjointPool::AllocImpl::findBucket(size_t Size) { auto calculatedIdx = sizeToIdx(Size); - 
assert((*(Buckets[calculatedIdx])).getSize() >= Size); + bucket_t *bucket = Buckets[calculatedIdx]; + assert(bucket_get_size(bucket) >= Size); if (calculatedIdx > 0) { - assert((*(Buckets[calculatedIdx - 1])).getSize() < Size); + bucket_t *bucket_prev = Buckets[calculatedIdx - 1]; + assert(bucket_get_size(bucket_prev) < Size); } - return *(Buckets[calculatedIdx]); + return Buckets[calculatedIdx]; } void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { @@ -756,18 +566,18 @@ void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { // Unlock the map before freeing the chunk, it may be locked on write // there Lk.unlock(); - auto bucket = (Bucket *)slab_get_bucket(Slab); + bucket_t *bucket = slab_get_bucket(Slab); if (getParams().PoolTrace > 1) { - bucket->countFree(); + bucket_count_free(bucket); } VALGRIND_DO_MEMPOOL_FREE(this, Ptr); - annotate_memory_inaccessible(Ptr, bucket->getSize()); - if (bucket->getSize() <= bucket->ChunkCutOff()) { - bucket->freeChunk(Ptr, Slab, ToPool); + annotate_memory_inaccessible(Ptr, bucket_get_size(bucket)); + if (bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket)) { + bucket_free_chunk(bucket, Ptr, Slab, &ToPool); } else { - bucket->freeSlab(Slab, ToPool); + bucket_free_slab(bucket, Slab, &ToPool); } return; @@ -785,13 +595,20 @@ void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, size_t &HighBucketSize, size_t &HighPeakSlabsInUse, const std::string &MTName) { + (void)TitlePrinted; // TODO + (void)MTName; // TODO + HighBucketSize = 0; HighPeakSlabsInUse = 0; for (auto &B : Buckets) { - (*B).printStats(TitlePrinted, MTName); - HighPeakSlabsInUse = std::max((*B).maxSlabsInUse, HighPeakSlabsInUse); + // TODO + //(*B).printStats(TitlePrinted, MTName); + bucket_t *bucket = B; + HighPeakSlabsInUse = + std::max(bucket->maxSlabsInUse, HighPeakSlabsInUse); if ((*B).allocCount) { - HighBucketSize = std::max((*B).SlabAllocSize(), HighBucketSize); + HighBucketSize = + std::max(bucket_slab_alloc_size(bucket), HighBucketSize); } } } @@ -915,47 +732,56 @@ umf_memory_pool_ops_t *umfDisjointPoolOps(void) { #ifdef __cplusplus extern "C" { #endif -size_t bucket_get_slab_min_size(const bucket_t bucket) { - return ((Bucket *)bucket)->SlabMinSize(); + +umf_disjoint_pool_params_t *bucket_get_params(bucket_t *bucket) { + auto t = (DisjointPool::AllocImpl *)bucket->OwnAllocCtx; + return &t->getParams(); } -size_t bucket_get_slab_alloc_size(const bucket_t bucket) { - return ((Bucket *)bucket)->SlabAllocSize(); +umf_disjoint_pool_shared_limits_t *bucket_get_limits(bucket_t *bucket) { + auto t = (DisjointPool::AllocImpl *)bucket->OwnAllocCtx; + return t->getLimits(); } -size_t bucket_get_size(const bucket_t bucket) { - return ((Bucket *)bucket)->getSize(); +umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket) { + auto t = (DisjointPool::AllocImpl *)bucket->OwnAllocCtx; + return t->getMemHandle(); } -umf_memory_provider_handle_t bucket_get_provider(const bucket_t bucket) { - return ((Bucket *)bucket)->getMemHandle(); +std::unordered_multimap * +bucket_get_known_slabs(bucket_t *bucket) { + auto t = (DisjointPool::AllocImpl *)bucket->OwnAllocCtx; + return &t->getKnownSlabs(); +} + +std::shared_timed_mutex *bucket_get_known_slabs_map_lock(bucket_t *bucket) { + auto t = (DisjointPool::AllocImpl *)bucket->OwnAllocCtx; + return &t->getKnownSlabsMapLock(); } void slab_reg_by_addr(void *addr, slab_t *slab) { - auto &Lock = - ((Bucket *)slab_get_bucket(slab))->getAllocCtx().getKnownSlabsMapLock(); - auto &Map = - ((Bucket 
*)slab_get_bucket(slab))->getAllocCtx().getKnownSlabs(); + bucket_t *bucket = slab_get_bucket(slab); + auto Lock = bucket_get_known_slabs_map_lock(bucket); + auto Map = bucket_get_known_slabs(bucket); - std::lock_guard Lg(Lock); - Map.insert({addr, slab}); + std::lock_guard Lg(*Lock); + Map->insert({addr, slab}); } void slab_unreg_by_addr(void *addr, slab_t *slab) { - auto &Lock = - ((Bucket *)slab_get_bucket(slab))->getAllocCtx().getKnownSlabsMapLock(); - auto &Map = - ((Bucket *)slab_get_bucket(slab))->getAllocCtx().getKnownSlabs(); + bucket_t *bucket = slab_get_bucket(slab); + auto Lock = bucket_get_known_slabs_map_lock(bucket); + auto Map = bucket_get_known_slabs(bucket); - std::lock_guard Lg(Lock); + std::lock_guard Lg(*Lock); - auto Slabs = Map.equal_range(addr); + auto Slabs = Map->equal_range(addr); // At least the must get the current slab from the map. assert(Slabs.first != Slabs.second && "Slab is not found"); for (auto It = Slabs.first; It != Slabs.second; ++It) { if (It->second == slab) { - Map.erase(It); + Map->erase(It); return; } } @@ -963,24 +789,45 @@ void slab_unreg_by_addr(void *addr, slab_t *slab) { assert(false && "Slab is not found"); } -void slab_reg(slab_t *slab) { - Bucket *bucket = (Bucket *)slab_get_bucket(slab); - void *start_addr = - (void *)ALIGN_DOWN((size_t)slab_get(slab), bucket->SlabMinSize()); - void *end_addr = static_cast(start_addr) + bucket->SlabMinSize(); +bool bucket_can_pool(bucket_t *bucket, bool *ToPool) { + size_t NewFreeSlabsInBucket; + // Check if this bucket is used in chunked form or as full slabs. + bool chunkedBucket = + bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket); + if (chunkedBucket) { + NewFreeSlabsInBucket = bucket->chunkedSlabsInPool + 1; + } else { + // TODO optimize + size_t avail_num = 0; + slab_list_item_t *it = NULL; + DL_FOREACH(bucket->AvailableSlabs, it) { avail_num++; } + NewFreeSlabsInBucket = avail_num + 1; + } + if (bucket_capacity(bucket) >= NewFreeSlabsInBucket) { + size_t PoolSize = bucket_get_limits(bucket)->TotalSize; + while (true) { + size_t NewPoolSize = PoolSize + bucket_slab_alloc_size(bucket); - slab_reg_by_addr(start_addr, slab); - slab_reg_by_addr(end_addr, slab); -} + if (bucket_get_limits(bucket)->MaxSize < NewPoolSize) { + break; + } -void slab_unreg(slab_t *slab) { - Bucket *bucket = (Bucket *)slab_get_bucket(slab); - void *start_addr = - (void *)ALIGN_DOWN((size_t)slab_get(slab), bucket->SlabMinSize()); - void *end_addr = static_cast(start_addr) + bucket->SlabMinSize(); + if (bucket_get_limits(bucket)->TotalSize.compare_exchange_strong( + PoolSize, NewPoolSize)) { + if (chunkedBucket) { + ++bucket->chunkedSlabsInPool; + } - slab_unreg_by_addr(start_addr, slab); - slab_unreg_by_addr(end_addr, slab); + bucket_update_stats(bucket, -1, 1); + *ToPool = true; + return true; + } + } + } + + bucket_update_stats(bucket, -1, 0); + *ToPool = false; + return false; } #ifdef __cplusplus diff --git a/src/pool/pool_disjoint_temp.h b/src/pool/pool_disjoint_temp.h index 31b8023a0..a257ca14c 100644 --- a/src/pool/pool_disjoint_temp.h +++ b/src/pool/pool_disjoint_temp.h @@ -6,9 +6,56 @@ void annotate_memory_inaccessible(void *ptr, size_t size); void annotate_memory_undefined(void *ptr, size_t size); -typedef void *bucket_t; typedef struct slab_list_item_t slab_list_item_t; +typedef struct bucket_t { + size_t Size; + + // List of slabs which have at least 1 available chunk. + slab_list_item_t *AvailableSlabs; + + // List of slabs with 0 available chunk. 
+    slab_list_item_t *UnavailableSlabs;
+
+    // Protects the bucket and all the corresponding slabs
+    utils_mutex_t bucket_lock;
+
+    // Reference to the allocator context, used to access memory allocation
+    // routines, the slab map, etc.
+    void *OwnAllocCtx;
+
+    // For buckets used in chunked mode, a counter of slabs in the pool.
+    // For allocations that use an entire slab each, the entries in the Available
+    // list are entries in the pool. Each slab is available for a new
+    // allocation. The size of the Available list is the size of the pool.
+    // For allocations that use slabs in chunked mode, slabs will be in the
+    // Available list if any one or more of their chunks is free. The entire slab
+    // is not necessarily free, just some chunks in the slab are free. To
+    // implement pooling we will allow one slab in the Available list to be
+    // entirely empty. Normally such a slab would have been freed. But now we
+    // don't free it, and treat this slab as "in the pool".
+    // When a slab becomes entirely free we have to decide whether to return it
+    // to the provider or keep it allocated. A simple check of the size of the
+    // Available list is not sufficient to check whether any slab has been
+    // pooled yet. We would have to traverse the entire Available list and check
+    // if any of them is entirely free. Instead we keep a counter of entirely
+    // empty slabs within the Available list to speed up the process of checking
+    // if a slab in this bucket is already pooled.
+    size_t chunkedSlabsInPool;
+
+    // Statistics
+    size_t allocPoolCount;
+    size_t freeCount;
+    size_t currSlabsInUse;
+    size_t currSlabsInPool;
+    size_t maxSlabsInPool;
+
+    // Statistics
+    size_t allocCount;
+    size_t maxSlabsInUse;
+
+} bucket_t;
+
 // Represents the allocated memory block of size 'slab_min_size'
 // Internally, it splits the memory block into chunks. The number of
 // chunks depends of the size of a Bucket which created the Slab.
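
The comment block above is the heart of the pooling scheme: chunked buckets track entirely free ("pooled") slabs with a counter, while full-slab buckets treat every entry on the Available list as pooled. The following standalone sketch only illustrates that decision; the demo_* names and the numbers are hypothetical and are not part of this patch.

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    // Simplified stand-in for bucket_t, for illustration only.
    typedef struct demo_bucket_t {
        size_t size;                  // allocation size served by this bucket
        size_t chunk_cut_off;         // sizes <= cut-off use slabs in chunked mode
        size_t chunked_slabs_in_pool; // counter kept instead of walking the list
        size_t avail_list_len;        // length of the Available list
        size_t capacity;              // how many free slabs this bucket may pool
    } demo_bucket_t;

    // Would pooling one more entirely free slab stay within capacity?
    static bool demo_can_pool(const demo_bucket_t *b) {
        size_t new_free_slabs;
        if (b->size <= b->chunk_cut_off) {
            // Chunked mode: the counter avoids traversing the Available list.
            new_free_slabs = b->chunked_slabs_in_pool + 1;
        } else {
            // Full-slab mode: every Available entry is an empty, pooled slab.
            new_free_slabs = b->avail_list_len + 1;
        }
        return b->capacity >= new_free_slabs;
    }

    int main(void) {
        demo_bucket_t b = {.size = 64, .chunk_cut_off = 32 * 1024,
                           .chunked_slabs_in_pool = 0, .avail_list_len = 3,
                           .capacity = 1};
        printf("can pool: %d\n", demo_can_pool(&b)); // 1: no empty slab pooled yet
        b.chunked_slabs_in_pool = 1;
        printf("can pool: %d\n", demo_can_pool(&b)); // 0: the one allowed slab is pooled
        return 0;
    }
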
@@ -28,7 +75,7 @@ typedef struct slab_t { size_t num_allocated; // The bucket which the slab belongs to - bucket_t bucket; + bucket_t *bucket; // Hints where to start search for free chunk in a slab size_t first_free_chunk_idx; @@ -43,12 +90,12 @@ typedef struct slab_list_item_t { struct slab_list_item_t *prev, *next; } slab_list_item_t; -slab_t *create_slab(bucket_t bucket); +slab_t *create_slab(bucket_t *bucket); void destroy_slab(slab_t *slab); void *slab_get(const slab_t *slab); void *slab_get_end(const slab_t *slab); -bucket_t slab_get_bucket(const slab_t *slab); +bucket_t *slab_get_bucket(slab_t *slab); void *slab_get_chunk(slab_t *slab); size_t slab_get_num_chunks(const slab_t *slab); size_t slab_get_chunk_size(const slab_t *slab); @@ -58,6 +105,29 @@ bool slab_has_avail(const slab_t *slab); void slab_free_chunk(slab_t *slab, void *ptr); void slab_reg(slab_t *slab); +void slab_reg_by_addr(void *addr, slab_t *slab); void slab_unreg(slab_t *slab); - -#endif // TEMP_H \ No newline at end of file +void slab_unreg_by_addr(void *addr, slab_t *slab); + +void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); +size_t bucket_slab_alloc_size(bucket_t *bucket); +bool bucket_can_pool(bucket_t *bucket, bool *to_pool); +void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *to_pool); +void bucket_decrement_pool(bucket_t *bucket, bool *from_pool); +void *bucket_get_chunk(bucket_t *bucket, bool *from_pool); +void *bucket_get_slab(bucket_t *bucket, bool *from_pool); +size_t bucket_get_size(bucket_t *bucket); +size_t bucket_chunk_cut_off(bucket_t *bucket); +size_t bucket_capacity(bucket_t *bucket); +size_t bucket_slab_min_size(bucket_t *bucket); +void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *Slab, + bool *to_pool); +void bucket_count_alloc(bucket_t *bucket, bool from_pool); +void bucket_count_free(bucket_t *bucket); + +umf_disjoint_pool_shared_limits_t *bucket_get_limits(bucket_t *bucket); +umf_disjoint_pool_params_t *bucket_get_params(bucket_t *bucket); +umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket); +slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); + +#endif // TEMP_H From b487769e9b904fa4a5b61ed885bccc1e19ec619f Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Sun, 10 Nov 2024 17:29:45 +0100 Subject: [PATCH 05/26] add utils_min/max functions --- src/pool/pool_disjoint.cpp | 14 ++++++++------ src/utils/utils_common.c | 3 +++ src/utils/utils_common.h | 4 ++++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index 74aeb9487..a7d031513 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -180,9 +180,9 @@ class DisjointPool::AllocImpl { // Powers of 2 and the value halfway between the powers of 2. auto Size1 = this->params.MinBucketSize; // MinBucketSize cannot be larger than CutOff. - Size1 = std::min(Size1, CutOff); + Size1 = utils_min(Size1, CutOff); // Buckets sized smaller than the bucket default size- 8 aren't needed. - Size1 = std::max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); + Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); // Calculate the exponent for MinBucketSize used for finding buckets. 
MinBucketSizeExp = (size_t)log2Utils(Size1); auto Size2 = Size1 + Size1 / 2; @@ -387,11 +387,13 @@ void bucket_update_stats(bucket_t *bucket, int InUse, int InPool) { return; } bucket->currSlabsInUse += InUse; + bucket->maxSlabsInUse = - std::max(bucket->currSlabsInUse, bucket->maxSlabsInUse); + utils_max(bucket->currSlabsInUse, bucket->maxSlabsInUse); bucket->currSlabsInPool += InPool; bucket->maxSlabsInPool = - std::max(bucket->currSlabsInPool, bucket->maxSlabsInPool); + utils_max(bucket->currSlabsInPool, bucket->maxSlabsInPool); + // Increment or decrement current pool sizes based on whether // slab was added to or removed from pool. bucket_get_params(bucket)->CurPoolSize += @@ -605,10 +607,10 @@ void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, //(*B).printStats(TitlePrinted, MTName); bucket_t *bucket = B; HighPeakSlabsInUse = - std::max(bucket->maxSlabsInUse, HighPeakSlabsInUse); + utils_max(bucket->maxSlabsInUse, HighPeakSlabsInUse); if ((*B).allocCount) { HighBucketSize = - std::max(bucket_slab_alloc_size(bucket), HighBucketSize); + utils_max(bucket_slab_alloc_size(bucket), HighBucketSize); } } } diff --git a/src/utils/utils_common.c b/src/utils/utils_common.c index 25169f6cf..984e91a6c 100644 --- a/src/utils/utils_common.c +++ b/src/utils/utils_common.c @@ -138,3 +138,6 @@ umf_result_t utils_translate_flags(unsigned in_flags, unsigned max, *out_flags = out_f; return UMF_RESULT_SUCCESS; } + +size_t utils_max(size_t a, size_t b) { return a > b ? a : b; } +size_t utils_min(size_t a, size_t b) { return a < b ? a : b; } \ No newline at end of file diff --git a/src/utils/utils_common.h b/src/utils/utils_common.h index c25fda2ab..9b226246b 100644 --- a/src/utils/utils_common.h +++ b/src/utils/utils_common.h @@ -153,6 +153,10 @@ int utils_file_open_or_create(const char *path); int utils_fallocate(int fd, long offset, long len); +size_t utils_max(size_t a, size_t b); + +size_t utils_min(size_t a, size_t b); + #ifdef __cplusplus } #endif From dbacd44e11ceba393c0c2197396ae84ba31dc83b Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Sun, 10 Nov 2024 17:37:33 +0100 Subject: [PATCH 06/26] make bucket C structure 2 --- src/pool/pool_disjoint.c | 155 ++++++++++++++++++++ src/pool/pool_disjoint.cpp | 257 +++++++++------------------------- src/pool/pool_disjoint_temp.h | 16 ++- 3 files changed, 232 insertions(+), 196 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 06cdeabc9..79416bc6d 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -212,6 +212,41 @@ void slab_unreg(slab_t *slab) { slab_unreg_by_addr(end_addr, slab); } +bucket_t *create_bucket(size_t Sz, void *AllocCtx) { + bucket_t *bucket = (bucket_t *)umf_ba_global_alloc(sizeof(bucket_t)); + + bucket->Size = Sz; + bucket->OwnAllocCtx = AllocCtx; + bucket->AvailableSlabs = NULL; + bucket->UnavailableSlabs = NULL; + bucket->chunkedSlabsInPool = 0; + bucket->allocPoolCount = 0; + bucket->freeCount = 0; + bucket->currSlabsInUse = 0; + bucket->currSlabsInPool = 0; + bucket->maxSlabsInPool = 0; + bucket->allocCount = 0; + bucket->maxSlabsInUse = 0; + + utils_mutex_init(&bucket->bucket_lock); + + return bucket; +} + +void destroy_bucket(bucket_t *bucket) { + slab_list_item_t *it = NULL, *tmp = NULL; + // TODO check eng + // use extra tmp to store next iterator before the slab is destroyed + LL_FOREACH_SAFE(bucket->AvailableSlabs, it, tmp) { destroy_slab(it->val); } + LL_FOREACH_SAFE(bucket->UnavailableSlabs, it, tmp) { + destroy_slab(it->val); + } + + 
utils_mutex_destroy_not_free(&bucket->bucket_lock); + + umf_ba_global_free(bucket); +} + // The lock must be acquired before calling this method void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *ToPool) { *ToPool = true; @@ -271,6 +306,11 @@ void *bucket_get_chunk(bucket_t *bucket, bool *FromPool) { utils_mutex_lock(&bucket->bucket_lock); slab_list_item_t *slab_it = bucket_get_avail_slab(bucket, FromPool); + if (slab_it == NULL) { + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + void *free_chunk = slab_get_chunk(slab_it->val); // If the slab is full, move it to unavailable slabs and update its iterator @@ -298,6 +338,121 @@ size_t bucket_slab_min_size(bucket_t *bucket) { return bucket_get_params(bucket)->SlabMinSize; } +slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, + bool *from_pool) { + // Return a slab that will be used for a single allocation. + if (bucket->AvailableSlabs == NULL) { + slab_t *slab = create_slab(bucket); + if (slab == NULL) { + //assert(0); + return NULL; + } + + slab_reg(slab); + DL_PREPEND(bucket->AvailableSlabs, slab->iter); + *from_pool = false; + bucket_update_stats(bucket, 1, 0); + } else { + bucket_decrement_pool(bucket, from_pool); + } + + return bucket->AvailableSlabs; +} + +void *bucket_get_slab(bucket_t *bucket, bool *from_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_list_item_t *slab_it = bucket_get_avail_full_slab(bucket, from_pool); + if (slab_it == NULL) { + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + slab_t *slab = slab_it->val; + void *ptr = slab_get(slab); + + DL_DELETE(bucket->AvailableSlabs, slab_it); + DL_PREPEND(bucket->UnavailableSlabs, slab_it); + + utils_mutex_unlock(&bucket->bucket_lock); + return ptr; +} + +void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool) { + utils_mutex_lock(&bucket->bucket_lock); + + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + if (bucket_can_pool(bucket, to_pool)) { + DL_DELETE(bucket->UnavailableSlabs, slab_it); + DL_PREPEND(bucket->AvailableSlabs, slab_it); + } else { + slab_unreg(slab_it->val); + DL_DELETE(bucket->UnavailableSlabs, slab_it); + destroy_slab(slab_it->val); + } + utils_mutex_unlock(&bucket->bucket_lock); +} + +slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool) { + if (bucket->AvailableSlabs == NULL) { + slab_t *slab = create_slab(bucket); + if (slab == NULL) { + // TODO log + // TODO replace asserts + return NULL; + } + + slab_reg(slab); + DL_PREPEND(bucket->AvailableSlabs, slab->iter); + bucket_update_stats(bucket, 1, 0); + *from_pool = false; + } else { + if (slab_get_num_allocated(bucket->AvailableSlabs->val) == 0) { + // If this was an empty slab, it was in the pool. + // Now it is no longer in the pool, so update count. + --bucket->chunkedSlabsInPool; + bucket_decrement_pool(bucket, from_pool); + } else { + // Allocation from existing slab is treated as from pool for statistics. + *from_pool = true; + } + } + + return bucket->AvailableSlabs; +} + +size_t bucket_capacity(bucket_t *bucket) { + // For buckets used in chunked mode, just one slab in pool is sufficient. + // For larger buckets, the capacity could be more and is adjustable. 
+ if (bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket)) { + return 1; + } else { + return bucket_get_params(bucket)->Capacity; + } +} + +size_t bucket_max_poolable_size(bucket_t *bucket) { + return bucket_get_params(bucket)->MaxPoolableSize; +} + +void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { + if (bucket_get_params(bucket)->PoolTrace == 0) { + return; + } + + bucket->currSlabsInUse += in_use; + bucket->maxSlabsInUse = + utils_max(bucket->currSlabsInUse, bucket->maxSlabsInUse); + bucket->currSlabsInPool += in_pool; + bucket->maxSlabsInPool = + utils_max(bucket->currSlabsInPool, bucket->maxSlabsInPool); + + // Increment or decrement current pool sizes based on whether + // slab was added to or removed from pool. + bucket_get_params(bucket)->CurPoolSize += + in_pool * bucket_slab_alloc_size(bucket); +} + #ifdef __cplusplus } #endif diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index a7d031513..ce265e019 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -110,41 +110,6 @@ typedef struct MemoryProviderError { umf_result_t code; } MemoryProviderError_t; -bucket_t *create_bucket(size_t Sz, DisjointPool::AllocImpl *AllocCtx) { - bucket_t *bucket = (bucket_t *)umf_ba_global_alloc(sizeof(bucket_t)); - - bucket->Size = Sz; - bucket->OwnAllocCtx = AllocCtx; - bucket->AvailableSlabs = NULL; - bucket->UnavailableSlabs = NULL; - bucket->chunkedSlabsInPool = 0; - bucket->allocPoolCount = 0; - bucket->freeCount = 0; - bucket->currSlabsInUse = 0; - bucket->currSlabsInPool = 0; - bucket->maxSlabsInPool = 0; - bucket->allocCount = 0; - bucket->maxSlabsInUse = 0; - - utils_mutex_init(&bucket->bucket_lock); - - return bucket; -} - -void destroy_bucket(bucket_t *bucket) { - slab_list_item_t *it = NULL, *tmp = NULL; - // TODO check eng - // use extra tmp to store next iterator before the slab is destroyed - LL_FOREACH_SAFE(bucket->AvailableSlabs, it, tmp) { destroy_slab(it->val); } - LL_FOREACH_SAFE(bucket->UnavailableSlabs, it, tmp) { - destroy_slab(it->val); - } - - utils_mutex_destroy_not_free(&bucket->bucket_lock); - - umf_ba_global_free(bucket); -} - class DisjointPool::AllocImpl { // It's important for the map to be destroyed last after buckets and their // slabs This is because slab's destructor removes the object from the map. @@ -180,9 +145,9 @@ class DisjointPool::AllocImpl { // Powers of 2 and the value halfway between the powers of 2. auto Size1 = this->params.MinBucketSize; // MinBucketSize cannot be larger than CutOff. - Size1 = utils_min(Size1, CutOff); + Size1 = std::min(Size1, CutOff); // Buckets sized smaller than the bucket default size- 8 aren't needed. - Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); + Size1 = std::max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); // Calculate the exponent for MinBucketSize used for finding buckets. 
MinBucketSizeExp = (size_t)log2Utils(Size1); auto Size2 = Size1 + Size1 / 2; @@ -211,7 +176,7 @@ class DisjointPool::AllocImpl { void *allocate(size_t Size, size_t Alignment, bool &FromPool); void *allocate(size_t Size, bool &FromPool); - void deallocate(void *Ptr, bool &ToPool); + umf_result_t deallocate(void *Ptr, bool &ToPool); umf_memory_provider_handle_t getMemHandle() { return MemHandle; } @@ -248,14 +213,15 @@ static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, void *ptr; auto ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); if (ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; + umf::getPoolLastStatusRef() = ret; + return NULL; } annotate_memory_inaccessible(ptr, size); return ptr; } -static void memoryProviderFree(umf_memory_provider_handle_t hProvider, - void *ptr) { +static umf_result_t memoryProviderFree(umf_memory_provider_handle_t hProvider, + void *ptr) { size_t size = 0; if (ptr) { @@ -268,8 +234,12 @@ static void memoryProviderFree(umf_memory_provider_handle_t hProvider, auto ret = umfMemoryProviderFree(hProvider, ptr, size); if (ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; + + umf::getPoolLastStatusRef() = ret; + // throw MemoryProviderError{ret}; + return ret; } + return UMF_RESULT_SUCCESS; } bool operator==(const slab_t &Lhs, const slab_t &Rhs) { @@ -292,114 +262,6 @@ void bucket_decrement_pool(bucket_t *bucket, bool *FromPool) { bucket_get_limits(bucket)->TotalSize -= bucket_slab_alloc_size(bucket); } -slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, bool *FromPool) { - // Return a slab that will be used for a single allocation. - if (bucket->AvailableSlabs == NULL) { - slab_t *slab = create_slab(bucket); - if (slab == NULL) { - utils_mutex_unlock(&bucket->bucket_lock); - throw MemoryProviderError{UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY}; - } - - slab_reg(slab); - DL_PREPEND(bucket->AvailableSlabs, slab->iter); - *FromPool = false; - bucket_update_stats(bucket, 1, 0); - } else { - bucket_decrement_pool(bucket, FromPool); - } - - return bucket->AvailableSlabs; -} - -void *bucket_get_slab(bucket_t *bucket, bool *FromPool) { - utils_mutex_lock(&bucket->bucket_lock); - - slab_list_item_t *slab_it = bucket_get_avail_full_slab(bucket, FromPool); - slab_t *slab = slab_it->val; - void *ptr = slab_get(slab); - - DL_DELETE(bucket->AvailableSlabs, slab_it); - DL_PREPEND(bucket->UnavailableSlabs, slab_it); - - utils_mutex_unlock(&bucket->bucket_lock); - return ptr; -} - -void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *ToPool) { - utils_mutex_lock(&bucket->bucket_lock); - - slab_list_item_t *slab_it = slab->iter; - assert(slab_it->val != NULL); - if (bucket_can_pool(bucket, ToPool)) { - DL_DELETE(bucket->UnavailableSlabs, slab_it); - DL_PREPEND(bucket->AvailableSlabs, slab_it); - } else { - slab_unreg(slab_it->val); - DL_DELETE(bucket->UnavailableSlabs, slab_it); - destroy_slab(slab_it->val); - } - utils_mutex_unlock(&bucket->bucket_lock); -} - -slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *FromPool) { - if (bucket->AvailableSlabs == NULL) { - slab_t *slab = create_slab(bucket); - if (slab == NULL) { - throw MemoryProviderError{UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY}; - } - - slab_reg(slab); - DL_PREPEND(bucket->AvailableSlabs, slab->iter); - bucket_update_stats(bucket, 1, 0); - *FromPool = false; - } else { - if (slab_get_num_allocated(bucket->AvailableSlabs->val) == 0) { - // If this was an empty slab, it was in the pool. - // Now it is no longer in the pool, so update count. 
- --bucket->chunkedSlabsInPool; - bucket_decrement_pool(bucket, FromPool); - } else { - // Allocation from existing slab is treated as from pool for statistics. - *FromPool = true; - } - } - - return bucket->AvailableSlabs; -} - -size_t bucket_capacity(bucket_t *bucket) { - // For buckets used in chunked mode, just one slab in pool is sufficient. - // For larger buckets, the capacity could be more and is adjustable. - if (bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket)) { - return 1; - } else { - return bucket_get_params(bucket)->Capacity; - } -} - -size_t bucket_max_poolable_size(bucket_t *bucket) { - return bucket_get_params(bucket)->MaxPoolableSize; -} - -void bucket_update_stats(bucket_t *bucket, int InUse, int InPool) { - if (bucket_get_params(bucket)->PoolTrace == 0) { - return; - } - bucket->currSlabsInUse += InUse; - - bucket->maxSlabsInUse = - utils_max(bucket->currSlabsInUse, bucket->maxSlabsInUse); - bucket->currSlabsInPool += InPool; - bucket->maxSlabsInPool = - utils_max(bucket->currSlabsInPool, bucket->maxSlabsInPool); - - // Increment or decrement current pool sizes based on whether - // slab was added to or removed from pool. - bucket_get_params(bucket)->CurPoolSize += - InPool * bucket_slab_alloc_size(bucket); -} - /* void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { if (allocCount) { @@ -420,7 +282,7 @@ void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { } */ -void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) try { +void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) { void *Ptr; if (Size == 0) { @@ -430,6 +292,14 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) try { FromPool = false; if (Size > getParams().MaxPoolableSize) { Ptr = memoryProviderAlloc(getMemHandle(), Size); + + if (Ptr == NULL) { + // TODO get code from func + umf::getPoolLastStatusRef() = + UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + return nullptr; + } + annotate_memory_undefined(Ptr, Size); return Ptr; } @@ -442,6 +312,13 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) try { Ptr = bucket_get_chunk(bucket, &FromPool); } + if (Ptr == NULL) { + // TODO get code from func + umf::getPoolLastStatusRef() = + UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + return nullptr; + } + if (getParams().PoolTrace > 1) { bucket_count_alloc(bucket, FromPool); } @@ -450,13 +327,10 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) try { annotate_memory_undefined(Ptr, bucket_get_size(bucket)); return Ptr; -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; } void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, - bool &FromPool) try { + bool &FromPool) { void *Ptr; if (Size == 0) { @@ -485,6 +359,7 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, FromPool = false; if (AlignedSize > getParams().MaxPoolableSize) { Ptr = memoryProviderAlloc(getMemHandle(), Size, Alignment); + assert(Ptr); annotate_memory_undefined(Ptr, Size); return Ptr; } @@ -497,6 +372,7 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, Ptr = bucket_get_chunk(bucket, &FromPool); } + assert(Ptr); if (getParams().PoolTrace > 1) { bucket_count_alloc(bucket, FromPool); } @@ -504,9 +380,6 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, VALGRIND_DO_MEMPOOL_ALLOC(this, ALIGN_UP((size_t)Ptr, Alignment), Size); annotate_memory_undefined((void *)ALIGN_UP((size_t)Ptr, Alignment), Size); return (void 
*)ALIGN_UP((size_t)Ptr, Alignment); -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; } std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) { @@ -542,9 +415,9 @@ bucket_t *DisjointPool::AllocImpl::findBucket(size_t Size) { return Buckets[calculatedIdx]; } -void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { +umf_result_t DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { if (Ptr == nullptr) { - return; + return UMF_RESULT_SUCCESS; } auto *SlabPtr = (void *)ALIGN_DOWN((size_t)Ptr, SlabMinSize()); @@ -556,8 +429,8 @@ void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { auto Slabs = getKnownSlabs().equal_range(SlabPtr); if (Slabs.first == Slabs.second) { Lk.unlock(); - memoryProviderFree(getMemHandle(), Ptr); - return; + umf_result_t ret = memoryProviderFree(getMemHandle(), Ptr); + return ret; } for (auto It = Slabs.first; It != Slabs.second; ++It) { @@ -582,7 +455,7 @@ void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { bucket_free_slab(bucket, Slab, &ToPool); } - return; + return UMF_RESULT_SUCCESS; } } @@ -591,6 +464,7 @@ void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { // to some slab with an entry in the map. So we find a slab // but the range checks fail. memoryProviderFree(getMemHandle(), Ptr); + return UMF_RESULT_SUCCESS; } void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, @@ -607,10 +481,10 @@ void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, //(*B).printStats(TitlePrinted, MTName); bucket_t *bucket = B; HighPeakSlabsInUse = - utils_max(bucket->maxSlabsInUse, HighPeakSlabsInUse); + std::max(bucket->maxSlabsInUse, HighPeakSlabsInUse); if ((*B).allocCount) { HighBucketSize = - utils_max(bucket_slab_alloc_size(bucket), HighBucketSize); + std::max(bucket_slab_alloc_size(bucket), HighBucketSize); } } } @@ -676,22 +550,23 @@ size_t DisjointPool::malloc_usable_size(void *) { return 0; } -umf_result_t DisjointPool::free(void *ptr) try { +umf_result_t DisjointPool::free(void *ptr) { bool ToPool; - impl->deallocate(ptr, ToPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Freed " << MT << " " << ptr << " to " - << (ToPool ? "Pool" : "Provider") - << ", Current total pool size " - << impl->getLimits()->TotalSize.load() - << ", Current pool size for " << MT << " " - << impl->getParams().CurPoolSize << "\n"; + umf_result_t ret = impl->deallocate(ptr, ToPool); + + if (ret == UMF_RESULT_SUCCESS) { + + if (impl->getParams().PoolTrace > 2) { + auto MT = impl->getParams().Name; + std::cout << "Freed " << MT << " " << ptr << " to " + << (ToPool ? 
"Pool" : "Provider") + << ", Current total pool size " + << impl->getLimits()->TotalSize.load() + << ", Current pool size for " << MT << " " + << impl->getParams().CurPoolSize << "\n"; + } } - return UMF_RESULT_SUCCESS; -} catch (MemoryProviderError &e) { - return e.code; + return ret; } umf_result_t DisjointPool::get_last_allocation_error() { @@ -707,19 +582,19 @@ DisjointPool::~DisjointPool() { size_t HighPeakSlabsInUse; if (impl->getParams().PoolTrace > 1) { auto name = impl->getParams().Name; - try { // cannot throw in destructor - impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, - name); - if (TitlePrinted) { - std::cout << "Current Pool Size " - << impl->getLimits()->TotalSize.load() << std::endl; - std::cout << "Suggested Setting=;" - << std::string(1, (char)tolower(name[0])) - << std::string(name + 1) << ":" << HighBucketSize - << "," << HighPeakSlabsInUse << ",64K" << std::endl; - } - } catch (...) { // ignore exceptions + //try { // cannot throw in destructor + impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, + name); + if (TitlePrinted) { + std::cout << "Current Pool Size " + << impl->getLimits()->TotalSize.load() << std::endl; + std::cout << "Suggested Setting=;" + << std::string(1, (char)tolower(name[0])) + << std::string(name + 1) << ":" << HighBucketSize << "," + << HighPeakSlabsInUse << ",64K" << std::endl; } + //} catch (...) { // ignore exceptions + // } } } diff --git a/src/pool/pool_disjoint_temp.h b/src/pool/pool_disjoint_temp.h index a257ca14c..1a777d9a8 100644 --- a/src/pool/pool_disjoint_temp.h +++ b/src/pool/pool_disjoint_temp.h @@ -109,25 +109,31 @@ void slab_reg_by_addr(void *addr, slab_t *slab); void slab_unreg(slab_t *slab); void slab_unreg_by_addr(void *addr, slab_t *slab); +bucket_t *create_bucket(size_t sz, void *alloc_ctx); +void destroy_bucket(bucket_t *bucket); + void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); -size_t bucket_slab_alloc_size(bucket_t *bucket); bool bucket_can_pool(bucket_t *bucket, bool *to_pool); void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *to_pool); void bucket_decrement_pool(bucket_t *bucket, bool *from_pool); void *bucket_get_chunk(bucket_t *bucket, bool *from_pool); -void *bucket_get_slab(bucket_t *bucket, bool *from_pool); size_t bucket_get_size(bucket_t *bucket); size_t bucket_chunk_cut_off(bucket_t *bucket); size_t bucket_capacity(bucket_t *bucket); -size_t bucket_slab_min_size(bucket_t *bucket); -void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *Slab, +void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, bool *to_pool); void bucket_count_alloc(bucket_t *bucket, bool from_pool); void bucket_count_free(bucket_t *bucket); +void *bucket_get_slab(bucket_t *bucket, bool *from_pool); +size_t bucket_slab_alloc_size(bucket_t *bucket); +size_t bucket_slab_min_size(bucket_t *bucket); +slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); +slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, bool *from_pool); +void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool); + umf_disjoint_pool_shared_limits_t *bucket_get_limits(bucket_t *bucket); umf_disjoint_pool_params_t *bucket_get_params(bucket_t *bucket); umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket); -slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); #endif // TEMP_H From 4730d57b17662c2f9fe9de09ac361b3c16bdf971 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 12 Nov 2024 08:59:07 +0100 Subject: 
[PATCH 07/26] make shared limits C structure --- src/pool/pool_disjoint.c | 73 ++++++++++++++++++++++++++++- src/pool/pool_disjoint.cpp | 86 ++++++++--------------------------- src/pool/pool_disjoint_temp.h | 8 +++- src/utils/utils_concurrency.h | 25 ++++++++-- 4 files changed, 118 insertions(+), 74 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 79416bc6d..43f157d12 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -51,6 +51,23 @@ extern "C" { #endif } +typedef struct umf_disjoint_pool_shared_limits_t { + size_t max_size; + _Atomic(size_t) total_size; +} umf_disjoint_pool_shared_limits_t; + +umf_disjoint_pool_shared_limits_t *shared_limits_create(size_t max_size) { + umf_disjoint_pool_shared_limits_t *ptr = + umf_ba_global_alloc(sizeof(umf_disjoint_pool_shared_limits_t)); + ptr->max_size = max_size; + ptr->total_size = 0; + return ptr; +} + +void shared_limits_destroy(umf_disjoint_pool_shared_limits_t *shared_limits) { + umf_ba_global_free(shared_limits); +} + size_t bucket_get_size(bucket_t *bucket); void slab_reg(slab_t *slab); @@ -212,7 +229,8 @@ void slab_unreg(slab_t *slab) { slab_unreg_by_addr(end_addr, slab); } -bucket_t *create_bucket(size_t Sz, void *AllocCtx) { +bucket_t *create_bucket(size_t Sz, void *AllocCtx, + umf_disjoint_pool_shared_limits_t *shared_limits) { bucket_t *bucket = (bucket_t *)umf_ba_global_alloc(sizeof(bucket_t)); bucket->Size = Sz; @@ -228,6 +246,9 @@ bucket_t *create_bucket(size_t Sz, void *AllocCtx) { bucket->allocCount = 0; bucket->maxSlabsInUse = 0; + bucket->shared_limits = shared_limits; + assert(shared_limits); + utils_mutex_init(&bucket->bucket_lock); return bucket; @@ -453,6 +474,56 @@ void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { in_pool * bucket_slab_alloc_size(bucket); } +// If a slab was available in the pool then note that the current pooled +// size has reduced by the size of a slab in this bucket. +void bucket_decrement_pool(bucket_t *bucket, bool *from_pool) { + *from_pool = true; + bucket_update_stats(bucket, 1, -1); + utils_fetch_and_add64(&bucket->shared_limits->total_size, + -bucket_slab_alloc_size(bucket)); +} + +bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { + size_t NewFreeSlabsInBucket; + // Check if this bucket is used in chunked form or as full slabs. 
+ bool chunkedBucket = + bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket); + if (chunkedBucket) { + NewFreeSlabsInBucket = bucket->chunkedSlabsInPool + 1; + } else { + // TODO optimize + size_t avail_num = 0; + slab_list_item_t *it = NULL; + DL_FOREACH(bucket->AvailableSlabs, it) { avail_num++; } + NewFreeSlabsInBucket = avail_num + 1; + } + if (bucket_capacity(bucket) >= NewFreeSlabsInBucket) { + size_t pool_size = bucket->shared_limits->total_size; + while (true) { + size_t new_pool_size = pool_size + bucket_slab_alloc_size(bucket); + + if (bucket->shared_limits->max_size < new_pool_size) { + break; + } + + if (utils_compare_exchange(&bucket->shared_limits->total_size, + &pool_size, &new_pool_size)) { + if (chunkedBucket) { + ++bucket->chunkedSlabsInPool; + } + + bucket_update_stats(bucket, -1, 1); + *to_pool = true; + return true; + } + } + } + + bucket_update_stats(bucket, -1, 0); + *to_pool = false; + return false; +} + #ifdef __cplusplus } #endif diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index ce265e019..a3003a26b 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -35,6 +35,8 @@ #include "utils_math.h" #include "utils_sanitizers.h" +#include "utils_concurrency.h" + // TODO remove #ifdef __cplusplus extern "C" { @@ -49,11 +51,6 @@ struct slab_t; #endif // end TODO remove -typedef struct umf_disjoint_pool_shared_limits_t { - size_t MaxSize; - std::atomic TotalSize; -} umf_disjoint_pool_shared_limits_t; - class DisjointPool { public: class AllocImpl; @@ -78,12 +75,12 @@ class DisjointPool { umf_disjoint_pool_shared_limits_t * umfDisjointPoolSharedLimitsCreate(size_t MaxSize) { - return new umf_disjoint_pool_shared_limits_t{MaxSize, 0}; + return shared_limits_create(MaxSize); } void umfDisjointPoolSharedLimitsDestroy( umf_disjoint_pool_shared_limits_t *limits) { - delete limits; + shared_limits_destroy(limits); } // Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is @@ -125,8 +122,7 @@ class DisjointPool::AllocImpl { // Configuration for this instance umf_disjoint_pool_params_t params; - umf_disjoint_pool_shared_limits_t DefaultSharedLimits = { - (std::numeric_limits::max)(), 0}; + umf_disjoint_pool_shared_limits_t *DefaultSharedLimits; // Used in algorithm for finding buckets std::size_t MinBucketSizeExp; @@ -150,13 +146,14 @@ class DisjointPool::AllocImpl { Size1 = std::max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); // Calculate the exponent for MinBucketSize used for finding buckets. 
MinBucketSizeExp = (size_t)log2Utils(Size1); + DefaultSharedLimits = shared_limits_create(SIZE_MAX); auto Size2 = Size1 + Size1 / 2; for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { // TODO copy allocimpl - Buckets.push_back(create_bucket(Size1, this)); - Buckets.push_back(create_bucket(Size2, this)); + Buckets.push_back(create_bucket(Size1, this, this->getLimits())); + Buckets.push_back(create_bucket(Size2, this, this->getLimits())); } - Buckets.push_back(create_bucket(CutOff, this)); + Buckets.push_back(create_bucket(CutOff, this, this->getLimits())); auto ret = umfMemoryProviderGetMinPageSize(hProvider, nullptr, &ProviderMinPageSize); @@ -166,6 +163,8 @@ class DisjointPool::AllocImpl { } ~AllocImpl() { + // TODO + // destroy DefaultSharedLimits for (auto it = Buckets.begin(); it != Buckets.end(); it++) { destroy_bucket(*it); @@ -196,7 +195,7 @@ class DisjointPool::AllocImpl { if (params.SharedLimits) { return params.SharedLimits; } else { - return &DefaultSharedLimits; + return DefaultSharedLimits; } }; @@ -254,14 +253,6 @@ std::ostream &operator<<(std::ostream &Os, slab_t &Slab) { } */ -// If a slab was available in the pool then note that the current pooled -// size has reduced by the size of a slab in this bucket. -void bucket_decrement_pool(bucket_t *bucket, bool *FromPool) { - *FromPool = true; - bucket_update_stats(bucket, 1, -1); - bucket_get_limits(bucket)->TotalSize -= bucket_slab_alloc_size(bucket); -} - /* void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { if (allocCount) { @@ -553,7 +544,7 @@ size_t DisjointPool::malloc_usable_size(void *) { umf_result_t DisjointPool::free(void *ptr) { bool ToPool; umf_result_t ret = impl->deallocate(ptr, ToPool); - + /* if (ret == UMF_RESULT_SUCCESS) { if (impl->getParams().PoolTrace > 2) { @@ -565,7 +556,7 @@ umf_result_t DisjointPool::free(void *ptr) { << ", Current pool size for " << MT << " " << impl->getParams().CurPoolSize << "\n"; } - } + }*/ return ret; } @@ -577,10 +568,11 @@ DisjointPool::DisjointPool() {} // Define destructor for use with unique_ptr DisjointPool::~DisjointPool() { - bool TitlePrinted = false; - size_t HighBucketSize; - size_t HighPeakSlabsInUse; + /* if (impl->getParams().PoolTrace > 1) { + bool TitlePrinted = false; + size_t HighBucketSize; + size_t HighPeakSlabsInUse; auto name = impl->getParams().Name; //try { // cannot throw in destructor impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, @@ -596,6 +588,7 @@ DisjointPool::~DisjointPool() { //} catch (...) { // ignore exceptions // } } + */ } static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = @@ -666,47 +659,6 @@ void slab_unreg_by_addr(void *addr, slab_t *slab) { assert(false && "Slab is not found"); } -bool bucket_can_pool(bucket_t *bucket, bool *ToPool) { - size_t NewFreeSlabsInBucket; - // Check if this bucket is used in chunked form or as full slabs. 
- bool chunkedBucket = - bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket); - if (chunkedBucket) { - NewFreeSlabsInBucket = bucket->chunkedSlabsInPool + 1; - } else { - // TODO optimize - size_t avail_num = 0; - slab_list_item_t *it = NULL; - DL_FOREACH(bucket->AvailableSlabs, it) { avail_num++; } - NewFreeSlabsInBucket = avail_num + 1; - } - if (bucket_capacity(bucket) >= NewFreeSlabsInBucket) { - size_t PoolSize = bucket_get_limits(bucket)->TotalSize; - while (true) { - size_t NewPoolSize = PoolSize + bucket_slab_alloc_size(bucket); - - if (bucket_get_limits(bucket)->MaxSize < NewPoolSize) { - break; - } - - if (bucket_get_limits(bucket)->TotalSize.compare_exchange_strong( - PoolSize, NewPoolSize)) { - if (chunkedBucket) { - ++bucket->chunkedSlabsInPool; - } - - bucket_update_stats(bucket, -1, 1); - *ToPool = true; - return true; - } - } - } - - bucket_update_stats(bucket, -1, 0); - *ToPool = false; - return false; -} - #ifdef __cplusplus } #endif diff --git a/src/pool/pool_disjoint_temp.h b/src/pool/pool_disjoint_temp.h index 1a777d9a8..bf763f275 100644 --- a/src/pool/pool_disjoint_temp.h +++ b/src/pool/pool_disjoint_temp.h @@ -6,6 +6,9 @@ void annotate_memory_inaccessible(void *ptr, size_t size); void annotate_memory_undefined(void *ptr, size_t size); +umf_disjoint_pool_shared_limits_t *shared_limits_create(size_t max_size); +void shared_limits_destroy(umf_disjoint_pool_shared_limits_t *shared_limits); + typedef struct slab_list_item_t slab_list_item_t; typedef struct bucket_t { @@ -24,6 +27,8 @@ typedef struct bucket_t { // routines, slab map and etc. void *OwnAllocCtx; + umf_disjoint_pool_shared_limits_t *shared_limits; + // For buckets used in chunked mode, a counter of slabs in the pool. // For allocations that use an entire slab each, the entries in the Available // list are entries in the pool.Each slab is available for a new @@ -109,7 +114,8 @@ void slab_reg_by_addr(void *addr, slab_t *slab); void slab_unreg(slab_t *slab); void slab_unreg_by_addr(void *addr, slab_t *slab); -bucket_t *create_bucket(size_t sz, void *alloc_ctx); +bucket_t *create_bucket(size_t sz, void *alloc_ctx, + umf_disjoint_pool_shared_limits_t *shared_limits); void destroy_bucket(bucket_t *bucket); void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 155184cc4..854cdfa05 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -61,11 +61,13 @@ int utils_mutex_unlock(utils_mutex_t *mutex); void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); #if defined(_WIN32) + static __inline unsigned char utils_lssb_index(long long value) { unsigned long ret; _BitScanForward64(&ret, value); return (unsigned char)ret; } + static __inline unsigned char utils_mssb_index(long long value) { unsigned long ret; _BitScanReverse64(&ret, value); @@ -81,15 +83,21 @@ static __inline unsigned char utils_mssb_index(long long value) { #define utils_atomic_store_release(object, desired) \ InterlockedExchange64((LONG64 volatile *)object, (LONG64)desired) + #define utils_atomic_increment(object) \ InterlockedIncrement64((LONG64 volatile *)object) + #define utils_atomic_decrement(object) \ InterlockedDecrement64((LONG64 volatile *)object) + #define utils_fetch_and_add64(ptr, value) \ InterlockedExchangeAdd64((LONG64 *)(ptr), value) -#else + +#else // !defined(_WIN32) + #define utils_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) #define utils_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x))) + 
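
bucket_can_pool() above grows shared_limits->total_size with a compare-exchange retry loop so that concurrent frees never push the pool past max_size. The same pattern, written with plain C11 <stdatomic.h> instead of the utils_compare_exchange()/utils_fetch_and_add64() wrappers added below, might look like this sketch; the demo_* names are illustrative only.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    typedef struct demo_limits_t {
        size_t max_size;
        _Atomic size_t total_size;
    } demo_limits_t;

    // Try to account for one more pooled slab of slab_size bytes.
    static bool demo_try_grow(demo_limits_t *l, size_t slab_size) {
        size_t cur = atomic_load(&l->total_size);
        while (true) {
            size_t next = cur + slab_size;
            if (next > l->max_size) {
                return false; // limit reached, the caller frees the slab instead
            }
            // On failure 'cur' is reloaded with the current value, so we retry.
            if (atomic_compare_exchange_weak(&l->total_size, &cur, next)) {
                return true;
            }
        }
    }

    int main(void) {
        demo_limits_t limits = {.max_size = 2 * 65536, .total_size = 0};
        for (int i = 0; i < 3; i++) {
            printf("grow %d: %d\n", i, demo_try_grow(&limits, 65536));
        }
        // expected: grow 0: 1, grow 1: 1, grow 2: 0
        return 0;
    }
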
#define utils_atomic_load_acquire(object, dest) \ do { \ utils_annotate_acquire((void *)object); \ @@ -103,12 +111,19 @@ static __inline unsigned char utils_mssb_index(long long value) { } while (0) #define utils_atomic_increment(object) \ - __atomic_add_fetch(object, 1, __ATOMIC_ACQ_REL) + __atomic_add_fetch(object, 1, memory_order_acq_rel) + #define utils_atomic_decrement(object) \ - __atomic_sub_fetch(object, 1, __ATOMIC_ACQ_REL) -#define utils_fetch_and_add64 __sync_fetch_and_add + __atomic_sub_fetch(object, 1, memory_order_acq_rel) -#endif +#define utils_fetch_and_add64(object, value) \ + __atomic_fetch_add(object, value, memory_order_acq_rel) + +#define utils_compare_exchange(object, expected, desired) \ + __atomic_compare_exchange(object, expected, desired, 0 /* strong */, \ + memory_order_acq_rel, memory_order_relaxed) + +#endif // !defined(_WIN32) #ifdef __cplusplus } From cb7f676a2aaf335e04f0621d37356ca27897611a Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 12 Nov 2024 17:40:59 +0100 Subject: [PATCH 08/26] make known_slabs_map_lock a utils_mutex_t --- src/pool/pool_disjoint.c | 2 +- src/pool/pool_disjoint.cpp | 46 +++++++++++++++++++++----------------- src/utils/utils_common.c | 2 +- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 43f157d12..adb4e63fc 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -53,7 +53,7 @@ extern "C" { typedef struct umf_disjoint_pool_shared_limits_t { size_t max_size; - _Atomic(size_t) total_size; + size_t total_size; // requires atomic access } umf_disjoint_pool_shared_limits_t; umf_disjoint_pool_shared_limits_t *shared_limits_create(size_t max_size) { diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index a3003a26b..87015effd 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -44,8 +44,6 @@ extern "C" { #include "pool_disjoint_temp.h" -struct slab_t; - #ifdef __cplusplus } #endif @@ -111,7 +109,9 @@ class DisjointPool::AllocImpl { // It's important for the map to be destroyed last after buckets and their // slabs This is because slab's destructor removes the object from the map. std::unordered_multimap KnownSlabs; - std::shared_timed_mutex KnownSlabsMapLock; + + // prev std::shared_timed_mutex - ok? + utils_mutex_t known_slabs_map_lock; // Handle to the memory provider umf_memory_provider_handle_t MemHandle; @@ -125,7 +125,7 @@ class DisjointPool::AllocImpl { umf_disjoint_pool_shared_limits_t *DefaultSharedLimits; // Used in algorithm for finding buckets - std::size_t MinBucketSizeExp; + size_t MinBucketSizeExp; // Coarse-grain allocation min alignment size_t ProviderMinPageSize; @@ -137,6 +137,8 @@ class DisjointPool::AllocImpl { VALGRIND_DO_CREATE_MEMPOOL(this, 0, 0); + utils_mutex_init(&known_slabs_map_lock); + // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. // Powers of 2 and the value halfway between the powers of 2. auto Size1 = this->params.MinBucketSize; @@ -147,6 +149,7 @@ class DisjointPool::AllocImpl { // Calculate the exponent for MinBucketSize used for finding buckets. 
MinBucketSizeExp = (size_t)log2Utils(Size1); DefaultSharedLimits = shared_limits_create(SIZE_MAX); + auto Size2 = Size1 + Size1 / 2; for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { // TODO copy allocimpl @@ -171,6 +174,8 @@ class DisjointPool::AllocImpl { } VALGRIND_DO_DESTROY_MEMPOOL(this); + + utils_mutex_destroy_not_free(&known_slabs_map_lock); } void *allocate(size_t Size, size_t Alignment, bool &FromPool); @@ -179,9 +184,7 @@ class DisjointPool::AllocImpl { umf_memory_provider_handle_t getMemHandle() { return MemHandle; } - std::shared_timed_mutex &getKnownSlabsMapLock() { - return KnownSlabsMapLock; - } + utils_mutex_t *getKnownSlabsMapLock() { return &known_slabs_map_lock; } std::unordered_multimap &getKnownSlabs() { return KnownSlabs; @@ -204,7 +207,7 @@ class DisjointPool::AllocImpl { private: bucket_t *findBucket(size_t Size); - std::size_t sizeToIdx(size_t Size); + size_t sizeToIdx(size_t Size); }; static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, @@ -398,9 +401,12 @@ bucket_t *DisjointPool::AllocImpl::findBucket(size_t Size) { auto calculatedIdx = sizeToIdx(Size); bucket_t *bucket = Buckets[calculatedIdx]; assert(bucket_get_size(bucket) >= Size); + (void)bucket; + if (calculatedIdx > 0) { bucket_t *bucket_prev = Buckets[calculatedIdx - 1]; assert(bucket_get_size(bucket_prev) < Size); + (void)bucket_prev; } return Buckets[calculatedIdx]; @@ -414,12 +420,12 @@ umf_result_t DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { auto *SlabPtr = (void *)ALIGN_DOWN((size_t)Ptr, SlabMinSize()); // Lock the map on read - std::shared_lock Lk(getKnownSlabsMapLock()); + utils_mutex_lock(getKnownSlabsMapLock()); ToPool = false; auto Slabs = getKnownSlabs().equal_range(SlabPtr); if (Slabs.first == Slabs.second) { - Lk.unlock(); + utils_mutex_unlock(getKnownSlabsMapLock()); umf_result_t ret = memoryProviderFree(getMemHandle(), Ptr); return ret; } @@ -431,7 +437,7 @@ umf_result_t DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { if (Ptr >= slab_get(Slab) && Ptr < slab_get_end(Slab)) { // Unlock the map before freeing the chunk, it may be locked on write // there - Lk.unlock(); + utils_mutex_unlock(getKnownSlabsMapLock()); bucket_t *bucket = slab_get_bucket(Slab); if (getParams().PoolTrace > 1) { @@ -450,7 +456,7 @@ umf_result_t DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { } } - Lk.unlock(); + utils_mutex_unlock(getKnownSlabsMapLock()); // There is a rare case when we have a pointer from system allocation next // to some slab with an entry in the map. So we find a slab // but the range checks fail. 
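To make the "rare case" comment above concrete, here is the lookup with invented addresses, assuming SlabMinSize is 0x10000 (64 KiB) and that each slab is registered in the map under both its aligned start and its aligned end address:

    /* Slab A covers [0x10000, 0x20000) and is registered under the keys
     * 0x10000 (aligned start) and 0x20000 (aligned end).
     * Ptr = 0x20100 was returned directly by the provider, just past A:
     *   ALIGN_DOWN(0x20100, 0x10000) == 0x20000  -> the map lookup finds A
     *   0x20100 >= slab_get(A) && 0x20100 < slab_get_end(A)  -> false
     * so the range check rejects A and Ptr is handed to memoryProviderFree()
     * rather than to a bucket. */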
@@ -608,11 +614,6 @@ umf_disjoint_pool_params_t *bucket_get_params(bucket_t *bucket) { return &t->getParams(); } -umf_disjoint_pool_shared_limits_t *bucket_get_limits(bucket_t *bucket) { - auto t = (DisjointPool::AllocImpl *)bucket->OwnAllocCtx; - return t->getLimits(); -} - umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket) { auto t = (DisjointPool::AllocImpl *)bucket->OwnAllocCtx; return t->getMemHandle(); @@ -624,9 +625,9 @@ bucket_get_known_slabs(bucket_t *bucket) { return &t->getKnownSlabs(); } -std::shared_timed_mutex *bucket_get_known_slabs_map_lock(bucket_t *bucket) { +utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket) { auto t = (DisjointPool::AllocImpl *)bucket->OwnAllocCtx; - return &t->getKnownSlabsMapLock(); + return t->getKnownSlabsMapLock(); } void slab_reg_by_addr(void *addr, slab_t *slab) { @@ -634,8 +635,9 @@ void slab_reg_by_addr(void *addr, slab_t *slab) { auto Lock = bucket_get_known_slabs_map_lock(bucket); auto Map = bucket_get_known_slabs(bucket); - std::lock_guard Lg(*Lock); + utils_mutex_lock(Lock); Map->insert({addr, slab}); + utils_mutex_unlock(Lock); } void slab_unreg_by_addr(void *addr, slab_t *slab) { @@ -643,7 +645,7 @@ void slab_unreg_by_addr(void *addr, slab_t *slab) { auto Lock = bucket_get_known_slabs_map_lock(bucket); auto Map = bucket_get_known_slabs(bucket); - std::lock_guard Lg(*Lock); + utils_mutex_lock(Lock); auto Slabs = Map->equal_range(addr); // At least the must get the current slab from the map. @@ -652,11 +654,13 @@ void slab_unreg_by_addr(void *addr, slab_t *slab) { for (auto It = Slabs.first; It != Slabs.second; ++It) { if (It->second == slab) { Map->erase(It); + utils_mutex_unlock(Lock); return; } } assert(false && "Slab is not found"); + utils_mutex_unlock(Lock); } #ifdef __cplusplus diff --git a/src/utils/utils_common.c b/src/utils/utils_common.c index 984e91a6c..611b277df 100644 --- a/src/utils/utils_common.c +++ b/src/utils/utils_common.c @@ -140,4 +140,4 @@ umf_result_t utils_translate_flags(unsigned in_flags, unsigned max, } size_t utils_max(size_t a, size_t b) { return a > b ? a : b; } -size_t utils_min(size_t a, size_t b) { return a < b ? a : b; } \ No newline at end of file +size_t utils_min(size_t a, size_t b) { return a < b ? a : b; } From 81cc968d47af9875421cd8df1acd615f735bc01e Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 13 Nov 2024 09:56:49 +0100 Subject: [PATCH 09/26] convert buckets vector to C array --- src/pool/pool_disjoint.cpp | 47 +++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index 87015effd..7dda97353 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -117,7 +117,8 @@ class DisjointPool::AllocImpl { umf_memory_provider_handle_t MemHandle; // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) - std::vector Buckets; + bucket_t **buckets; + size_t buckets_num; // Configuration for this instance umf_disjoint_pool_params_t params; @@ -141,22 +142,36 @@ class DisjointPool::AllocImpl { // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. // Powers of 2 and the value halfway between the powers of 2. - auto Size1 = this->params.MinBucketSize; + size_t Size1 = this->params.MinBucketSize; + // MinBucketSize cannot be larger than CutOff. Size1 = std::min(Size1, CutOff); + // Buckets sized smaller than the bucket default size- 8 aren't needed. 
Size1 = std::max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); + // Calculate the exponent for MinBucketSize used for finding buckets. MinBucketSizeExp = (size_t)log2Utils(Size1); DefaultSharedLimits = shared_limits_create(SIZE_MAX); + // count number of buckets, start from 1 + buckets_num = 1; auto Size2 = Size1 + Size1 / 2; + size_t ts2 = Size2, ts1 = Size1; for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { - // TODO copy allocimpl - Buckets.push_back(create_bucket(Size1, this, this->getLimits())); - Buckets.push_back(create_bucket(Size2, this, this->getLimits())); + buckets_num += 2; + } + buckets = + (bucket_t **)umf_ba_global_alloc(sizeof(bucket_t *) * buckets_num); + + int i = 0; + Size1 = ts1; + Size2 = ts2; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2, i += 2) { + buckets[i] = create_bucket(Size1, this, this->getLimits()); + buckets[i + 1] = create_bucket(Size2, this, this->getLimits()); } - Buckets.push_back(create_bucket(CutOff, this, this->getLimits())); + buckets[i] = create_bucket(CutOff, this, this->getLimits()); auto ret = umfMemoryProviderGetMinPageSize(hProvider, nullptr, &ProviderMinPageSize); @@ -169,8 +184,8 @@ class DisjointPool::AllocImpl { // TODO // destroy DefaultSharedLimits - for (auto it = Buckets.begin(); it != Buckets.end(); it++) { - destroy_bucket(*it); + for (size_t i = 0; i < buckets_num; i++) { + destroy_bucket(buckets[i]); } VALGRIND_DO_DESTROY_MEMPOOL(this); @@ -399,17 +414,17 @@ std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) { bucket_t *DisjointPool::AllocImpl::findBucket(size_t Size) { auto calculatedIdx = sizeToIdx(Size); - bucket_t *bucket = Buckets[calculatedIdx]; + bucket_t *bucket = buckets[calculatedIdx]; assert(bucket_get_size(bucket) >= Size); (void)bucket; if (calculatedIdx > 0) { - bucket_t *bucket_prev = Buckets[calculatedIdx - 1]; + bucket_t *bucket_prev = buckets[calculatedIdx - 1]; assert(bucket_get_size(bucket_prev) < Size); (void)bucket_prev; } - return Buckets[calculatedIdx]; + return buckets[calculatedIdx]; } umf_result_t DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { @@ -473,15 +488,15 @@ void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, HighBucketSize = 0; HighPeakSlabsInUse = 0; - for (auto &B : Buckets) { + for (size_t i = 0; i < buckets_num; i++) { // TODO //(*B).printStats(TitlePrinted, MTName); - bucket_t *bucket = B; + bucket_t *bucket = buckets[i]; HighPeakSlabsInUse = - std::max(bucket->maxSlabsInUse, HighPeakSlabsInUse); - if ((*B).allocCount) { + utils_max(bucket->maxSlabsInUse, HighPeakSlabsInUse); + if (bucket->allocCount) { HighBucketSize = - std::max(bucket_slab_alloc_size(bucket), HighBucketSize); + utils_max(bucket_slab_alloc_size(bucket), HighBucketSize); } } } From 3b06aee0fce936afdf475a16d07af180ab4fb481 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 13 Nov 2024 10:04:03 +0100 Subject: [PATCH 10/26] fix --- src/pool/pool_disjoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index adb4e63fc..ac85ccf1a 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -480,7 +480,7 @@ void bucket_decrement_pool(bucket_t *bucket, bool *from_pool) { *from_pool = true; bucket_update_stats(bucket, 1, -1); utils_fetch_and_add64(&bucket->shared_limits->total_size, - -bucket_slab_alloc_size(bucket)); + -(long long)bucket_slab_alloc_size(bucket)); } bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { From ded4a207691927513859eada8ec367d40c303cb6 Mon Sep 17 00:00:00 2001 From: Rafal 
Rudnicki Date: Wed, 13 Nov 2024 10:35:43 +0100 Subject: [PATCH 11/26] use critnib --- src/pool/CMakeLists.txt | 3 +- src/pool/pool_disjoint.cpp | 110 +++++++++++++++++++------------------ 2 files changed, 58 insertions(+), 55 deletions(-) diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index d87ddbcc2..79257b7e7 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -14,7 +14,8 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT) add_umf_library( NAME disjoint_pool TYPE STATIC - SRCS pool_disjoint.cpp pool_disjoint.c ${POOL_EXTRA_SRCS} + SRCS pool_disjoint.cpp pool_disjoint.c + ../critnib/critnib.c ${POOL_EXTRA_SRCS} LIBS ${POOL_EXTRA_LIBS}) target_compile_definitions(disjoint_pool diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index 7dda97353..24dd3ad27 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -31,11 +31,12 @@ #include "pool_disjoint.h" #include "umf.h" #include "utils_common.h" +#include "utils_concurrency.h" #include "utils_log.h" #include "utils_math.h" #include "utils_sanitizers.h" -#include "utils_concurrency.h" +#include "critnib/critnib.h" // TODO remove #ifdef __cplusplus @@ -108,7 +109,7 @@ typedef struct MemoryProviderError { class DisjointPool::AllocImpl { // It's important for the map to be destroyed last after buckets and their // slabs This is because slab's destructor removes the object from the map. - std::unordered_multimap KnownSlabs; + critnib *known_slabs; // (void *, slab_t *) // prev std::shared_timed_mutex - ok? utils_mutex_t known_slabs_map_lock; @@ -139,6 +140,7 @@ class DisjointPool::AllocImpl { VALGRIND_DO_CREATE_MEMPOOL(this, 0, 0); utils_mutex_init(&known_slabs_map_lock); + known_slabs = critnib_new(); // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. // Powers of 2 and the value halfway between the powers of 2. @@ -190,6 +192,8 @@ class DisjointPool::AllocImpl { VALGRIND_DO_DESTROY_MEMPOOL(this); + critnib_delete(known_slabs); + utils_mutex_destroy_not_free(&known_slabs_map_lock); } @@ -201,9 +205,7 @@ class DisjointPool::AllocImpl { utils_mutex_t *getKnownSlabsMapLock() { return &known_slabs_map_lock; } - std::unordered_multimap &getKnownSlabs() { - return KnownSlabs; - } + critnib *getKnownSlabs() { return known_slabs; } size_t SlabMinSize() { return params.SlabMinSize; }; @@ -438,38 +440,41 @@ umf_result_t DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { utils_mutex_lock(getKnownSlabsMapLock()); ToPool = false; - auto Slabs = getKnownSlabs().equal_range(SlabPtr); - if (Slabs.first == Slabs.second) { + + slab_t *slab = (slab_t *)critnib_get(known_slabs, (uintptr_t)SlabPtr); + //auto Slabs = getKnownSlabs().equal_range(SlabPtr); + if (slab == NULL) { utils_mutex_unlock(getKnownSlabsMapLock()); umf_result_t ret = memoryProviderFree(getMemHandle(), Ptr); return ret; } - for (auto It = Slabs.first; It != Slabs.second; ++It) { - // The slab object won't be deleted until it's removed from the map which is - // protected by the lock, so it's safe to access it here. 
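// Illustrative sketch (not part of this function): the critnib calls this
// patch relies on, using only the shapes visible in the diff; one value is
// stored per key and critnib_get() returns NULL on a miss. Allocation
// failures and the map lock are omitted here.
static void slab_map_sketch(critnib *slabs, void *addr, slab_t *slab) {
    critnib_insert(slabs, (uintptr_t)addr, slab, 0); // same flag as above
    slab_t *found = (slab_t *)critnib_get(slabs, (uintptr_t)addr);
    assert(found == slab);                           // single value per key
    critnib_remove(slabs, (uintptr_t)addr);
}
// The map itself is created with critnib_new() and torn down with
// critnib_delete(), as in the constructor and destructor above.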
- auto &Slab = It->second; - if (Ptr >= slab_get(Slab) && Ptr < slab_get_end(Slab)) { - // Unlock the map before freeing the chunk, it may be locked on write - // there - utils_mutex_unlock(getKnownSlabsMapLock()); - bucket_t *bucket = slab_get_bucket(Slab); - - if (getParams().PoolTrace > 1) { - bucket_count_free(bucket); - } - - VALGRIND_DO_MEMPOOL_FREE(this, Ptr); - annotate_memory_inaccessible(Ptr, bucket_get_size(bucket)); - if (bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket)) { - bucket_free_chunk(bucket, Ptr, Slab, &ToPool); - } else { - bucket_free_slab(bucket, Slab, &ToPool); - } - - return UMF_RESULT_SUCCESS; + // TODO - no multimap + // for (auto It = Slabs.first; It != Slabs.second; ++It) { + + // The slab object won't be deleted until it's removed from the map which is + // protected by the lock, so it's safe to access it here. + if (Ptr >= slab_get(slab) && Ptr < slab_get_end(slab)) { + // Unlock the map before freeing the chunk, it may be locked on write + // there + utils_mutex_unlock(getKnownSlabsMapLock()); + bucket_t *bucket = slab_get_bucket(slab); + + if (getParams().PoolTrace > 1) { + bucket_count_free(bucket); + } + + VALGRIND_DO_MEMPOOL_FREE(this, Ptr); + annotate_memory_inaccessible(Ptr, bucket_get_size(bucket)); + if (bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket)) { + bucket_free_chunk(bucket, Ptr, slab, &ToPool); + } else { + bucket_free_slab(bucket, slab, &ToPool); } + + return UMF_RESULT_SUCCESS; } + //} // for multimap utils_mutex_unlock(getKnownSlabsMapLock()); // There is a rare case when we have a pointer from system allocation next @@ -634,10 +639,9 @@ umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket) { return t->getMemHandle(); } -std::unordered_multimap * -bucket_get_known_slabs(bucket_t *bucket) { +critnib *bucket_get_known_slabs(bucket_t *bucket) { auto t = (DisjointPool::AllocImpl *)bucket->OwnAllocCtx; - return &t->getKnownSlabs(); + return t->getKnownSlabs(); } utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket) { @@ -647,35 +651,33 @@ utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket) { void slab_reg_by_addr(void *addr, slab_t *slab) { bucket_t *bucket = slab_get_bucket(slab); - auto Lock = bucket_get_known_slabs_map_lock(bucket); - auto Map = bucket_get_known_slabs(bucket); + utils_mutex_t *lock = bucket_get_known_slabs_map_lock(bucket); + critnib *slabs = bucket_get_known_slabs(bucket); + + utils_mutex_lock(lock); - utils_mutex_lock(Lock); - Map->insert({addr, slab}); - utils_mutex_unlock(Lock); + // TODO multimap + assert(critnib_get(slabs, (uintptr_t)addr) == NULL); + critnib_insert(slabs, (uintptr_t)addr, slab, 0); + + utils_mutex_unlock(lock); } void slab_unreg_by_addr(void *addr, slab_t *slab) { bucket_t *bucket = slab_get_bucket(slab); - auto Lock = bucket_get_known_slabs_map_lock(bucket); - auto Map = bucket_get_known_slabs(bucket); - - utils_mutex_lock(Lock); + utils_mutex_t *lock = bucket_get_known_slabs_map_lock(bucket); + critnib *slabs = bucket_get_known_slabs(bucket); - auto Slabs = Map->equal_range(addr); - // At least the must get the current slab from the map. 
- assert(Slabs.first != Slabs.second && "Slab is not found"); + utils_mutex_lock(lock); - for (auto It = Slabs.first; It != Slabs.second; ++It) { - if (It->second == slab) { - Map->erase(It); - utils_mutex_unlock(Lock); - return; - } - } + // debug only + // assume single-value per key + slab_t *known_slab = (slab_t *)critnib_get(slabs, (uintptr_t)addr); + assert(known_slab != NULL && "Slab is not found"); + assert(slab == known_slab); + critnib_remove(slabs, (uintptr_t)addr); - assert(false && "Slab is not found"); - utils_mutex_unlock(Lock); + utils_mutex_unlock(lock); } #ifdef __cplusplus From 24cf90a6b6b0f45eb1a94732f2aa9bc6c01aba4c Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 13 Nov 2024 10:40:29 +0100 Subject: [PATCH 12/26] add utils_compare_exchange for win --- src/utils/utils_concurrency.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 854cdfa05..69506b4a1 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -93,6 +93,9 @@ static __inline unsigned char utils_mssb_index(long long value) { #define utils_fetch_and_add64(ptr, value) \ InterlockedExchangeAdd64((LONG64 *)(ptr), value) +#define utils_compare_exchange(object, expected, desired) \ + InterlockedCompareExchange64((LONG64 volatile *)object, expected, desired) + #else // !defined(_WIN32) #define utils_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) From f527b306a9da5243f2c34713032e918e46ea89c9 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 13 Nov 2024 10:40:36 +0100 Subject: [PATCH 13/26] fix cmake --- src/pool/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index 79257b7e7..d7b48254a 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -14,8 +14,8 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT) add_umf_library( NAME disjoint_pool TYPE STATIC - SRCS pool_disjoint.cpp pool_disjoint.c - ../critnib/critnib.c ${POOL_EXTRA_SRCS} + SRCS pool_disjoint.cpp pool_disjoint.c ../critnib/critnib.c + ${POOL_EXTRA_SRCS} LIBS ${POOL_EXTRA_LIBS}) target_compile_definitions(disjoint_pool From 00b0dc80849aa09cbe8d382c26f91d94aecf347d Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 13 Nov 2024 14:47:20 +0100 Subject: [PATCH 14/26] fix --- src/pool/pool_disjoint.c | 26 ++++++++++++++++++++++---- src/pool/pool_disjoint.cpp | 11 ++++++++++- src/utils/utils_concurrency.h | 2 +- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index ac85ccf1a..9a1fe478f 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -93,8 +93,12 @@ slab_t *create_slab(bucket_t *bucket) { memset(slab->chunks, 0, sizeof(bool) * slab->num_chunks); slab->slab_size = bucket_slab_alloc_size(bucket); - umf_result_t res = umfMemoryProviderAlloc( - bucket_get_mem_handle(bucket), slab->slab_size, 0, &slab->mem_ptr); + + // NOTE: originally slabs memory were allocated without alignment + // with this registering a slab is simpler and doesn't require multimap + umf_result_t res = + umfMemoryProviderAlloc(bucket_get_mem_handle(bucket), slab->slab_size, + bucket_slab_min_size(bucket), &slab->mem_ptr); if (res == UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY) { destroy_slab(slab); @@ -115,6 +119,8 @@ void destroy_slab(slab_t *slab) { umf_result_t res = umfMemoryProviderFree( bucket_get_mem_handle(slab->bucket), slab->mem_ptr, slab->slab_size); assert(res == UMF_RESULT_SUCCESS); + (void)res; + 
umf_ba_global_free(slab->chunks); umf_ba_global_free(slab->iter); umf_ba_global_free(slab); @@ -211,9 +217,14 @@ bool slab_has_avail(const slab_t *slab) { void slab_reg(slab_t *slab) { bucket_t *bucket = slab_get_bucket(slab); + // NOTE: changed vs original - slab is already aligned to bucket_slab_min_size + // I also decr end_addr by 1 void *start_addr = (void *)ALIGN_DOWN((size_t)slab_get(slab), bucket_slab_min_size(bucket)); - void *end_addr = (uint8_t *)(start_addr) + bucket_slab_min_size(bucket); + void *end_addr = (uint8_t *)(start_addr) + bucket_slab_min_size(bucket) - 1; + + fprintf(stderr, "[DP slab_reg] slab: %p, start: %p, end %p\n", (void *)slab, + start_addr, end_addr); slab_reg_by_addr(start_addr, slab); slab_reg_by_addr(end_addr, slab); @@ -221,9 +232,14 @@ void slab_reg(slab_t *slab) { void slab_unreg(slab_t *slab) { bucket_t *bucket = slab_get_bucket(slab); + // NOTE: changed vs original - slab is already aligned to bucket_slab_min_size + // I also decr end_addr by 1 void *start_addr = (void *)ALIGN_DOWN((size_t)slab_get(slab), bucket_slab_min_size(bucket)); - void *end_addr = (uint8_t *)(start_addr) + bucket_slab_min_size(bucket); + void *end_addr = (uint8_t *)(start_addr) + bucket_slab_min_size(bucket) - 1; + + fprintf(stderr, "[DP slab_unreg] slab: %p, start: %p, end %p\n", + (void *)slab, start_addr, end_addr); slab_unreg_by_addr(start_addr, slab); slab_unreg_by_addr(end_addr, slab); @@ -485,6 +501,7 @@ void bucket_decrement_pool(bucket_t *bucket, bool *from_pool) { bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { size_t NewFreeSlabsInBucket; + // Check if this bucket is used in chunked form or as full slabs. bool chunkedBucket = bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket); @@ -497,6 +514,7 @@ bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { DL_FOREACH(bucket->AvailableSlabs, it) { avail_num++; } NewFreeSlabsInBucket = avail_num + 1; } + if (bucket_capacity(bucket) >= NewFreeSlabsInBucket) { size_t pool_size = bucket->shared_limits->total_size; while (true) { diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index 24dd3ad27..7067fca46 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -657,7 +657,12 @@ void slab_reg_by_addr(void *addr, slab_t *slab) { utils_mutex_lock(lock); // TODO multimap - assert(critnib_get(slabs, (uintptr_t)addr) == NULL); + slab_t *t = (slab_t *)critnib_get(slabs, (uintptr_t)addr); + assert(t == NULL); + (void)t; + + fprintf(stderr, "[DP slab_reg_by_addr] addr: %p, slab: %p\n", addr, + (void *)slab); critnib_insert(slabs, (uintptr_t)addr, slab, 0); utils_mutex_unlock(lock); @@ -675,6 +680,10 @@ void slab_unreg_by_addr(void *addr, slab_t *slab) { slab_t *known_slab = (slab_t *)critnib_get(slabs, (uintptr_t)addr); assert(known_slab != NULL && "Slab is not found"); assert(slab == known_slab); + (void)known_slab; + + fprintf(stderr, "[DP slab_unreg_by_addr] addr: %p, slab: %p\n", addr, + (void *)slab); critnib_remove(slabs, (uintptr_t)addr); utils_mutex_unlock(lock); diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 69506b4a1..87f2e05a1 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -94,7 +94,7 @@ static __inline unsigned char utils_mssb_index(long long value) { InterlockedExchangeAdd64((LONG64 *)(ptr), value) #define utils_compare_exchange(object, expected, desired) \ - InterlockedCompareExchange64((LONG64 volatile *)object, expected, desired) + InterlockedCompareExchange64((LONG64 volatile *)object, *expected, 
*desired) #else // !defined(_WIN32) From a95fe583515117e969d880753d350ebc3c14a050 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 13 Nov 2024 15:23:13 +0100 Subject: [PATCH 15/26] move functions --- src/pool/pool_disjoint.c | 44 ++++++++++++++++++++++++++++++++++- src/pool/pool_disjoint.cpp | 40 ------------------------------- src/pool/pool_disjoint_temp.h | 2 ++ 3 files changed, 45 insertions(+), 41 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 9a1fe478f..1cced16be 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -15,8 +15,10 @@ #include #include -#include "base_alloc_global.h" +#include "critnib/critnib.h" #include "uthash/utlist.h" + +#include "base_alloc_global.h" #include "utils_common.h" #include "utils_concurrency.h" #include "utils_log.h" @@ -542,6 +544,46 @@ bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { return false; } +void slab_reg_by_addr(void *addr, slab_t *slab) { + bucket_t *bucket = slab_get_bucket(slab); + utils_mutex_t *lock = bucket_get_known_slabs_map_lock(bucket); + critnib *slabs = bucket_get_known_slabs(bucket); + + utils_mutex_lock(lock); + + // TODO multimap + slab_t *t = (slab_t *)critnib_get(slabs, (uintptr_t)addr); + assert(t == NULL); + (void)t; + + fprintf(stderr, "[DP slab_reg_by_addr] addr: %p, slab: %p\n", addr, + (void *)slab); + critnib_insert(slabs, (uintptr_t)addr, slab, 0); + + utils_mutex_unlock(lock); +} + +void slab_unreg_by_addr(void *addr, slab_t *slab) { + bucket_t *bucket = slab_get_bucket(slab); + utils_mutex_t *lock = bucket_get_known_slabs_map_lock(bucket); + critnib *slabs = bucket_get_known_slabs(bucket); + + utils_mutex_lock(lock); + + // debug only + // assume single-value per key + slab_t *known_slab = (slab_t *)critnib_get(slabs, (uintptr_t)addr); + assert(known_slab != NULL && "Slab is not found"); + assert(slab == known_slab); + (void)known_slab; + + fprintf(stderr, "[DP slab_unreg_by_addr] addr: %p, slab: %p\n", addr, + (void *)slab); + critnib_remove(slabs, (uintptr_t)addr); + + utils_mutex_unlock(lock); +} + #ifdef __cplusplus } #endif diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index 7067fca46..05910b42e 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -649,46 +649,6 @@ utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket) { return t->getKnownSlabsMapLock(); } -void slab_reg_by_addr(void *addr, slab_t *slab) { - bucket_t *bucket = slab_get_bucket(slab); - utils_mutex_t *lock = bucket_get_known_slabs_map_lock(bucket); - critnib *slabs = bucket_get_known_slabs(bucket); - - utils_mutex_lock(lock); - - // TODO multimap - slab_t *t = (slab_t *)critnib_get(slabs, (uintptr_t)addr); - assert(t == NULL); - (void)t; - - fprintf(stderr, "[DP slab_reg_by_addr] addr: %p, slab: %p\n", addr, - (void *)slab); - critnib_insert(slabs, (uintptr_t)addr, slab, 0); - - utils_mutex_unlock(lock); -} - -void slab_unreg_by_addr(void *addr, slab_t *slab) { - bucket_t *bucket = slab_get_bucket(slab); - utils_mutex_t *lock = bucket_get_known_slabs_map_lock(bucket); - critnib *slabs = bucket_get_known_slabs(bucket); - - utils_mutex_lock(lock); - - // debug only - // assume single-value per key - slab_t *known_slab = (slab_t *)critnib_get(slabs, (uintptr_t)addr); - assert(known_slab != NULL && "Slab is not found"); - assert(slab == known_slab); - (void)known_slab; - - fprintf(stderr, "[DP slab_unreg_by_addr] addr: %p, slab: %p\n", addr, - (void *)slab); - critnib_remove(slabs, (uintptr_t)addr); - - 
utils_mutex_unlock(lock); -} - #ifdef __cplusplus } #endif diff --git a/src/pool/pool_disjoint_temp.h b/src/pool/pool_disjoint_temp.h index bf763f275..9be4552c3 100644 --- a/src/pool/pool_disjoint_temp.h +++ b/src/pool/pool_disjoint_temp.h @@ -141,5 +141,7 @@ void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool); umf_disjoint_pool_shared_limits_t *bucket_get_limits(bucket_t *bucket); umf_disjoint_pool_params_t *bucket_get_params(bucket_t *bucket); umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket); +utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket); +critnib *bucket_get_known_slabs(bucket_t *bucket); #endif // TEMP_H From 3331b224fc582e91c5d05d6c7ee99917d6eaf057 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 13 Nov 2024 21:58:34 +0100 Subject: [PATCH 16/26] AllocImpl --- src/pool/pool_disjoint.c | 118 +++++++++++++- src/pool/pool_disjoint.cpp | 292 +++++++++------------------------- src/pool/pool_disjoint_temp.h | 45 ++++++ src/utils/utils_concurrency.h | 3 +- 4 files changed, 242 insertions(+), 216 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 1cced16be..5a68949e1 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -22,6 +22,7 @@ #include "utils_common.h" #include "utils_concurrency.h" #include "utils_log.h" +#include "utils_math.h" #include "utils_sanitizers.h" #include "pool_disjoint_temp.h" @@ -30,6 +31,8 @@ extern "C" { #endif +static size_t CutOff = (size_t)1 << 31; // 2GB + // Temporary solution for disabling memory poisoning. This is needed because // AddressSanitizer does not support memory poisoning for GPU allocations. // More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 @@ -526,8 +529,12 @@ bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { break; } - if (utils_compare_exchange(&bucket->shared_limits->total_size, - &pool_size, &new_pool_size)) { + // TODO!!! 
+ //if (utils_compare_exchange(&bucket->shared_limits->total_size, + // pool_size, new_pool_size)) { + if (bucket->shared_limits->total_size != new_pool_size) { + bucket->shared_limits->total_size = new_pool_size; + if (chunkedBucket) { ++bucket->chunkedSlabsInPool; } @@ -544,6 +551,26 @@ bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { return false; } +umf_disjoint_pool_params_t *bucket_get_params(bucket_t *bucket) { + AllocImpl *t = (AllocImpl *)bucket->OwnAllocCtx; + return AllocImpl_getParams(t); +} + +umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket) { + AllocImpl *t = (AllocImpl *)bucket->OwnAllocCtx; + return AllocImpl_getMemHandle(t); +} + +critnib *bucket_get_known_slabs(bucket_t *bucket) { + AllocImpl *t = (AllocImpl *)bucket->OwnAllocCtx; + return AllocImpl_getKnownSlabs(t); +} + +utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket) { + AllocImpl *t = (AllocImpl *)bucket->OwnAllocCtx; + return AllocImpl_getKnownSlabsMapLock(t); +} + void slab_reg_by_addr(void *addr, slab_t *slab) { bucket_t *bucket = slab_get_bucket(slab); utils_mutex_t *lock = bucket_get_known_slabs_map_lock(bucket); @@ -584,6 +611,93 @@ void slab_unreg_by_addr(void *addr, slab_t *slab) { utils_mutex_unlock(lock); } +AllocImpl *create_AllocImpl(umf_memory_provider_handle_t hProvider, + umf_disjoint_pool_params_t *params) { + + AllocImpl *ai = (AllocImpl *)umf_ba_global_alloc(sizeof(AllocImpl)); + + VALGRIND_DO_CREATE_MEMPOOL(ai, 0, 0); + ai->MemHandle = hProvider; + ai->params = *params; + + utils_mutex_init(&ai->known_slabs_map_lock); + ai->known_slabs = critnib_new(); + + // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. + // Powers of 2 and the value halfway between the powers of 2. + size_t Size1 = ai->params.MinBucketSize; + + // MinBucketSize cannot be larger than CutOff. + Size1 = utils_min(Size1, CutOff); + + // Buckets sized smaller than the bucket default size- 8 aren't needed. + Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); + + // Calculate the exponent for MinBucketSize used for finding buckets. 
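// Illustrative sketch (not part of this function): how the compare-exchange
// loop that is temporarily commented out above (the "TODO!!!" block in
// bucket_can_pool) is intended to work once utils_compare_exchange behaves
// the same on every platform. It assumes the GCC/Clang mapping: expected and
// desired are passed by address, the macro returns true on success and
// refreshes *expected with the current value on failure.
static bool try_grow_pool_size(size_t *total_size, size_t max_size,
                               size_t slab_size) {
    size_t expected;
    utils_atomic_load_acquire(total_size, &expected);
    while (true) {
        size_t desired = expected + slab_size;
        if (desired > max_size) {
            return false; // pooling this slab would exceed the limit
        }
        if (utils_compare_exchange(total_size, &expected, &desired)) {
            return true;  // won the race, the slab may stay in the pool
        }
        // lost the race: expected was refreshed, retry with the new value
    }
}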
+ ai->MinBucketSizeExp = (size_t)log2Utils(Size1); + ai->DefaultSharedLimits = shared_limits_create(SIZE_MAX); + + // count number of buckets, start from 1 + ai->buckets_num = 1; + size_t Size2 = Size1 + Size1 / 2; + size_t ts2 = Size2, ts1 = Size1; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { + ai->buckets_num += 2; + } + ai->buckets = + (bucket_t **)umf_ba_global_alloc(sizeof(bucket_t *) * ai->buckets_num); + + int i = 0; + Size1 = ts1; + Size2 = ts2; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2, i += 2) { + ai->buckets[i] = create_bucket(Size1, ai, AllocImpl_getLimits(ai)); + ai->buckets[i + 1] = create_bucket(Size2, ai, AllocImpl_getLimits(ai)); + } + ai->buckets[i] = create_bucket(CutOff, ai, AllocImpl_getLimits(ai)); + + umf_result_t ret = umfMemoryProviderGetMinPageSize( + hProvider, NULL, &ai->ProviderMinPageSize); + if (ret != UMF_RESULT_SUCCESS) { + ai->ProviderMinPageSize = 0; + } + + return ai; +} + +void destroy_AllocImpl(AllocImpl *ai) { + // TODO + // destroy DefaultSharedLimits + + for (size_t i = 0; i < ai->buckets_num; i++) { + destroy_bucket(ai->buckets[i]); + } + + VALGRIND_DO_DESTROY_MEMPOOL(ai); + + critnib_delete(ai->known_slabs); + + utils_mutex_destroy_not_free(&ai->known_slabs_map_lock); + + umf_ba_global_free(ai); +} + +umf_memory_provider_handle_t AllocImpl_getMemHandle(AllocImpl *ai) { + return ai->MemHandle; +} + +utils_mutex_t *AllocImpl_getKnownSlabsMapLock(AllocImpl *ai) { + return &ai->known_slabs_map_lock; +} + +critnib *AllocImpl_getKnownSlabs(AllocImpl *ai) { return ai->known_slabs; } + +size_t AllocImpl_SlabMinSize(AllocImpl *ai) { return ai->params.SlabMinSize; }; + +umf_disjoint_pool_params_t *AllocImpl_getParams(AllocImpl *ai) { + return &ai->params; +} + #ifdef __cplusplus } #endif diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index 05910b42e..bbb79d1d5 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -50,28 +50,6 @@ extern "C" { #endif // end TODO remove -class DisjointPool { - public: - class AllocImpl; - using Config = umf_disjoint_pool_params_t; - - umf_result_t initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_t *parameters); - void *malloc(size_t size); - void *calloc(size_t, size_t); - void *realloc(void *, size_t); - void *aligned_malloc(size_t size, size_t alignment); - size_t malloc_usable_size(void *); - umf_result_t free(void *ptr); - umf_result_t get_last_allocation_error(); - - DisjointPool(); - ~DisjointPool(); - - private: - std::unique_ptr impl; -}; - umf_disjoint_pool_shared_limits_t * umfDisjointPoolSharedLimitsCreate(size_t MaxSize) { return shared_limits_create(MaxSize); @@ -106,125 +84,25 @@ typedef struct MemoryProviderError { umf_result_t code; } MemoryProviderError_t; -class DisjointPool::AllocImpl { - // It's important for the map to be destroyed last after buckets and their - // slabs This is because slab's destructor removes the object from the map. - critnib *known_slabs; // (void *, slab_t *) - - // prev std::shared_timed_mutex - ok? 
- utils_mutex_t known_slabs_map_lock; - - // Handle to the memory provider - umf_memory_provider_handle_t MemHandle; - - // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) - bucket_t **buckets; - size_t buckets_num; - - // Configuration for this instance - umf_disjoint_pool_params_t params; - - umf_disjoint_pool_shared_limits_t *DefaultSharedLimits; - - // Used in algorithm for finding buckets - size_t MinBucketSizeExp; - - // Coarse-grain allocation min alignment - size_t ProviderMinPageSize; - +class DisjointPool { public: - AllocImpl(umf_memory_provider_handle_t hProvider, - umf_disjoint_pool_params_t *params) - : MemHandle{hProvider}, params(*params) { - - VALGRIND_DO_CREATE_MEMPOOL(this, 0, 0); - - utils_mutex_init(&known_slabs_map_lock); - known_slabs = critnib_new(); - - // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. - // Powers of 2 and the value halfway between the powers of 2. - size_t Size1 = this->params.MinBucketSize; - - // MinBucketSize cannot be larger than CutOff. - Size1 = std::min(Size1, CutOff); - - // Buckets sized smaller than the bucket default size- 8 aren't needed. - Size1 = std::max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); - - // Calculate the exponent for MinBucketSize used for finding buckets. - MinBucketSizeExp = (size_t)log2Utils(Size1); - DefaultSharedLimits = shared_limits_create(SIZE_MAX); - - // count number of buckets, start from 1 - buckets_num = 1; - auto Size2 = Size1 + Size1 / 2; - size_t ts2 = Size2, ts1 = Size1; - for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { - buckets_num += 2; - } - buckets = - (bucket_t **)umf_ba_global_alloc(sizeof(bucket_t *) * buckets_num); - - int i = 0; - Size1 = ts1; - Size2 = ts2; - for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2, i += 2) { - buckets[i] = create_bucket(Size1, this, this->getLimits()); - buckets[i + 1] = create_bucket(Size2, this, this->getLimits()); - } - buckets[i] = create_bucket(CutOff, this, this->getLimits()); - - auto ret = umfMemoryProviderGetMinPageSize(hProvider, nullptr, - &ProviderMinPageSize); - if (ret != UMF_RESULT_SUCCESS) { - ProviderMinPageSize = 0; - } - } - - ~AllocImpl() { - // TODO - // destroy DefaultSharedLimits - - for (size_t i = 0; i < buckets_num; i++) { - destroy_bucket(buckets[i]); - } - - VALGRIND_DO_DESTROY_MEMPOOL(this); - - critnib_delete(known_slabs); - - utils_mutex_destroy_not_free(&known_slabs_map_lock); - } - - void *allocate(size_t Size, size_t Alignment, bool &FromPool); - void *allocate(size_t Size, bool &FromPool); - umf_result_t deallocate(void *Ptr, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle() { return MemHandle; } - - utils_mutex_t *getKnownSlabsMapLock() { return &known_slabs_map_lock; } - - critnib *getKnownSlabs() { return known_slabs; } - - size_t SlabMinSize() { return params.SlabMinSize; }; - - umf_disjoint_pool_params_t &getParams() { return params; } + using Config = umf_disjoint_pool_params_t; - umf_disjoint_pool_shared_limits_t *getLimits() { - if (params.SharedLimits) { - return params.SharedLimits; - } else { - return DefaultSharedLimits; - } - }; + umf_result_t initialize(umf_memory_provider_handle_t provider, + umf_disjoint_pool_params_t *parameters); + void *malloc(size_t size); + void *calloc(size_t, size_t); + void *realloc(void *, size_t); + void *aligned_malloc(size_t size, size_t alignment); + size_t malloc_usable_size(void *); + umf_result_t free(void *ptr); + umf_result_t get_last_allocation_error(); - void printStats(bool &TitlePrinted, size_t &HighBucketSize, - size_t 
&HighPeakSlabsInUse, const std::string &Label); + DisjointPool(); + ~DisjointPool(); private: - bucket_t *findBucket(size_t Size); - size_t sizeToIdx(size_t Size); + AllocImpl *impl; }; static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, @@ -293,16 +171,16 @@ void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { } */ -void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) { +void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool) { void *Ptr; if (Size == 0) { return nullptr; } - FromPool = false; - if (Size > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size); + *FromPool = false; + if (Size > AllocImpl_getParams(ai)->MaxPoolableSize) { + Ptr = memoryProviderAlloc(AllocImpl_getMemHandle(ai), Size); if (Ptr == NULL) { // TODO get code from func @@ -315,12 +193,12 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) { return Ptr; } - bucket_t *bucket = findBucket(Size); + bucket_t *bucket = AllocImpl_findBucket(ai, Size); if (Size > bucket_chunk_cut_off(bucket)) { - Ptr = bucket_get_slab(bucket, &FromPool); + Ptr = bucket_get_slab(bucket, FromPool); } else { - Ptr = bucket_get_chunk(bucket, &FromPool); + Ptr = bucket_get_chunk(bucket, FromPool); } if (Ptr == NULL) { @@ -330,18 +208,18 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) { return nullptr; } - if (getParams().PoolTrace > 1) { + if (AllocImpl_getParams(ai)->PoolTrace > 1) { bucket_count_alloc(bucket, FromPool); } - VALGRIND_DO_MEMPOOL_ALLOC(this, Ptr, Size); + VALGRIND_DO_MEMPOOL_ALLOC(ai, Ptr, Size); annotate_memory_undefined(Ptr, bucket_get_size(bucket)); return Ptr; } -void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, - bool &FromPool) { +void *AllocImpl_allocate_align(AllocImpl *ai, size_t Size, size_t Alignment, + bool *FromPool) { void *Ptr; if (Size == 0) { @@ -349,11 +227,11 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, } if (Alignment <= 1) { - return allocate(Size, FromPool); + return AllocImpl_allocate(ai, Size, FromPool); } size_t AlignedSize; - if (Alignment <= ProviderMinPageSize) { + if (Alignment <= ai->ProviderMinPageSize) { // This allocation will be served from a Bucket which size is multiple // of Alignment and Slab address is aligned to ProviderMinPageSize // so the address will be properly aligned. @@ -367,37 +245,37 @@ void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, // Check if requested allocation size is within pooling limit. // If not, just request aligned pointer from the system. 
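/* Worked example for the aligned path above (numbers invented, and assuming
 * this branch rounds the request up to a multiple of the alignment):
 *   Alignment = 64, ProviderMinPageSize = 4096, Size = 100 -> AlignedSize 128,
 *   served from the 128-byte bucket. Slabs start on a page boundary and
 *   chunks sit at mem_ptr + idx * 128, so every chunk offset is a multiple of
 *   128 and the requested 64-byte alignment holds without extra padding.
 * Alignments larger than the provider page size cannot rely on this, which is
 * why the pointer returned below is aligned up with ALIGN_UP. */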
- FromPool = false; - if (AlignedSize > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size, Alignment); + *FromPool = false; + if (AlignedSize > AllocImpl_getParams(ai)->MaxPoolableSize) { + Ptr = memoryProviderAlloc(AllocImpl_getMemHandle(ai), Size, Alignment); assert(Ptr); annotate_memory_undefined(Ptr, Size); return Ptr; } - bucket_t *bucket = findBucket(AlignedSize); + bucket_t *bucket = AllocImpl_findBucket(ai, AlignedSize); if (AlignedSize > bucket_chunk_cut_off(bucket)) { - Ptr = bucket_get_slab(bucket, &FromPool); + Ptr = bucket_get_slab(bucket, FromPool); } else { - Ptr = bucket_get_chunk(bucket, &FromPool); + Ptr = bucket_get_chunk(bucket, FromPool); } assert(Ptr); - if (getParams().PoolTrace > 1) { + if (AllocImpl_getParams(ai)->PoolTrace > 1) { bucket_count_alloc(bucket, FromPool); } - VALGRIND_DO_MEMPOOL_ALLOC(this, ALIGN_UP((size_t)Ptr, Alignment), Size); + VALGRIND_DO_MEMPOOL_ALLOC(ai, ALIGN_UP((size_t)Ptr, Alignment), Size); annotate_memory_undefined((void *)ALIGN_UP((size_t)Ptr, Alignment), Size); return (void *)ALIGN_UP((size_t)Ptr, Alignment); } -std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) { +size_t AllocImpl_sizeToIdx(AllocImpl *ai, size_t Size) { assert(Size <= CutOff && "Unexpected size"); assert(Size > 0 && "Unexpected size"); - size_t MinBucketSize = (size_t)1 << MinBucketSizeExp; + size_t MinBucketSize = (size_t)1 << ai->MinBucketSizeExp; if (Size < MinBucketSize) { return 0; } @@ -408,44 +286,52 @@ std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) { auto isPowerOf2 = 0 == (Size & (Size - 1)); auto largerThanHalfwayBetweenPowersOf2 = !isPowerOf2 && bool((Size - 1) & (uint64_t(1) << (position - 1))); - auto index = (position - MinBucketSizeExp) * 2 + (int)(!isPowerOf2) + + auto index = (position - ai->MinBucketSizeExp) * 2 + (int)(!isPowerOf2) + (int)largerThanHalfwayBetweenPowersOf2; return index; } -bucket_t *DisjointPool::AllocImpl::findBucket(size_t Size) { - auto calculatedIdx = sizeToIdx(Size); - bucket_t *bucket = buckets[calculatedIdx]; +umf_disjoint_pool_shared_limits_t *AllocImpl_getLimits(AllocImpl *ai) { + if (ai->params.SharedLimits) { + return ai->params.SharedLimits; + } else { + return ai->DefaultSharedLimits; + } +}; + +bucket_t *AllocImpl_findBucket(AllocImpl *ai, size_t Size) { + auto calculatedIdx = AllocImpl_sizeToIdx(ai, Size); + bucket_t *bucket = ai->buckets[calculatedIdx]; assert(bucket_get_size(bucket) >= Size); (void)bucket; if (calculatedIdx > 0) { - bucket_t *bucket_prev = buckets[calculatedIdx - 1]; + bucket_t *bucket_prev = ai->buckets[calculatedIdx - 1]; assert(bucket_get_size(bucket_prev) < Size); (void)bucket_prev; } - return buckets[calculatedIdx]; + return ai->buckets[calculatedIdx]; } -umf_result_t DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { +umf_result_t AllocImpl_deallocate(AllocImpl *ai, void *Ptr, bool *ToPool) { if (Ptr == nullptr) { return UMF_RESULT_SUCCESS; } - auto *SlabPtr = (void *)ALIGN_DOWN((size_t)Ptr, SlabMinSize()); + void *SlabPtr = (void *)ALIGN_DOWN((size_t)Ptr, AllocImpl_SlabMinSize(ai)); // Lock the map on read - utils_mutex_lock(getKnownSlabsMapLock()); + utils_mutex_lock(AllocImpl_getKnownSlabsMapLock(ai)); - ToPool = false; + *ToPool = false; - slab_t *slab = (slab_t *)critnib_get(known_slabs, (uintptr_t)SlabPtr); + slab_t *slab = (slab_t *)critnib_get(ai->known_slabs, (uintptr_t)SlabPtr); //auto Slabs = getKnownSlabs().equal_range(SlabPtr); if (slab == NULL) { - utils_mutex_unlock(getKnownSlabsMapLock()); - umf_result_t ret = 
memoryProviderFree(getMemHandle(), Ptr); + utils_mutex_unlock(AllocImpl_getKnownSlabsMapLock(ai)); + umf_result_t ret = memoryProviderFree(AllocImpl_getMemHandle(ai), Ptr); return ret; } @@ -457,46 +343,45 @@ umf_result_t DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { if (Ptr >= slab_get(slab) && Ptr < slab_get_end(slab)) { // Unlock the map before freeing the chunk, it may be locked on write // there - utils_mutex_unlock(getKnownSlabsMapLock()); + utils_mutex_unlock(AllocImpl_getKnownSlabsMapLock(ai)); bucket_t *bucket = slab_get_bucket(slab); - if (getParams().PoolTrace > 1) { + if (AllocImpl_getParams(ai)->PoolTrace > 1) { bucket_count_free(bucket); } - VALGRIND_DO_MEMPOOL_FREE(this, Ptr); + VALGRIND_DO_MEMPOOL_FREE(ai, Ptr); annotate_memory_inaccessible(Ptr, bucket_get_size(bucket)); if (bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket)) { - bucket_free_chunk(bucket, Ptr, slab, &ToPool); + bucket_free_chunk(bucket, Ptr, slab, ToPool); } else { - bucket_free_slab(bucket, slab, &ToPool); + bucket_free_slab(bucket, slab, ToPool); } return UMF_RESULT_SUCCESS; } //} // for multimap - utils_mutex_unlock(getKnownSlabsMapLock()); + utils_mutex_unlock(AllocImpl_getKnownSlabsMapLock(ai)); // There is a rare case when we have a pointer from system allocation next // to some slab with an entry in the map. So we find a slab // but the range checks fail. - memoryProviderFree(getMemHandle(), Ptr); + memoryProviderFree(AllocImpl_getMemHandle(ai), Ptr); return UMF_RESULT_SUCCESS; } -void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, - size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, - const std::string &MTName) { +void AllocImpl_printStats(AllocImpl *ai, bool &TitlePrinted, + size_t &HighBucketSize, size_t &HighPeakSlabsInUse, + const std::string &MTName) { (void)TitlePrinted; // TODO (void)MTName; // TODO HighBucketSize = 0; HighPeakSlabsInUse = 0; - for (size_t i = 0; i < buckets_num; i++) { + for (size_t i = 0; i < ai->buckets_num; i++) { // TODO //(*B).printStats(TitlePrinted, MTName); - bucket_t *bucket = buckets[i]; + bucket_t *bucket = ai->buckets[i]; HighPeakSlabsInUse = utils_max(bucket->maxSlabsInUse, HighPeakSlabsInUse); if (bucket->allocCount) { @@ -518,17 +403,17 @@ umf_result_t DisjointPool::initialize(umf_memory_provider_handle_t provider, return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - impl = std::make_unique(provider, parameters); + impl = create_AllocImpl(provider, parameters); return UMF_RESULT_SUCCESS; } void *DisjointPool::malloc(size_t size) { // For full-slab allocations indicates // whether slab is from Pool. bool FromPool; - auto Ptr = impl->allocate(size, FromPool); + void *Ptr = AllocImpl_allocate(impl, size, &FromPool); - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; + if (AllocImpl_getParams(impl)->PoolTrace > 2) { + auto MT = AllocImpl_getParams(impl)->Name; std::cout << "Allocated " << std::setw(8) << size << " " << MT << " bytes from " << (FromPool ? 
"Pool" : "Provider") << " ->" << Ptr << std::endl; @@ -550,10 +435,10 @@ void *DisjointPool::realloc(void *, size_t) { void *DisjointPool::aligned_malloc(size_t size, size_t alignment) { bool FromPool; - auto Ptr = impl->allocate(size, alignment, FromPool); + auto Ptr = AllocImpl_allocate_align(impl, size, alignment, &FromPool); - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; + if (AllocImpl_getParams(impl)->PoolTrace > 2) { + auto MT = AllocImpl_getParams(impl)->Name; std::cout << "Allocated " << std::setw(8) << size << " " << MT << " bytes aligned at " << alignment << " from " << (FromPool ? "Pool" : "Provider") << " ->" << Ptr @@ -569,7 +454,7 @@ size_t DisjointPool::malloc_usable_size(void *) { umf_result_t DisjointPool::free(void *ptr) { bool ToPool; - umf_result_t ret = impl->deallocate(ptr, ToPool); + umf_result_t ret = AllocImpl_deallocate(impl, ptr, &ToPool); /* if (ret == UMF_RESULT_SUCCESS) { @@ -594,6 +479,7 @@ DisjointPool::DisjointPool() {} // Define destructor for use with unique_ptr DisjointPool::~DisjointPool() { + destroy_AllocImpl(impl); /* if (impl->getParams().PoolTrace > 1) { bool TitlePrinted = false; @@ -629,26 +515,6 @@ umf_memory_pool_ops_t *umfDisjointPoolOps(void) { extern "C" { #endif -umf_disjoint_pool_params_t *bucket_get_params(bucket_t *bucket) { - auto t = (DisjointPool::AllocImpl *)bucket->OwnAllocCtx; - return &t->getParams(); -} - -umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket) { - auto t = (DisjointPool::AllocImpl *)bucket->OwnAllocCtx; - return t->getMemHandle(); -} - -critnib *bucket_get_known_slabs(bucket_t *bucket) { - auto t = (DisjointPool::AllocImpl *)bucket->OwnAllocCtx; - return t->getKnownSlabs(); -} - -utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket) { - auto t = (DisjointPool::AllocImpl *)bucket->OwnAllocCtx; - return t->getKnownSlabsMapLock(); -} - #ifdef __cplusplus } #endif diff --git a/src/pool/pool_disjoint_temp.h b/src/pool/pool_disjoint_temp.h index 9be4552c3..86dc5c87a 100644 --- a/src/pool/pool_disjoint_temp.h +++ b/src/pool/pool_disjoint_temp.h @@ -144,4 +144,49 @@ umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket); utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket); critnib *bucket_get_known_slabs(bucket_t *bucket); +typedef struct AllocImpl { + // It's important for the map to be destroyed last after buckets and their + // slabs This is because slab's destructor removes the object from the map. + critnib *known_slabs; // (void *, slab_t *) + + // prev std::shared_timed_mutex - ok? 
+ utils_mutex_t known_slabs_map_lock; + + // Handle to the memory provider + umf_memory_provider_handle_t MemHandle; + + // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) + bucket_t **buckets; + size_t buckets_num; + + // Configuration for this instance + umf_disjoint_pool_params_t params; + + umf_disjoint_pool_shared_limits_t *DefaultSharedLimits; + + // Used in algorithm for finding buckets + size_t MinBucketSizeExp; + + // Coarse-grain allocation min alignment + size_t ProviderMinPageSize; + +} AllocImpl; + +AllocImpl *create_AllocImpl(umf_memory_provider_handle_t hProvider, + umf_disjoint_pool_params_t *params); +void destroy_AllocImpl(AllocImpl *ai); + +bucket_t *AllocImpl_findBucket(AllocImpl *ai, size_t Size); +umf_result_t AllocImpl_deallocate(AllocImpl *ai, void *Ptr, bool *ToPool); +umf_disjoint_pool_shared_limits_t *AllocImpl_getLimits(AllocImpl *ai); +void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool); +void *AllocImpl_allocate_align(AllocImpl *ai, size_t Size, size_t Alignment, + bool *FromPool); + +umf_memory_provider_handle_t AllocImpl_getMemHandle(AllocImpl *ai); +utils_mutex_t *AllocImpl_getKnownSlabsMapLock(AllocImpl *ai); +critnib *AllocImpl_getKnownSlabs(AllocImpl *ai); +size_t AllocImpl_SlabMinSize(AllocImpl *ai); +umf_disjoint_pool_params_t *AllocImpl_getParams(AllocImpl *ai); + #endif // TEMP_H diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 87f2e05a1..861c621cb 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -94,7 +94,8 @@ static __inline unsigned char utils_mssb_index(long long value) { InterlockedExchangeAdd64((LONG64 *)(ptr), value) #define utils_compare_exchange(object, expected, desired) \ - InterlockedCompareExchange64((LONG64 volatile *)object, *expected, *desired) + InterlockedCompareExchangePointer((LONG64 volatile *)object, expected, \ + desired) #else // !defined(_WIN32) From b6e07d62936f9eb2165379afa6189ec45dc0e0a4 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 14 Nov 2024 08:57:11 +0100 Subject: [PATCH 17/26] fix valgrind --- src/pool/pool_disjoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 5a68949e1..67145d938 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -675,6 +675,7 @@ void destroy_AllocImpl(AllocImpl *ai) { VALGRIND_DO_DESTROY_MEMPOOL(ai); + shared_limits_destroy(ai->DefaultSharedLimits); critnib_delete(ai->known_slabs); utils_mutex_destroy_not_free(&ai->known_slabs_map_lock); From 6f083a5d3af5d8bf1ae7e64fbe4e5aa466dd389a Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 14 Nov 2024 09:01:12 +0100 Subject: [PATCH 18/26] atomics cd --- src/pool/pool_disjoint.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 67145d938..f9465a069 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -529,12 +529,14 @@ bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { break; } - // TODO!!! - //if (utils_compare_exchange(&bucket->shared_limits->total_size, - // pool_size, new_pool_size)) { +// TODO!!! 
+#ifdef _WIN32 if (bucket->shared_limits->total_size != new_pool_size) { bucket->shared_limits->total_size = new_pool_size; - +#else + if (utils_compare_exchange(&bucket->shared_limits->total_size, + &pool_size, &new_pool_size)) { +#endif if (chunkedBucket) { ++bucket->chunkedSlabsInPool; } @@ -693,7 +695,7 @@ utils_mutex_t *AllocImpl_getKnownSlabsMapLock(AllocImpl *ai) { critnib *AllocImpl_getKnownSlabs(AllocImpl *ai) { return ai->known_slabs; } -size_t AllocImpl_SlabMinSize(AllocImpl *ai) { return ai->params.SlabMinSize; }; +size_t AllocImpl_SlabMinSize(AllocImpl *ai) { return ai->params.SlabMinSize; } umf_disjoint_pool_params_t *AllocImpl_getParams(AllocImpl *ai) { return &ai->params; From 15881fcfad32df9725066933e40d47fac326dcda Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 14 Nov 2024 09:09:54 +0100 Subject: [PATCH 19/26] move code --- src/pool/pool_disjoint.c | 65 +++++++++++++++++++++++++++++++++ src/pool/pool_disjoint.cpp | 75 -------------------------------------- 2 files changed, 65 insertions(+), 75 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index f9465a069..282c66b7b 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -701,6 +701,71 @@ umf_disjoint_pool_params_t *AllocImpl_getParams(AllocImpl *ai) { return &ai->params; } +size_t AllocImpl_sizeToIdx(AllocImpl *ai, size_t size) { + assert(size <= CutOff && "Unexpected size"); + assert(size > 0 && "Unexpected size"); + + size_t MinBucketSize = (size_t)1 << ai->MinBucketSizeExp; + if (size < MinBucketSize) { + return 0; + } + + // Get the position of the leftmost set bit. + size_t position = getLeftmostSetBitPos(size); + + bool isPowerOf2 = 0 == (size & (size - 1)); + bool largerThanHalfwayBetweenPowersOf2 = + !isPowerOf2 && (bool)((size - 1) & ((uint64_t)(1) << (position - 1))); + size_t index = (position - ai->MinBucketSizeExp) * 2 + (int)(!isPowerOf2) + + (int)largerThanHalfwayBetweenPowersOf2; + + return index; +} + +umf_disjoint_pool_shared_limits_t *AllocImpl_getLimits(AllocImpl *ai) { + if (ai->params.SharedLimits) { + return ai->params.SharedLimits; + } else { + return ai->DefaultSharedLimits; + } +} + +bucket_t *AllocImpl_findBucket(AllocImpl *ai, size_t Size) { + size_t calculatedIdx = AllocImpl_sizeToIdx(ai, Size); + bucket_t *bucket = ai->buckets[calculatedIdx]; + assert(bucket_get_size(bucket) >= Size); + (void)bucket; + + if (calculatedIdx > 0) { + bucket_t *bucket_prev = ai->buckets[calculatedIdx - 1]; + assert(bucket_get_size(bucket_prev) < Size); + (void)bucket_prev; + } + + return ai->buckets[calculatedIdx]; +} + +void AllocImpl_printStats(AllocImpl *ai, bool *TitlePrinted, + size_t *HighBucketSize, size_t *HighPeakSlabsInUse, + const char *MTName) { + (void)TitlePrinted; // TODO + (void)MTName; // TODO + + *HighBucketSize = 0; + *HighPeakSlabsInUse = 0; + for (size_t i = 0; i < ai->buckets_num; i++) { + // TODO + //(*B).printStats(TitlePrinted, MTName); + bucket_t *bucket = ai->buckets[i]; + *HighPeakSlabsInUse = + utils_max(bucket->maxSlabsInUse, *HighPeakSlabsInUse); + if (bucket->allocCount) { + *HighBucketSize = + utils_max(bucket_slab_alloc_size(bucket), *HighBucketSize); + } + } +} + #ifdef __cplusplus } #endif diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index bbb79d1d5..93da7beb1 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -271,50 +271,6 @@ void *AllocImpl_allocate_align(AllocImpl *ai, size_t Size, size_t Alignment, return (void *)ALIGN_UP((size_t)Ptr, Alignment); } -size_t 
AllocImpl_sizeToIdx(AllocImpl *ai, size_t Size) { - assert(Size <= CutOff && "Unexpected size"); - assert(Size > 0 && "Unexpected size"); - - size_t MinBucketSize = (size_t)1 << ai->MinBucketSizeExp; - if (Size < MinBucketSize) { - return 0; - } - - // Get the position of the leftmost set bit. - size_t position = getLeftmostSetBitPos(Size); - - auto isPowerOf2 = 0 == (Size & (Size - 1)); - auto largerThanHalfwayBetweenPowersOf2 = - !isPowerOf2 && bool((Size - 1) & (uint64_t(1) << (position - 1))); - auto index = (position - ai->MinBucketSizeExp) * 2 + (int)(!isPowerOf2) + - (int)largerThanHalfwayBetweenPowersOf2; - - return index; -} - -umf_disjoint_pool_shared_limits_t *AllocImpl_getLimits(AllocImpl *ai) { - if (ai->params.SharedLimits) { - return ai->params.SharedLimits; - } else { - return ai->DefaultSharedLimits; - } -}; - -bucket_t *AllocImpl_findBucket(AllocImpl *ai, size_t Size) { - auto calculatedIdx = AllocImpl_sizeToIdx(ai, Size); - bucket_t *bucket = ai->buckets[calculatedIdx]; - assert(bucket_get_size(bucket) >= Size); - (void)bucket; - - if (calculatedIdx > 0) { - bucket_t *bucket_prev = ai->buckets[calculatedIdx - 1]; - assert(bucket_get_size(bucket_prev) < Size); - (void)bucket_prev; - } - - return ai->buckets[calculatedIdx]; -} - umf_result_t AllocImpl_deallocate(AllocImpl *ai, void *Ptr, bool *ToPool) { if (Ptr == nullptr) { return UMF_RESULT_SUCCESS; @@ -370,27 +326,6 @@ umf_result_t AllocImpl_deallocate(AllocImpl *ai, void *Ptr, bool *ToPool) { return UMF_RESULT_SUCCESS; } -void AllocImpl_printStats(AllocImpl *ai, bool &TitlePrinted, - size_t &HighBucketSize, size_t &HighPeakSlabsInUse, - const std::string &MTName) { - (void)TitlePrinted; // TODO - (void)MTName; // TODO - - HighBucketSize = 0; - HighPeakSlabsInUse = 0; - for (size_t i = 0; i < ai->buckets_num; i++) { - // TODO - //(*B).printStats(TitlePrinted, MTName); - bucket_t *bucket = ai->buckets[i]; - HighPeakSlabsInUse = - utils_max(bucket->maxSlabsInUse, HighPeakSlabsInUse); - if (bucket->allocCount) { - HighBucketSize = - utils_max(bucket_slab_alloc_size(bucket), HighBucketSize); - } - } -} - umf_result_t DisjointPool::initialize(umf_memory_provider_handle_t provider, umf_disjoint_pool_params_t *parameters) { if (!provider) { @@ -509,13 +444,3 @@ static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = umf_memory_pool_ops_t *umfDisjointPoolOps(void) { return &UMF_DISJOINT_POOL_OPS; } - -// TODO remove -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef __cplusplus -} -#endif -// end TODO remove From 7e9f4d886fc5403a4ef34c8d62c99518814263a9 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 14 Nov 2024 09:29:45 +0100 Subject: [PATCH 20/26] fix for ThreadSanitizer --- src/pool/pool_disjoint.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 282c66b7b..e4c22d398 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -521,7 +521,9 @@ bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { } if (bucket_capacity(bucket) >= NewFreeSlabsInBucket) { - size_t pool_size = bucket->shared_limits->total_size; + size_t pool_size = 0; + utils_atomic_load_acquire(&bucket->shared_limits->total_size, + &pool_size); while (true) { size_t new_pool_size = pool_size + bucket_slab_alloc_size(bucket); From 9ce6514091a401508f6deac232e615dd27906de8 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 14 Nov 2024 09:38:56 +0100 Subject: [PATCH 21/26] error handling --- src/pool/pool_disjoint.c | 5 +++++ src/pool/pool_disjoint.cpp | 18 
+++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index e4c22d398..86ab864ce 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -27,10 +27,14 @@ #include "pool_disjoint_temp.h" +// TODO remove #ifdef __cplusplus extern "C" { #endif +//static <- make static rename to TLS_last_allocation_error +__TLS umf_result_t TLS_last_allocation_error_dp; + static size_t CutOff = (size_t)1 << 31; // 2GB // Temporary solution for disabling memory poisoning. This is needed because @@ -768,6 +772,7 @@ void AllocImpl_printStats(AllocImpl *ai, bool *TitlePrinted, } } +// TODO remove #ifdef __cplusplus } #endif diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index 93da7beb1..0837694f4 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -45,6 +45,8 @@ extern "C" { #include "pool_disjoint_temp.h" +extern __TLS umf_result_t TLS_last_allocation_error_dp; + #ifdef __cplusplus } #endif @@ -110,7 +112,7 @@ static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, void *ptr; auto ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); if (ret != UMF_RESULT_SUCCESS) { - umf::getPoolLastStatusRef() = ret; + TLS_last_allocation_error_dp = ret; return NULL; } annotate_memory_inaccessible(ptr, size); @@ -132,7 +134,7 @@ static umf_result_t memoryProviderFree(umf_memory_provider_handle_t hProvider, auto ret = umfMemoryProviderFree(hProvider, ptr, size); if (ret != UMF_RESULT_SUCCESS) { - umf::getPoolLastStatusRef() = ret; + TLS_last_allocation_error_dp = ret; // throw MemoryProviderError{ret}; return ret; } @@ -184,8 +186,7 @@ void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool) { if (Ptr == NULL) { // TODO get code from func - umf::getPoolLastStatusRef() = - UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + TLS_last_allocation_error_dp = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; return nullptr; } @@ -203,8 +204,7 @@ void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool) { if (Ptr == NULL) { // TODO get code from func - umf::getPoolLastStatusRef() = - UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + TLS_last_allocation_error_dp = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; return nullptr; } @@ -358,13 +358,13 @@ void *DisjointPool::malloc(size_t size) { // For full-slab allocations indicates void *DisjointPool::calloc(size_t, size_t) { // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; + TLS_last_allocation_error_dp = UMF_RESULT_ERROR_NOT_SUPPORTED; return NULL; } void *DisjointPool::realloc(void *, size_t) { // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; + TLS_last_allocation_error_dp = UMF_RESULT_ERROR_NOT_SUPPORTED; return NULL; } @@ -407,7 +407,7 @@ umf_result_t DisjointPool::free(void *ptr) { } umf_result_t DisjointPool::get_last_allocation_error() { - return umf::getPoolLastStatusRef(); + return TLS_last_allocation_error_dp; } DisjointPool::DisjointPool() {} From 27a1e2c65f576d2f010743c2e589c8e4d7adc982 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 14 Nov 2024 12:24:54 +0100 Subject: [PATCH 22/26] remove cpp file --- src/pool/CMakeLists.txt | 2 +- src/pool/pool_disjoint.c | 424 +++++++++++++++++++++++++++++++- src/pool/pool_disjoint.cpp | 446 ---------------------------------- src/pool/pool_disjoint_temp.h | 2 - 4 files changed, 420 insertions(+), 454 deletions(-) delete mode 100644 src/pool/pool_disjoint.cpp diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index 
d7b48254a..775bb1347 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -14,7 +14,7 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT) add_umf_library( NAME disjoint_pool TYPE STATIC - SRCS pool_disjoint.cpp pool_disjoint.c ../critnib/critnib.c + SRCS pool_disjoint.c ../critnib/critnib.c ${POOL_EXTRA_SRCS} LIBS ${POOL_EXTRA_LIBS}) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 86ab864ce..b10d59eb1 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -19,6 +19,7 @@ #include "uthash/utlist.h" #include "base_alloc_global.h" +#include "provider_tracking.h" #include "utils_common.h" #include "utils_concurrency.h" #include "utils_log.h" @@ -35,6 +36,24 @@ extern "C" { //static <- make static rename to TLS_last_allocation_error __TLS umf_result_t TLS_last_allocation_error_dp; +// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is +// requested. The implementation distinguishes between allocations of size +// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. +// Allocation requests smaller than ChunkCutoff use chunks taken from a single +// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation +// size, and 8-byte allocations, only 1 in ~8000 requests results in a new +// coarse-grain allocation. Freeing results only in a chunk of a larger +// allocation to be marked as available and no real return to the system. An +// allocation is returned to the system only when all chunks in the larger +// allocation are freed by the program. Allocations larger than ChunkCutOff use +// a separate coarse-grain allocation for each request. These are subject to +// "pooling". That is, when such an allocation is freed by the program it is +// retained in a pool. The pool is available for future allocations, which means +// there are fewer actual coarse-grain allocations/deallocations. + +// The largest size which is allocated via the allocator. +// Allocations with size > CutOff bypass the pool and +// go directly to the provider. static size_t CutOff = (size_t)1 << 31; // 2GB // Temporary solution for disabling memory poisoning. This is needed because @@ -65,7 +84,8 @@ typedef struct umf_disjoint_pool_shared_limits_t { size_t total_size; // requires atomic access } umf_disjoint_pool_shared_limits_t; -umf_disjoint_pool_shared_limits_t *shared_limits_create(size_t max_size) { +umf_disjoint_pool_shared_limits_t * +umfDisjointPoolSharedLimitsCreate(size_t max_size) { umf_disjoint_pool_shared_limits_t *ptr = umf_ba_global_alloc(sizeof(umf_disjoint_pool_shared_limits_t)); ptr->max_size = max_size; @@ -73,10 +93,15 @@ umf_disjoint_pool_shared_limits_t *shared_limits_create(size_t max_size) { return ptr; } -void shared_limits_destroy(umf_disjoint_pool_shared_limits_t *shared_limits) { - umf_ba_global_free(shared_limits); +void umfDisjointPoolSharedLimitsDestroy( + umf_disjoint_pool_shared_limits_t *limits) { + umf_ba_global_free(limits); } +typedef struct disjoint_pool_t { + AllocImpl *impl; +} disjoint_pool_t; + size_t bucket_get_size(bucket_t *bucket); void slab_reg(slab_t *slab); @@ -595,6 +620,11 @@ void slab_reg_by_addr(void *addr, slab_t *slab) { (void *)slab); critnib_insert(slabs, (uintptr_t)addr, slab, 0); + // debug + slab_t *s = (slab_t *)critnib_get(slabs, (uintptr_t)addr); + assert(s != NULL); + (void)s; + utils_mutex_unlock(lock); } @@ -643,7 +673,7 @@ AllocImpl *create_AllocImpl(umf_memory_provider_handle_t hProvider, // Calculate the exponent for MinBucketSize used for finding buckets. 
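    // Editorial sketch, not part of this patch: assuming log2Utils() returns
    // floor(log2(x)), a MinBucketSize of 64 yields MinBucketSizeExp == 6 and
    // the bucket ladder 64, 96, 128, 192, 256, ... up to CutOff (powers of
    // two interleaved with their halfway points). AllocImpl_sizeToIdx() then
    // maps a request onto that ladder, for example:
    //   size 64  -> index 0  (exact power of two)
    //   size 96  -> index 1  (halfway point)
    //   size 100 -> index 2  (served from the 128-byte bucket)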
ai->MinBucketSizeExp = (size_t)log2Utils(Size1); - ai->DefaultSharedLimits = shared_limits_create(SIZE_MAX); + ai->DefaultSharedLimits = umfDisjointPoolSharedLimitsCreate(SIZE_MAX); // count number of buckets, start from 1 ai->buckets_num = 1; @@ -683,7 +713,7 @@ void destroy_AllocImpl(AllocImpl *ai) { VALGRIND_DO_DESTROY_MEMPOOL(ai); - shared_limits_destroy(ai->DefaultSharedLimits); + umfDisjointPoolSharedLimitsDestroy(ai->DefaultSharedLimits); critnib_delete(ai->known_slabs); utils_mutex_destroy_not_free(&ai->known_slabs_map_lock); @@ -772,6 +802,390 @@ void AllocImpl_printStats(AllocImpl *ai, bool *TitlePrinted, } } +static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, + size_t size, size_t alignment) { + void *ptr; + umf_result_t ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error_dp = ret; + return NULL; + } + annotate_memory_inaccessible(ptr, size); + return ptr; +} + +static umf_result_t memoryProviderFree(umf_memory_provider_handle_t hProvider, + void *ptr) { + size_t size = 0; + + if (ptr) { + umf_alloc_info_t allocInfo = {NULL, 0, NULL}; + umf_result_t umf_result = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); + if (umf_result == UMF_RESULT_SUCCESS) { + size = allocInfo.baseSize; + } + } + + umf_result_t ret = umfMemoryProviderFree(hProvider, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { + + TLS_last_allocation_error_dp = ret; + // throw MemoryProviderError{ret}; + return ret; + } + return UMF_RESULT_SUCCESS; +} + +void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool) { + void *Ptr; + + if (Size == 0) { + return NULL; + } + + *FromPool = false; + if (Size > AllocImpl_getParams(ai)->MaxPoolableSize) { + Ptr = memoryProviderAlloc(AllocImpl_getMemHandle(ai), Size, 0); + + if (Ptr == NULL) { + // TODO get code from func + TLS_last_allocation_error_dp = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + return NULL; + } + + annotate_memory_undefined(Ptr, Size); + return Ptr; + } + + bucket_t *bucket = AllocImpl_findBucket(ai, Size); + + if (Size > bucket_chunk_cut_off(bucket)) { + Ptr = bucket_get_slab(bucket, FromPool); + } else { + Ptr = bucket_get_chunk(bucket, FromPool); + } + + if (Ptr == NULL) { + // TODO get code from func + TLS_last_allocation_error_dp = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + return NULL; + } + + if (AllocImpl_getParams(ai)->PoolTrace > 1) { + bucket_count_alloc(bucket, FromPool); + } + + VALGRIND_DO_MEMPOOL_ALLOC(ai, Ptr, Size); + annotate_memory_undefined(Ptr, bucket_get_size(bucket)); + + return Ptr; +} + +void *AllocImpl_allocate_align(AllocImpl *ai, size_t Size, size_t Alignment, + bool *FromPool) { + void *Ptr; + + if (Size == 0) { + return NULL; + } + + if (Alignment <= 1) { + return AllocImpl_allocate(ai, Size, FromPool); + } + + size_t AlignedSize; + if (Alignment <= ai->ProviderMinPageSize) { + // This allocation will be served from a Bucket which size is multiple + // of Alignment and Slab address is aligned to ProviderMinPageSize + // so the address will be properly aligned. + AlignedSize = (Size > 1) ? ALIGN_UP(Size, Alignment) : Alignment; + } else { + // Slabs are only aligned to ProviderMinPageSize, we need to compensate + // for that in case the allocation is within pooling limit. + // TODO: consider creating properly-aligned Slabs on demand + AlignedSize = Size + Alignment - 1; + } + + // Check if requested allocation size is within pooling limit. + // If not, just request aligned pointer from the system. 
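+    // Editorial sketch, not part of the original change: when Alignment is
+    // larger than ProviderMinPageSize the request is padded so that any
+    // chunk start can be shifted up to the required boundary. For example,
+    // with Size = 100, Alignment = 8192 and an assumed 4096-byte provider
+    // page size:
+    //   AlignedSize = 100 + 8192 - 1 = 8291;     // padded request
+    //   user_ptr = ALIGN_UP((size_t)Ptr, 8192);  // shift of at most 8191
+    // and the padding guarantees that Size bytes remain usable after the
+    // shift.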
+ *FromPool = false; + if (AlignedSize > AllocImpl_getParams(ai)->MaxPoolableSize) { + Ptr = memoryProviderAlloc(AllocImpl_getMemHandle(ai), Size, Alignment); + assert(Ptr); + annotate_memory_undefined(Ptr, Size); + return Ptr; + } + + bucket_t *bucket = AllocImpl_findBucket(ai, AlignedSize); + + if (AlignedSize > bucket_chunk_cut_off(bucket)) { + Ptr = bucket_get_slab(bucket, FromPool); + } else { + Ptr = bucket_get_chunk(bucket, FromPool); + } + + assert(Ptr); + if (AllocImpl_getParams(ai)->PoolTrace > 1) { + bucket_count_alloc(bucket, FromPool); + } + + VALGRIND_DO_MEMPOOL_ALLOC(ai, ALIGN_UP((size_t)Ptr, Alignment), Size); + annotate_memory_undefined((void *)ALIGN_UP((size_t)Ptr, Alignment), Size); + return (void *)ALIGN_UP((size_t)Ptr, Alignment); +} + +umf_result_t AllocImpl_deallocate(AllocImpl *ai, void *Ptr, bool *ToPool) { + if (Ptr == NULL) { + return UMF_RESULT_SUCCESS; + } + + void *SlabPtr = (void *)ALIGN_DOWN((size_t)Ptr, AllocImpl_SlabMinSize(ai)); + + // Lock the map on read + utils_mutex_lock(AllocImpl_getKnownSlabsMapLock(ai)); + + *ToPool = false; + + slab_t *slab = (slab_t *)critnib_get(ai->known_slabs, (uintptr_t)SlabPtr); + //auto Slabs = getKnownSlabs().equal_range(SlabPtr); + if (slab == NULL) { + utils_mutex_unlock(AllocImpl_getKnownSlabsMapLock(ai)); + umf_result_t ret = memoryProviderFree(AllocImpl_getMemHandle(ai), Ptr); + return ret; + } + + // TODO - no multimap + // for (auto It = Slabs.first; It != Slabs.second; ++It) { + + // The slab object won't be deleted until it's removed from the map which is + // protected by the lock, so it's safe to access it here. + if (Ptr >= slab_get(slab) && Ptr < slab_get_end(slab)) { + // Unlock the map before freeing the chunk, it may be locked on write + // there + utils_mutex_unlock(AllocImpl_getKnownSlabsMapLock(ai)); + bucket_t *bucket = slab_get_bucket(slab); + + if (AllocImpl_getParams(ai)->PoolTrace > 1) { + bucket_count_free(bucket); + } + + VALGRIND_DO_MEMPOOL_FREE(ai, Ptr); + annotate_memory_inaccessible(Ptr, bucket_get_size(bucket)); + if (bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket)) { + bucket_free_chunk(bucket, Ptr, slab, ToPool); + } else { + bucket_free_slab(bucket, slab, ToPool); + } + + return UMF_RESULT_SUCCESS; + } + //} // for multimap + + utils_mutex_unlock(AllocImpl_getKnownSlabsMapLock(ai)); + // There is a rare case when we have a pointer from system allocation next + // to some slab with an entry in the map. So we find a slab + // but the range checks fail. + memoryProviderFree(AllocImpl_getMemHandle(ai), Ptr); + return UMF_RESULT_SUCCESS; +} + +/* +// TODO? 
+std::ostream &operator<<(std::ostream &Os, slab_t &Slab) { + Os << "Slab<" << slab_get(&Slab) << ", " << slab_get_end(&Slab) << ", " + << slab_get_bucket(&Slab)->getSize() << ">"; + return Os; +} +*/ + +/* +// TODO move +void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { + if (allocCount) { + if (!TitlePrinted) { + std::cout << Label << " memory statistics\n"; + std::cout << std::setw(14) << "Bucket Size" << std::setw(12) + << "Allocs" << std::setw(12) << "Frees" << std::setw(18) + << "Allocs from Pool" << std::setw(20) + << "Peak Slabs in Use" << std::setw(21) + << "Peak Slabs in Pool" << std::endl; + TitlePrinted = true; + } + std::cout << std::setw(14) << getSize() << std::setw(12) << allocCount + << std::setw(12) << freeCount << std::setw(18) + << allocPoolCount << std::setw(20) << maxSlabsInUse + << std::setw(21) << maxSlabsInPool << std::endl; + } +} +*/ + +umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, + void *params, void **ppPool) { + if (!provider) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + disjoint_pool_t *disjoint_pool = + (disjoint_pool_t *)umf_ba_global_alloc(sizeof(struct disjoint_pool_t)); + if (!disjoint_pool) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + umf_disjoint_pool_params_t *dp_params = + (umf_disjoint_pool_params_t *)params; + + // MinBucketSize parameter must be a power of 2 for bucket sizes + // to generate correctly. + if (!dp_params->MinBucketSize || + !((dp_params->MinBucketSize & (dp_params->MinBucketSize - 1)) == 0)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + disjoint_pool->impl = create_AllocImpl(provider, dp_params); + *ppPool = (void *)disjoint_pool; + + return UMF_RESULT_SUCCESS; +} + +void *disjoint_pool_malloc(void *pool, size_t size) { + // For full-slab allocations indicates + // whether slab is from Pool. + + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + bool FromPool; + void *Ptr = AllocImpl_allocate(hPool->impl, size, &FromPool); + + if (AllocImpl_getParams(hPool->impl)->PoolTrace > 2) { + const char *MT = AllocImpl_getParams(hPool->impl)->Name; + (void)MT; + //std::cout << "Allocated " << std::setw(8) << size << " " << MT + // << " bytes from " << (FromPool ? "Pool" : "Provider") << " ->" + // << Ptr << std::endl; + } + return Ptr; +} + +void *disjoint_pool_calloc(void *pool, size_t num, size_t size) { + (void)pool; + (void)num; + (void)size; + + // Not supported + TLS_last_allocation_error_dp = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_realloc(void *pool, void *ptr, size_t size) { + (void)pool; + (void)ptr; + (void)size; + + // Not supported + TLS_last_allocation_error_dp = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + bool FromPool; + void *Ptr = + AllocImpl_allocate_align(hPool->impl, size, alignment, &FromPool); + + if (AllocImpl_getParams(hPool->impl)->PoolTrace > 2) { + const char *MT = AllocImpl_getParams(hPool->impl)->Name; + (void)MT; + //std::cout << "Allocated " << std::setw(8) << size << " " << MT + // << " bytes aligned at " << alignment << " from " + // << (FromPool ? 
"Pool" : "Provider") << " ->" << Ptr + // << std::endl; + } + + return Ptr; +} + +size_t disjoint_pool_malloc_usable_size(void *pool, void *ptr) { + (void)pool; + (void)ptr; + + // Not supported + return 0; +} + +umf_result_t disjoint_pool_free(void *pool, void *ptr) { + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + bool ToPool; + umf_result_t ret = AllocImpl_deallocate(hPool->impl, ptr, &ToPool); + /* + if (ret == UMF_RESULT_SUCCESS) { + + if (impl->getParams().PoolTrace > 2) { + auto MT = impl->getParams().Name; + std::cout << "Freed " << MT << " " << ptr << " to " + << (ToPool ? "Pool" : "Provider") + << ", Current total pool size " + << impl->getLimits()->TotalSize.load() + << ", Current pool size for " << MT << " " + << impl->getParams().CurPoolSize << "\n"; + } + }*/ + return ret; +} + +umf_result_t disjoint_pool_get_last_allocation_error(void *pool) { + (void)pool; + + return TLS_last_allocation_error_dp; +} + +// Define destructor for use with unique_ptr +void disjoint_pool_finalize(void *pool) { + + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + destroy_AllocImpl(hPool->impl); + + /* + if (impl->getParams().PoolTrace > 1) { + bool TitlePrinted = false; + size_t HighBucketSize; + size_t HighPeakSlabsInUse; + auto name = impl->getParams().Name; + //try { // cannot throw in destructor + impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, + name); + if (TitlePrinted) { + std::cout << "Current Pool Size " + << impl->getLimits()->TotalSize.load() << std::endl; + std::cout << "Suggested Setting=;" + << std::string(1, (char)tolower(name[0])) + << std::string(name + 1) << ":" << HighBucketSize << "," + << HighPeakSlabsInUse << ",64K" << std::endl; + } + //} catch (...) { // ignore exceptions + // } + } + */ +} + +static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = disjoint_pool_initialize, + .finalize = disjoint_pool_finalize, + .malloc = disjoint_pool_malloc, + .calloc = disjoint_pool_calloc, + .realloc = disjoint_pool_realloc, + .aligned_malloc = disjoint_pool_aligned_malloc, + .malloc_usable_size = disjoint_pool_malloc_usable_size, + .free = disjoint_pool_free, + .get_last_allocation_error = disjoint_pool_get_last_allocation_error, +}; + +umf_memory_pool_ops_t *umfDisjointPoolOps(void) { + return &UMF_DISJOINT_POOL_OPS; +} + // TODO remove #ifdef __cplusplus } diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp deleted file mode 100644 index 0837694f4..000000000 --- a/src/pool/pool_disjoint.cpp +++ /dev/null @@ -1,446 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// TODO: replace with logger? 
-#include - -#include "provider/provider_tracking.h" - -#include "uthash/utlist.h" - -#include "../cpp_helpers.hpp" -#include "base_alloc_global.h" -#include "pool_disjoint.h" -#include "umf.h" -#include "utils_common.h" -#include "utils_concurrency.h" -#include "utils_log.h" -#include "utils_math.h" -#include "utils_sanitizers.h" - -#include "critnib/critnib.h" - -// TODO remove -#ifdef __cplusplus -extern "C" { -#endif - -#include "pool_disjoint_temp.h" - -extern __TLS umf_result_t TLS_last_allocation_error_dp; - -#ifdef __cplusplus -} -#endif -// end TODO remove - -umf_disjoint_pool_shared_limits_t * -umfDisjointPoolSharedLimitsCreate(size_t MaxSize) { - return shared_limits_create(MaxSize); -} - -void umfDisjointPoolSharedLimitsDestroy( - umf_disjoint_pool_shared_limits_t *limits) { - shared_limits_destroy(limits); -} - -// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is -// requested. The implementation distinguishes between allocations of size -// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. -// Allocation requests smaller than ChunkCutoff use chunks taken from a single -// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation -// size, and 8-byte allocations, only 1 in ~8000 requests results in a new -// coarse-grain allocation. Freeing results only in a chunk of a larger -// allocation to be marked as available and no real return to the system. An -// allocation is returned to the system only when all chunks in the larger -// allocation are freed by the program. Allocations larger than ChunkCutOff use -// a separate coarse-grain allocation for each request. These are subject to -// "pooling". That is, when such an allocation is freed by the program it is -// retained in a pool. The pool is available for future allocations, which means -// there are fewer actual coarse-grain allocations/deallocations. - -// The largest size which is allocated via the allocator. -// Allocations with size > CutOff bypass the pool and -// go directly to the provider. 
-static constexpr size_t CutOff = (size_t)1 << 31; // 2GB - -typedef struct MemoryProviderError { - umf_result_t code; -} MemoryProviderError_t; - -class DisjointPool { - public: - using Config = umf_disjoint_pool_params_t; - - umf_result_t initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_t *parameters); - void *malloc(size_t size); - void *calloc(size_t, size_t); - void *realloc(void *, size_t); - void *aligned_malloc(size_t size, size_t alignment); - size_t malloc_usable_size(void *); - umf_result_t free(void *ptr); - umf_result_t get_last_allocation_error(); - - DisjointPool(); - ~DisjointPool(); - - private: - AllocImpl *impl; -}; - -static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, - size_t size, size_t alignment = 0) { - void *ptr; - auto ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); - if (ret != UMF_RESULT_SUCCESS) { - TLS_last_allocation_error_dp = ret; - return NULL; - } - annotate_memory_inaccessible(ptr, size); - return ptr; -} - -static umf_result_t memoryProviderFree(umf_memory_provider_handle_t hProvider, - void *ptr) { - size_t size = 0; - - if (ptr) { - umf_alloc_info_t allocInfo = {NULL, 0, NULL}; - umf_result_t umf_result = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); - if (umf_result == UMF_RESULT_SUCCESS) { - size = allocInfo.baseSize; - } - } - - auto ret = umfMemoryProviderFree(hProvider, ptr, size); - if (ret != UMF_RESULT_SUCCESS) { - - TLS_last_allocation_error_dp = ret; - // throw MemoryProviderError{ret}; - return ret; - } - return UMF_RESULT_SUCCESS; -} - -bool operator==(const slab_t &Lhs, const slab_t &Rhs) { - return slab_get(&Lhs) == slab_get(&Rhs); -} - -/* -std::ostream &operator<<(std::ostream &Os, slab_t &Slab) { - Os << "Slab<" << slab_get(&Slab) << ", " << slab_get_end(&Slab) << ", " - << slab_get_bucket(&Slab)->getSize() << ">"; - return Os; -} -*/ - -/* -void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { - if (allocCount) { - if (!TitlePrinted) { - std::cout << Label << " memory statistics\n"; - std::cout << std::setw(14) << "Bucket Size" << std::setw(12) - << "Allocs" << std::setw(12) << "Frees" << std::setw(18) - << "Allocs from Pool" << std::setw(20) - << "Peak Slabs in Use" << std::setw(21) - << "Peak Slabs in Pool" << std::endl; - TitlePrinted = true; - } - std::cout << std::setw(14) << getSize() << std::setw(12) << allocCount - << std::setw(12) << freeCount << std::setw(18) - << allocPoolCount << std::setw(20) << maxSlabsInUse - << std::setw(21) << maxSlabsInPool << std::endl; - } -} -*/ - -void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool) { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - *FromPool = false; - if (Size > AllocImpl_getParams(ai)->MaxPoolableSize) { - Ptr = memoryProviderAlloc(AllocImpl_getMemHandle(ai), Size); - - if (Ptr == NULL) { - // TODO get code from func - TLS_last_allocation_error_dp = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - return nullptr; - } - - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - bucket_t *bucket = AllocImpl_findBucket(ai, Size); - - if (Size > bucket_chunk_cut_off(bucket)) { - Ptr = bucket_get_slab(bucket, FromPool); - } else { - Ptr = bucket_get_chunk(bucket, FromPool); - } - - if (Ptr == NULL) { - // TODO get code from func - TLS_last_allocation_error_dp = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - return nullptr; - } - - if (AllocImpl_getParams(ai)->PoolTrace > 1) { - bucket_count_alloc(bucket, FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(ai, Ptr, Size); - 
annotate_memory_undefined(Ptr, bucket_get_size(bucket)); - - return Ptr; -} - -void *AllocImpl_allocate_align(AllocImpl *ai, size_t Size, size_t Alignment, - bool *FromPool) { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - if (Alignment <= 1) { - return AllocImpl_allocate(ai, Size, FromPool); - } - - size_t AlignedSize; - if (Alignment <= ai->ProviderMinPageSize) { - // This allocation will be served from a Bucket which size is multiple - // of Alignment and Slab address is aligned to ProviderMinPageSize - // so the address will be properly aligned. - AlignedSize = (Size > 1) ? ALIGN_UP(Size, Alignment) : Alignment; - } else { - // Slabs are only aligned to ProviderMinPageSize, we need to compensate - // for that in case the allocation is within pooling limit. - // TODO: consider creating properly-aligned Slabs on demand - AlignedSize = Size + Alignment - 1; - } - - // Check if requested allocation size is within pooling limit. - // If not, just request aligned pointer from the system. - *FromPool = false; - if (AlignedSize > AllocImpl_getParams(ai)->MaxPoolableSize) { - Ptr = memoryProviderAlloc(AllocImpl_getMemHandle(ai), Size, Alignment); - assert(Ptr); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - bucket_t *bucket = AllocImpl_findBucket(ai, AlignedSize); - - if (AlignedSize > bucket_chunk_cut_off(bucket)) { - Ptr = bucket_get_slab(bucket, FromPool); - } else { - Ptr = bucket_get_chunk(bucket, FromPool); - } - - assert(Ptr); - if (AllocImpl_getParams(ai)->PoolTrace > 1) { - bucket_count_alloc(bucket, FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(ai, ALIGN_UP((size_t)Ptr, Alignment), Size); - annotate_memory_undefined((void *)ALIGN_UP((size_t)Ptr, Alignment), Size); - return (void *)ALIGN_UP((size_t)Ptr, Alignment); -} - -umf_result_t AllocImpl_deallocate(AllocImpl *ai, void *Ptr, bool *ToPool) { - if (Ptr == nullptr) { - return UMF_RESULT_SUCCESS; - } - - void *SlabPtr = (void *)ALIGN_DOWN((size_t)Ptr, AllocImpl_SlabMinSize(ai)); - - // Lock the map on read - utils_mutex_lock(AllocImpl_getKnownSlabsMapLock(ai)); - - *ToPool = false; - - slab_t *slab = (slab_t *)critnib_get(ai->known_slabs, (uintptr_t)SlabPtr); - //auto Slabs = getKnownSlabs().equal_range(SlabPtr); - if (slab == NULL) { - utils_mutex_unlock(AllocImpl_getKnownSlabsMapLock(ai)); - umf_result_t ret = memoryProviderFree(AllocImpl_getMemHandle(ai), Ptr); - return ret; - } - - // TODO - no multimap - // for (auto It = Slabs.first; It != Slabs.second; ++It) { - - // The slab object won't be deleted until it's removed from the map which is - // protected by the lock, so it's safe to access it here. - if (Ptr >= slab_get(slab) && Ptr < slab_get_end(slab)) { - // Unlock the map before freeing the chunk, it may be locked on write - // there - utils_mutex_unlock(AllocImpl_getKnownSlabsMapLock(ai)); - bucket_t *bucket = slab_get_bucket(slab); - - if (AllocImpl_getParams(ai)->PoolTrace > 1) { - bucket_count_free(bucket); - } - - VALGRIND_DO_MEMPOOL_FREE(ai, Ptr); - annotate_memory_inaccessible(Ptr, bucket_get_size(bucket)); - if (bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket)) { - bucket_free_chunk(bucket, Ptr, slab, ToPool); - } else { - bucket_free_slab(bucket, slab, ToPool); - } - - return UMF_RESULT_SUCCESS; - } - //} // for multimap - - utils_mutex_unlock(AllocImpl_getKnownSlabsMapLock(ai)); - // There is a rare case when we have a pointer from system allocation next - // to some slab with an entry in the map. So we find a slab - // but the range checks fail. 
- memoryProviderFree(AllocImpl_getMemHandle(ai), Ptr); - return UMF_RESULT_SUCCESS; -} - -umf_result_t DisjointPool::initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_t *parameters) { - if (!provider) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - // MinBucketSize parameter must be a power of 2 for bucket sizes - // to generate correctly. - if (!parameters->MinBucketSize || - !((parameters->MinBucketSize & (parameters->MinBucketSize - 1)) == 0)) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - impl = create_AllocImpl(provider, parameters); - return UMF_RESULT_SUCCESS; -} - -void *DisjointPool::malloc(size_t size) { // For full-slab allocations indicates - // whether slab is from Pool. - bool FromPool; - void *Ptr = AllocImpl_allocate(impl, size, &FromPool); - - if (AllocImpl_getParams(impl)->PoolTrace > 2) { - auto MT = AllocImpl_getParams(impl)->Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes from " << (FromPool ? "Pool" : "Provider") << " ->" - << Ptr << std::endl; - } - return Ptr; -} - -void *DisjointPool::calloc(size_t, size_t) { - // Not supported - TLS_last_allocation_error_dp = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::realloc(void *, size_t) { - // Not supported - TLS_last_allocation_error_dp = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::aligned_malloc(size_t size, size_t alignment) { - bool FromPool; - auto Ptr = AllocImpl_allocate_align(impl, size, alignment, &FromPool); - - if (AllocImpl_getParams(impl)->PoolTrace > 2) { - auto MT = AllocImpl_getParams(impl)->Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes aligned at " << alignment << " from " - << (FromPool ? "Pool" : "Provider") << " ->" << Ptr - << std::endl; - } - return Ptr; -} - -size_t DisjointPool::malloc_usable_size(void *) { - // Not supported - return 0; -} - -umf_result_t DisjointPool::free(void *ptr) { - bool ToPool; - umf_result_t ret = AllocImpl_deallocate(impl, ptr, &ToPool); - /* - if (ret == UMF_RESULT_SUCCESS) { - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Freed " << MT << " " << ptr << " to " - << (ToPool ? "Pool" : "Provider") - << ", Current total pool size " - << impl->getLimits()->TotalSize.load() - << ", Current pool size for " << MT << " " - << impl->getParams().CurPoolSize << "\n"; - } - }*/ - return ret; -} - -umf_result_t DisjointPool::get_last_allocation_error() { - return TLS_last_allocation_error_dp; -} - -DisjointPool::DisjointPool() {} - -// Define destructor for use with unique_ptr -DisjointPool::~DisjointPool() { - destroy_AllocImpl(impl); - /* - if (impl->getParams().PoolTrace > 1) { - bool TitlePrinted = false; - size_t HighBucketSize; - size_t HighPeakSlabsInUse; - auto name = impl->getParams().Name; - //try { // cannot throw in destructor - impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, - name); - if (TitlePrinted) { - std::cout << "Current Pool Size " - << impl->getLimits()->TotalSize.load() << std::endl; - std::cout << "Suggested Setting=;" - << std::string(1, (char)tolower(name[0])) - << std::string(name + 1) << ":" << HighBucketSize << "," - << HighPeakSlabsInUse << ",64K" << std::endl; - } - //} catch (...) 
{ // ignore exceptions - // } - } - */ -} - -static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = - umf::poolMakeCOps(); - -umf_memory_pool_ops_t *umfDisjointPoolOps(void) { - return &UMF_DISJOINT_POOL_OPS; -} diff --git a/src/pool/pool_disjoint_temp.h b/src/pool/pool_disjoint_temp.h index 86dc5c87a..47d2808cc 100644 --- a/src/pool/pool_disjoint_temp.h +++ b/src/pool/pool_disjoint_temp.h @@ -6,8 +6,6 @@ void annotate_memory_inaccessible(void *ptr, size_t size); void annotate_memory_undefined(void *ptr, size_t size); -umf_disjoint_pool_shared_limits_t *shared_limits_create(size_t max_size); -void shared_limits_destroy(umf_disjoint_pool_shared_limits_t *shared_limits); typedef struct slab_list_item_t slab_list_item_t; From 5713174894d97725ec006e0b78b7b221623dd0a3 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 14 Nov 2024 13:27:50 +0100 Subject: [PATCH 23/26] remove pool_disjoint_temp.h --- src/pool/pool_disjoint.c | 199 ++++++++++++++++++++++++++++++++-- src/pool/pool_disjoint_temp.h | 190 -------------------------------- 2 files changed, 189 insertions(+), 200 deletions(-) delete mode 100644 src/pool/pool_disjoint_temp.h diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index b10d59eb1..887707161 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -26,12 +26,77 @@ #include "utils_math.h" #include "utils_sanitizers.h" -#include "pool_disjoint_temp.h" +typedef struct bucket_t bucket_t; +typedef struct slab_t slab_t; +typedef struct slab_list_item_t slab_list_item_t; +typedef struct AllocImpl AllocImpl; -// TODO remove -#ifdef __cplusplus -extern "C" { -#endif + +slab_t *create_slab(bucket_t *bucket); +void destroy_slab(slab_t *slab); + +void *slab_get(const slab_t *slab); +void *slab_get_end(const slab_t *slab); +bucket_t *slab_get_bucket(slab_t *slab); +void *slab_get_chunk(slab_t *slab); +size_t slab_get_num_chunks(const slab_t *slab); +size_t slab_get_chunk_size(const slab_t *slab); +size_t slab_get_num_allocated(const slab_t *slab); + +bool slab_has_avail(const slab_t *slab); +void slab_free_chunk(slab_t *slab, void *ptr); + +void slab_reg(slab_t *slab); +void slab_reg_by_addr(void *addr, slab_t *slab); +void slab_unreg(slab_t *slab); +void slab_unreg_by_addr(void *addr, slab_t *slab); + +bucket_t *create_bucket(size_t sz, void *alloc_ctx, + umf_disjoint_pool_shared_limits_t *shared_limits); +void destroy_bucket(bucket_t *bucket); + +void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); +bool bucket_can_pool(bucket_t *bucket, bool *to_pool); +void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *to_pool); +void bucket_decrement_pool(bucket_t *bucket, bool *from_pool); +void *bucket_get_chunk(bucket_t *bucket, bool *from_pool); +size_t bucket_get_size(bucket_t *bucket); +size_t bucket_chunk_cut_off(bucket_t *bucket); +size_t bucket_capacity(bucket_t *bucket); +void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, + bool *to_pool); +void bucket_count_alloc(bucket_t *bucket, bool from_pool); +void bucket_count_free(bucket_t *bucket); + +void *bucket_get_slab(bucket_t *bucket, bool *from_pool); +size_t bucket_slab_alloc_size(bucket_t *bucket); +size_t bucket_slab_min_size(bucket_t *bucket); +slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); +slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, bool *from_pool); +void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool); + +umf_disjoint_pool_shared_limits_t *bucket_get_limits(bucket_t *bucket); 
+umf_disjoint_pool_params_t *bucket_get_params(bucket_t *bucket); +umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket); +utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket); +critnib *bucket_get_known_slabs(bucket_t *bucket); + +AllocImpl *create_AllocImpl(umf_memory_provider_handle_t hProvider, + umf_disjoint_pool_params_t *params); +void destroy_AllocImpl(AllocImpl *ai); + +bucket_t *AllocImpl_findBucket(AllocImpl *ai, size_t Size); +umf_result_t AllocImpl_deallocate(AllocImpl *ai, void *Ptr, bool *ToPool); +umf_disjoint_pool_shared_limits_t *AllocImpl_getLimits(AllocImpl *ai); +void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool); +void *AllocImpl_allocate_align(AllocImpl *ai, size_t Size, size_t Alignment, + bool *FromPool); + +umf_memory_provider_handle_t AllocImpl_getMemHandle(AllocImpl *ai); +utils_mutex_t *AllocImpl_getKnownSlabsMapLock(AllocImpl *ai); +critnib *AllocImpl_getKnownSlabs(AllocImpl *ai); +size_t AllocImpl_SlabMinSize(AllocImpl *ai); +umf_disjoint_pool_params_t *AllocImpl_getParams(AllocImpl *ai); //static <- make static rename to TLS_last_allocation_error __TLS umf_result_t TLS_last_allocation_error_dp; @@ -79,6 +144,97 @@ static size_t CutOff = (size_t)1 << 31; // 2GB #endif } +void annotate_memory_inaccessible(void *ptr, size_t size); +void annotate_memory_undefined(void *ptr, size_t size); + + +typedef struct slab_list_item_t slab_list_item_t; + +typedef struct bucket_t { + size_t Size; + + // List of slabs which have at least 1 available chunk. + slab_list_item_t *AvailableSlabs; + + // List of slabs with 0 available chunk. + slab_list_item_t *UnavailableSlabs; + + // Protects the bucket and all the corresponding slabs + utils_mutex_t bucket_lock; + + // Reference to the allocator context, used access memory allocation + // routines, slab map and etc. + void *OwnAllocCtx; + + umf_disjoint_pool_shared_limits_t *shared_limits; + + // For buckets used in chunked mode, a counter of slabs in the pool. + // For allocations that use an entire slab each, the entries in the Available + // list are entries in the pool.Each slab is available for a new + // allocation.The size of the Available list is the size of the pool. + // For allocations that use slabs in chunked mode, slabs will be in the + // Available list if any one or more of their chunks is free.The entire slab + // is not necessarily free, just some chunks in the slab are free. To + // implement pooling we will allow one slab in the Available list to be + // entirely empty. Normally such a slab would have been freed. But + // now we don't, and treat this slab as "in the pool". + // When a slab becomes entirely free we have to decide whether to return it + // to the provider or keep it allocated. A simple check for size of the + // Available list is not sufficient to check whether any slab has been + // pooled yet.We would have to traverse the entire Available listand check + // if any of them is entirely free. Instead we keep a counter of entirely + // empty slabs within the Available list to speed up the process of checking + // if a slab in this bucket is already pooled. + size_t chunkedSlabsInPool; + + // Statistics + size_t allocPoolCount; + size_t freeCount; + size_t currSlabsInUse; + size_t currSlabsInPool; + size_t maxSlabsInPool; + + // Statistics + size_t allocCount; + size_t maxSlabsInUse; + +} bucket_t; + +// Represents the allocated memory block of size 'slab_min_size' +// Internally, it splits the memory block into chunks. 
The number of +// chunks depends of the size of a Bucket which created the Slab. +// Note: Bucket's methods are responsible for thread safety of Slab access, +// so no locking happens here. +typedef struct slab_t { + // Pointer to the allocated memory of slab_min_size bytes + void *mem_ptr; + size_t slab_size; + + // Represents the current state of each chunk: if the bit is set then the + // chunk is allocated, and if the chunk is free for allocation otherwise + bool *chunks; + size_t num_chunks; + + // Total number of allocated chunks at the moment. + size_t num_allocated; + + // The bucket which the slab belongs to + bucket_t *bucket; + + // Hints where to start search for free chunk in a slab + size_t first_free_chunk_idx; + + // Store iterator to the corresponding node in avail/unavail list + // to achieve O(1) removal + slab_list_item_t *iter; +} slab_t; + +typedef struct slab_list_item_t { + slab_t *val; + struct slab_list_item_t *prev, *next; +} slab_list_item_t; + + typedef struct umf_disjoint_pool_shared_limits_t { size_t max_size; size_t total_size; // requires atomic access @@ -98,6 +254,34 @@ void umfDisjointPoolSharedLimitsDestroy( umf_ba_global_free(limits); } +typedef struct AllocImpl { + // It's important for the map to be destroyed last after buckets and their + // slabs This is because slab's destructor removes the object from the map. + critnib *known_slabs; // (void *, slab_t *) + + // prev std::shared_timed_mutex - ok? + utils_mutex_t known_slabs_map_lock; + + // Handle to the memory provider + umf_memory_provider_handle_t MemHandle; + + // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) + bucket_t **buckets; + size_t buckets_num; + + // Configuration for this instance + umf_disjoint_pool_params_t params; + + umf_disjoint_pool_shared_limits_t *DefaultSharedLimits; + + // Used in algorithm for finding buckets + size_t MinBucketSizeExp; + + // Coarse-grain allocation min alignment + size_t ProviderMinPageSize; + +} AllocImpl; + typedef struct disjoint_pool_t { AllocImpl *impl; } disjoint_pool_t; @@ -1185,8 +1369,3 @@ static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = { umf_memory_pool_ops_t *umfDisjointPoolOps(void) { return &UMF_DISJOINT_POOL_OPS; } - -// TODO remove -#ifdef __cplusplus -} -#endif diff --git a/src/pool/pool_disjoint_temp.h b/src/pool/pool_disjoint_temp.h deleted file mode 100644 index 47d2808cc..000000000 --- a/src/pool/pool_disjoint_temp.h +++ /dev/null @@ -1,190 +0,0 @@ - - -#ifndef TEMP_H -#define TEMP_H 1 - -void annotate_memory_inaccessible(void *ptr, size_t size); -void annotate_memory_undefined(void *ptr, size_t size); - - -typedef struct slab_list_item_t slab_list_item_t; - -typedef struct bucket_t { - size_t Size; - - // List of slabs which have at least 1 available chunk. - slab_list_item_t *AvailableSlabs; - - // List of slabs with 0 available chunk. - slab_list_item_t *UnavailableSlabs; - - // Protects the bucket and all the corresponding slabs - utils_mutex_t bucket_lock; - - // Reference to the allocator context, used access memory allocation - // routines, slab map and etc. - void *OwnAllocCtx; - - umf_disjoint_pool_shared_limits_t *shared_limits; - - // For buckets used in chunked mode, a counter of slabs in the pool. - // For allocations that use an entire slab each, the entries in the Available - // list are entries in the pool.Each slab is available for a new - // allocation.The size of the Available list is the size of the pool. 
- // For allocations that use slabs in chunked mode, slabs will be in the - // Available list if any one or more of their chunks is free.The entire slab - // is not necessarily free, just some chunks in the slab are free. To - // implement pooling we will allow one slab in the Available list to be - // entirely empty. Normally such a slab would have been freed. But - // now we don't, and treat this slab as "in the pool". - // When a slab becomes entirely free we have to decide whether to return it - // to the provider or keep it allocated. A simple check for size of the - // Available list is not sufficient to check whether any slab has been - // pooled yet.We would have to traverse the entire Available listand check - // if any of them is entirely free. Instead we keep a counter of entirely - // empty slabs within the Available list to speed up the process of checking - // if a slab in this bucket is already pooled. - size_t chunkedSlabsInPool; - - // Statistics - size_t allocPoolCount; - size_t freeCount; - size_t currSlabsInUse; - size_t currSlabsInPool; - size_t maxSlabsInPool; - - // Statistics - size_t allocCount; - size_t maxSlabsInUse; - -} bucket_t; - -// Represents the allocated memory block of size 'slab_min_size' -// Internally, it splits the memory block into chunks. The number of -// chunks depends of the size of a Bucket which created the Slab. -// Note: Bucket's methods are responsible for thread safety of Slab access, -// so no locking happens here. -typedef struct slab_t { - // Pointer to the allocated memory of slab_min_size bytes - void *mem_ptr; - size_t slab_size; - - // Represents the current state of each chunk: if the bit is set then the - // chunk is allocated, and if the chunk is free for allocation otherwise - bool *chunks; - size_t num_chunks; - - // Total number of allocated chunks at the moment. 
- size_t num_allocated; - - // The bucket which the slab belongs to - bucket_t *bucket; - - // Hints where to start search for free chunk in a slab - size_t first_free_chunk_idx; - - // Store iterator to the corresponding node in avail/unavail list - // to achieve O(1) removal - slab_list_item_t *iter; -} slab_t; - -typedef struct slab_list_item_t { - slab_t *val; - struct slab_list_item_t *prev, *next; -} slab_list_item_t; - -slab_t *create_slab(bucket_t *bucket); -void destroy_slab(slab_t *slab); - -void *slab_get(const slab_t *slab); -void *slab_get_end(const slab_t *slab); -bucket_t *slab_get_bucket(slab_t *slab); -void *slab_get_chunk(slab_t *slab); -size_t slab_get_num_chunks(const slab_t *slab); -size_t slab_get_chunk_size(const slab_t *slab); -size_t slab_get_num_allocated(const slab_t *slab); - -bool slab_has_avail(const slab_t *slab); -void slab_free_chunk(slab_t *slab, void *ptr); - -void slab_reg(slab_t *slab); -void slab_reg_by_addr(void *addr, slab_t *slab); -void slab_unreg(slab_t *slab); -void slab_unreg_by_addr(void *addr, slab_t *slab); - -bucket_t *create_bucket(size_t sz, void *alloc_ctx, - umf_disjoint_pool_shared_limits_t *shared_limits); -void destroy_bucket(bucket_t *bucket); - -void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); -bool bucket_can_pool(bucket_t *bucket, bool *to_pool); -void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *to_pool); -void bucket_decrement_pool(bucket_t *bucket, bool *from_pool); -void *bucket_get_chunk(bucket_t *bucket, bool *from_pool); -size_t bucket_get_size(bucket_t *bucket); -size_t bucket_chunk_cut_off(bucket_t *bucket); -size_t bucket_capacity(bucket_t *bucket); -void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, - bool *to_pool); -void bucket_count_alloc(bucket_t *bucket, bool from_pool); -void bucket_count_free(bucket_t *bucket); - -void *bucket_get_slab(bucket_t *bucket, bool *from_pool); -size_t bucket_slab_alloc_size(bucket_t *bucket); -size_t bucket_slab_min_size(bucket_t *bucket); -slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); -slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, bool *from_pool); -void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool); - -umf_disjoint_pool_shared_limits_t *bucket_get_limits(bucket_t *bucket); -umf_disjoint_pool_params_t *bucket_get_params(bucket_t *bucket); -umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket); -utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket); -critnib *bucket_get_known_slabs(bucket_t *bucket); - -typedef struct AllocImpl { - // It's important for the map to be destroyed last after buckets and their - // slabs This is because slab's destructor removes the object from the map. - critnib *known_slabs; // (void *, slab_t *) - - // prev std::shared_timed_mutex - ok? 
- utils_mutex_t known_slabs_map_lock; - - // Handle to the memory provider - umf_memory_provider_handle_t MemHandle; - - // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) - bucket_t **buckets; - size_t buckets_num; - - // Configuration for this instance - umf_disjoint_pool_params_t params; - - umf_disjoint_pool_shared_limits_t *DefaultSharedLimits; - - // Used in algorithm for finding buckets - size_t MinBucketSizeExp; - - // Coarse-grain allocation min alignment - size_t ProviderMinPageSize; - -} AllocImpl; - -AllocImpl *create_AllocImpl(umf_memory_provider_handle_t hProvider, - umf_disjoint_pool_params_t *params); -void destroy_AllocImpl(AllocImpl *ai); - -bucket_t *AllocImpl_findBucket(AllocImpl *ai, size_t Size); -umf_result_t AllocImpl_deallocate(AllocImpl *ai, void *Ptr, bool *ToPool); -umf_disjoint_pool_shared_limits_t *AllocImpl_getLimits(AllocImpl *ai); -void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool); -void *AllocImpl_allocate_align(AllocImpl *ai, size_t Size, size_t Alignment, - bool *FromPool); - -umf_memory_provider_handle_t AllocImpl_getMemHandle(AllocImpl *ai); -utils_mutex_t *AllocImpl_getKnownSlabsMapLock(AllocImpl *ai); -critnib *AllocImpl_getKnownSlabs(AllocImpl *ai); -size_t AllocImpl_SlabMinSize(AllocImpl *ai); -umf_disjoint_pool_params_t *AllocImpl_getParams(AllocImpl *ai); - -#endif // TEMP_H From 769268bfefaf772732d767216200b8dbdf5eadff Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 14 Nov 2024 13:29:09 +0100 Subject: [PATCH 24/26] rename TLS_last_allocation_error --- src/pool/pool_disjoint.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 887707161..9289b2c26 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -31,7 +31,6 @@ typedef struct slab_t slab_t; typedef struct slab_list_item_t slab_list_item_t; typedef struct AllocImpl AllocImpl; - slab_t *create_slab(bucket_t *bucket); void destroy_slab(slab_t *slab); @@ -98,8 +97,7 @@ critnib *AllocImpl_getKnownSlabs(AllocImpl *ai); size_t AllocImpl_SlabMinSize(AllocImpl *ai); umf_disjoint_pool_params_t *AllocImpl_getParams(AllocImpl *ai); -//static <- make static rename to TLS_last_allocation_error -__TLS umf_result_t TLS_last_allocation_error_dp; +static __TLS umf_result_t TLS_last_allocation_error; // Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is // requested. 
The implementation distinguishes between allocations of size @@ -147,7 +145,6 @@ static size_t CutOff = (size_t)1 << 31; // 2GB void annotate_memory_inaccessible(void *ptr, size_t size); void annotate_memory_undefined(void *ptr, size_t size); - typedef struct slab_list_item_t slab_list_item_t; typedef struct bucket_t { @@ -234,7 +231,6 @@ typedef struct slab_list_item_t { struct slab_list_item_t *prev, *next; } slab_list_item_t; - typedef struct umf_disjoint_pool_shared_limits_t { size_t max_size; size_t total_size; // requires atomic access @@ -991,7 +987,7 @@ static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, void *ptr; umf_result_t ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); if (ret != UMF_RESULT_SUCCESS) { - TLS_last_allocation_error_dp = ret; + TLS_last_allocation_error = ret; return NULL; } annotate_memory_inaccessible(ptr, size); @@ -1013,7 +1009,7 @@ static umf_result_t memoryProviderFree(umf_memory_provider_handle_t hProvider, umf_result_t ret = umfMemoryProviderFree(hProvider, ptr, size); if (ret != UMF_RESULT_SUCCESS) { - TLS_last_allocation_error_dp = ret; + TLS_last_allocation_error = ret; // throw MemoryProviderError{ret}; return ret; } @@ -1033,7 +1029,7 @@ void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool) { if (Ptr == NULL) { // TODO get code from func - TLS_last_allocation_error_dp = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; return NULL; } @@ -1051,7 +1047,7 @@ void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool) { if (Ptr == NULL) { // TODO get code from func - TLS_last_allocation_error_dp = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; return NULL; } @@ -1256,7 +1252,7 @@ void *disjoint_pool_calloc(void *pool, size_t num, size_t size) { (void)size; // Not supported - TLS_last_allocation_error_dp = UMF_RESULT_ERROR_NOT_SUPPORTED; + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; return NULL; } @@ -1266,7 +1262,7 @@ void *disjoint_pool_realloc(void *pool, void *ptr, size_t size) { (void)size; // Not supported - TLS_last_allocation_error_dp = UMF_RESULT_ERROR_NOT_SUPPORTED; + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; return NULL; } @@ -1321,7 +1317,7 @@ umf_result_t disjoint_pool_free(void *pool, void *ptr) { umf_result_t disjoint_pool_get_last_allocation_error(void *pool) { (void)pool; - return TLS_last_allocation_error_dp; + return TLS_last_allocation_error; } // Define destructor for use with unique_ptr From 81900c552808318f12bed22333af1c87183dcabf Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 14 Nov 2024 14:01:01 +0100 Subject: [PATCH 25/26] cleanup --- src/pool/CMakeLists.txt | 3 +- src/pool/pool_disjoint.c | 320 +++++++++++++++++---------------------- 2 files changed, 143 insertions(+), 180 deletions(-) diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index 775bb1347..56ec58de9 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -14,8 +14,7 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT) add_umf_library( NAME disjoint_pool TYPE STATIC - SRCS pool_disjoint.c ../critnib/critnib.c - ${POOL_EXTRA_SRCS} + SRCS pool_disjoint.c ../critnib/critnib.c ${POOL_EXTRA_SRCS} LIBS ${POOL_EXTRA_LIBS}) target_compile_definitions(disjoint_pool diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 9289b2c26..cb4ec6828 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -29,7 +29,7 @@ typedef 
struct bucket_t bucket_t; typedef struct slab_t slab_t; typedef struct slab_list_item_t slab_list_item_t; -typedef struct AllocImpl AllocImpl; +typedef struct disjoint_pool_t disjoint_pool_t; slab_t *create_slab(bucket_t *bucket); void destroy_slab(slab_t *slab); @@ -50,7 +50,7 @@ void slab_reg_by_addr(void *addr, slab_t *slab); void slab_unreg(slab_t *slab); void slab_unreg_by_addr(void *addr, slab_t *slab); -bucket_t *create_bucket(size_t sz, void *alloc_ctx, +bucket_t *create_bucket(size_t sz, disjoint_pool_t *pool, umf_disjoint_pool_shared_limits_t *shared_limits); void destroy_bucket(bucket_t *bucket); @@ -74,28 +74,27 @@ slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, bool *from_pool); void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool); -umf_disjoint_pool_shared_limits_t *bucket_get_limits(bucket_t *bucket); +const umf_disjoint_pool_shared_limits_t *bucket_get_limits(bucket_t *bucket); umf_disjoint_pool_params_t *bucket_get_params(bucket_t *bucket); umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket); utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket); critnib *bucket_get_known_slabs(bucket_t *bucket); -AllocImpl *create_AllocImpl(umf_memory_provider_handle_t hProvider, - umf_disjoint_pool_params_t *params); -void destroy_AllocImpl(AllocImpl *ai); - -bucket_t *AllocImpl_findBucket(AllocImpl *ai, size_t Size); -umf_result_t AllocImpl_deallocate(AllocImpl *ai, void *Ptr, bool *ToPool); -umf_disjoint_pool_shared_limits_t *AllocImpl_getLimits(AllocImpl *ai); -void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool); -void *AllocImpl_allocate_align(AllocImpl *ai, size_t Size, size_t Alignment, - bool *FromPool); - -umf_memory_provider_handle_t AllocImpl_getMemHandle(AllocImpl *ai); -utils_mutex_t *AllocImpl_getKnownSlabsMapLock(AllocImpl *ai); -critnib *AllocImpl_getKnownSlabs(AllocImpl *ai); -size_t AllocImpl_SlabMinSize(AllocImpl *ai); -umf_disjoint_pool_params_t *AllocImpl_getParams(AllocImpl *ai); +bucket_t *disjoint_pool_findBucket(disjoint_pool_t *pool, size_t Size); +umf_result_t disjoint_pool_deallocate(disjoint_pool_t *pool, void *Ptr, + bool *ToPool); +umf_disjoint_pool_shared_limits_t * +disjoint_pool_getLimits(disjoint_pool_t *pool); +void *disjoint_pool_allocate(disjoint_pool_t *pool, size_t Size, + bool *FromPool); +void *disjoint_pool_allocate_align(disjoint_pool_t *pool, size_t Size, + size_t Alignment, bool *FromPool); + +umf_memory_provider_handle_t disjoint_pool_getMemHandle(disjoint_pool_t *pool); +utils_mutex_t *disjoint_pool_getKnownSlabsMapLock(disjoint_pool_t *pool); +critnib *disjoint_pool_getKnownSlabs(disjoint_pool_t *pool); +size_t disjoint_pool_SlabMinSize(disjoint_pool_t *pool); +umf_disjoint_pool_params_t *disjoint_pool_getParams(disjoint_pool_t *pool); static __TLS umf_result_t TLS_last_allocation_error; @@ -161,7 +160,7 @@ typedef struct bucket_t { // Reference to the allocator context, used access memory allocation // routines, slab map and etc. - void *OwnAllocCtx; + disjoint_pool_t *pool; umf_disjoint_pool_shared_limits_t *shared_limits; @@ -250,7 +249,7 @@ void umfDisjointPoolSharedLimitsDestroy( umf_ba_global_free(limits); } -typedef struct AllocImpl { +typedef struct disjoint_pool_t { // It's important for the map to be destroyed last after buckets and their // slabs This is because slab's destructor removes the object from the map. 
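    // Editorial note, not part of the original change: known_slabs maps a
    // slab's boundary addresses (aligned down to SlabMinSize) to the owning
    // slab_t, which is how deallocate() finds the slab for a pointer, e.g.:
    //   void *key = (void *)ALIGN_DOWN((size_t)ptr, params.SlabMinSize);
    //   slab_t *slab = (slab_t *)critnib_get(known_slabs, (uintptr_t)key);
    // falling back to a plain provider free when no slab is registered.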
critnib *known_slabs; // (void *, slab_t *) @@ -275,11 +274,6 @@ typedef struct AllocImpl { // Coarse-grain allocation min alignment size_t ProviderMinPageSize; - -} AllocImpl; - -typedef struct disjoint_pool_t { - AllocImpl *impl; } disjoint_pool_t; size_t bucket_get_size(bucket_t *bucket); @@ -459,12 +453,12 @@ void slab_unreg(slab_t *slab) { slab_unreg_by_addr(end_addr, slab); } -bucket_t *create_bucket(size_t Sz, void *AllocCtx, +bucket_t *create_bucket(size_t Sz, disjoint_pool_t *pool, umf_disjoint_pool_shared_limits_t *shared_limits) { bucket_t *bucket = (bucket_t *)umf_ba_global_alloc(sizeof(bucket_t)); bucket->Size = Sz; - bucket->OwnAllocCtx = AllocCtx; + bucket->pool = pool; bucket->AvailableSlabs = NULL; bucket->UnavailableSlabs = NULL; bucket->chunkedSlabsInPool = 0; @@ -532,7 +526,7 @@ void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *ToPool) { // Return the allocation size of this bucket. size_t bucket_get_size(bucket_t *bucket) { return bucket->Size; } -void *bucket_get_alloc_ctx(bucket_t *bucket) { return bucket->OwnAllocCtx; } +disjoint_pool_t *bucket_get_alloc_ctx(bucket_t *bucket) { return bucket->pool; } void bucket_count_free(bucket_t *bucket) { ++bucket->freeCount; } @@ -765,23 +759,19 @@ bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { } umf_disjoint_pool_params_t *bucket_get_params(bucket_t *bucket) { - AllocImpl *t = (AllocImpl *)bucket->OwnAllocCtx; - return AllocImpl_getParams(t); + return &bucket->pool->params; } umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket) { - AllocImpl *t = (AllocImpl *)bucket->OwnAllocCtx; - return AllocImpl_getMemHandle(t); + return bucket->pool->MemHandle; } critnib *bucket_get_known_slabs(bucket_t *bucket) { - AllocImpl *t = (AllocImpl *)bucket->OwnAllocCtx; - return AllocImpl_getKnownSlabs(t); + return bucket->pool->known_slabs; } utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket) { - AllocImpl *t = (AllocImpl *)bucket->OwnAllocCtx; - return AllocImpl_getKnownSlabsMapLock(t); + return &bucket->pool->known_slabs_map_lock; } void slab_reg_by_addr(void *addr, slab_t *slab) { @@ -791,7 +781,7 @@ void slab_reg_by_addr(void *addr, slab_t *slab) { utils_mutex_lock(lock); - // TODO multimap + // TODO multimap? slab_t *t = (slab_t *)critnib_get(slabs, (uintptr_t)addr); assert(t == NULL); (void)t; @@ -829,99 +819,11 @@ void slab_unreg_by_addr(void *addr, slab_t *slab) { utils_mutex_unlock(lock); } -AllocImpl *create_AllocImpl(umf_memory_provider_handle_t hProvider, - umf_disjoint_pool_params_t *params) { - - AllocImpl *ai = (AllocImpl *)umf_ba_global_alloc(sizeof(AllocImpl)); - - VALGRIND_DO_CREATE_MEMPOOL(ai, 0, 0); - ai->MemHandle = hProvider; - ai->params = *params; - - utils_mutex_init(&ai->known_slabs_map_lock); - ai->known_slabs = critnib_new(); - - // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. - // Powers of 2 and the value halfway between the powers of 2. - size_t Size1 = ai->params.MinBucketSize; - - // MinBucketSize cannot be larger than CutOff. - Size1 = utils_min(Size1, CutOff); - - // Buckets sized smaller than the bucket default size- 8 aren't needed. - Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); - - // Calculate the exponent for MinBucketSize used for finding buckets. 
- ai->MinBucketSizeExp = (size_t)log2Utils(Size1); - ai->DefaultSharedLimits = umfDisjointPoolSharedLimitsCreate(SIZE_MAX); - - // count number of buckets, start from 1 - ai->buckets_num = 1; - size_t Size2 = Size1 + Size1 / 2; - size_t ts2 = Size2, ts1 = Size1; - for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { - ai->buckets_num += 2; - } - ai->buckets = - (bucket_t **)umf_ba_global_alloc(sizeof(bucket_t *) * ai->buckets_num); - - int i = 0; - Size1 = ts1; - Size2 = ts2; - for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2, i += 2) { - ai->buckets[i] = create_bucket(Size1, ai, AllocImpl_getLimits(ai)); - ai->buckets[i + 1] = create_bucket(Size2, ai, AllocImpl_getLimits(ai)); - } - ai->buckets[i] = create_bucket(CutOff, ai, AllocImpl_getLimits(ai)); - - umf_result_t ret = umfMemoryProviderGetMinPageSize( - hProvider, NULL, &ai->ProviderMinPageSize); - if (ret != UMF_RESULT_SUCCESS) { - ai->ProviderMinPageSize = 0; - } - - return ai; -} - -void destroy_AllocImpl(AllocImpl *ai) { - // TODO - // destroy DefaultSharedLimits - - for (size_t i = 0; i < ai->buckets_num; i++) { - destroy_bucket(ai->buckets[i]); - } - - VALGRIND_DO_DESTROY_MEMPOOL(ai); - - umfDisjointPoolSharedLimitsDestroy(ai->DefaultSharedLimits); - critnib_delete(ai->known_slabs); - - utils_mutex_destroy_not_free(&ai->known_slabs_map_lock); - - umf_ba_global_free(ai); -} - -umf_memory_provider_handle_t AllocImpl_getMemHandle(AllocImpl *ai) { - return ai->MemHandle; -} - -utils_mutex_t *AllocImpl_getKnownSlabsMapLock(AllocImpl *ai) { - return &ai->known_slabs_map_lock; -} - -critnib *AllocImpl_getKnownSlabs(AllocImpl *ai) { return ai->known_slabs; } - -size_t AllocImpl_SlabMinSize(AllocImpl *ai) { return ai->params.SlabMinSize; } - -umf_disjoint_pool_params_t *AllocImpl_getParams(AllocImpl *ai) { - return &ai->params; -} - -size_t AllocImpl_sizeToIdx(AllocImpl *ai, size_t size) { +size_t AllocImpl_sizeToIdx(disjoint_pool_t *pool, size_t size) { assert(size <= CutOff && "Unexpected size"); assert(size > 0 && "Unexpected size"); - size_t MinBucketSize = (size_t)1 << ai->MinBucketSizeExp; + size_t MinBucketSize = (size_t)1 << pool->MinBucketSizeExp; if (size < MinBucketSize) { return 0; } @@ -932,36 +834,36 @@ size_t AllocImpl_sizeToIdx(AllocImpl *ai, size_t size) { bool isPowerOf2 = 0 == (size & (size - 1)); bool largerThanHalfwayBetweenPowersOf2 = !isPowerOf2 && (bool)((size - 1) & ((uint64_t)(1) << (position - 1))); - size_t index = (position - ai->MinBucketSizeExp) * 2 + (int)(!isPowerOf2) + - (int)largerThanHalfwayBetweenPowersOf2; + size_t index = (position - pool->MinBucketSizeExp) * 2 + + (int)(!isPowerOf2) + (int)largerThanHalfwayBetweenPowersOf2; return index; } -umf_disjoint_pool_shared_limits_t *AllocImpl_getLimits(AllocImpl *ai) { - if (ai->params.SharedLimits) { - return ai->params.SharedLimits; +umf_disjoint_pool_shared_limits_t *AllocImpl_getLimits(disjoint_pool_t *pool) { + if (pool->params.SharedLimits) { + return pool->params.SharedLimits; } else { - return ai->DefaultSharedLimits; + return pool->DefaultSharedLimits; } } -bucket_t *AllocImpl_findBucket(AllocImpl *ai, size_t Size) { - size_t calculatedIdx = AllocImpl_sizeToIdx(ai, Size); - bucket_t *bucket = ai->buckets[calculatedIdx]; +bucket_t *AllocImpl_findBucket(disjoint_pool_t *pool, size_t Size) { + size_t calculatedIdx = AllocImpl_sizeToIdx(pool, Size); + bucket_t *bucket = pool->buckets[calculatedIdx]; assert(bucket_get_size(bucket) >= Size); (void)bucket; if (calculatedIdx > 0) { - bucket_t *bucket_prev = ai->buckets[calculatedIdx - 1]; + bucket_t *bucket_prev 
= pool->buckets[calculatedIdx - 1]; assert(bucket_get_size(bucket_prev) < Size); (void)bucket_prev; } - return ai->buckets[calculatedIdx]; + return pool->buckets[calculatedIdx]; } -void AllocImpl_printStats(AllocImpl *ai, bool *TitlePrinted, +void AllocImpl_printStats(disjoint_pool_t *pool, bool *TitlePrinted, size_t *HighBucketSize, size_t *HighPeakSlabsInUse, const char *MTName) { (void)TitlePrinted; // TODO @@ -969,10 +871,10 @@ void AllocImpl_printStats(AllocImpl *ai, bool *TitlePrinted, *HighBucketSize = 0; *HighPeakSlabsInUse = 0; - for (size_t i = 0; i < ai->buckets_num; i++) { + for (size_t i = 0; i < pool->buckets_num; i++) { // TODO //(*B).printStats(TitlePrinted, MTName); - bucket_t *bucket = ai->buckets[i]; + bucket_t *bucket = pool->buckets[i]; *HighPeakSlabsInUse = utils_max(bucket->maxSlabsInUse, *HighPeakSlabsInUse); if (bucket->allocCount) { @@ -1016,7 +918,7 @@ static umf_result_t memoryProviderFree(umf_memory_provider_handle_t hProvider, return UMF_RESULT_SUCCESS; } -void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool) { +void *AllocImpl_allocate(disjoint_pool_t *pool, size_t Size, bool *FromPool) { void *Ptr; if (Size == 0) { @@ -1024,8 +926,8 @@ void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool) { } *FromPool = false; - if (Size > AllocImpl_getParams(ai)->MaxPoolableSize) { - Ptr = memoryProviderAlloc(AllocImpl_getMemHandle(ai), Size, 0); + if (Size > pool->params.MaxPoolableSize) { + Ptr = memoryProviderAlloc(pool->MemHandle, Size, 0); if (Ptr == NULL) { // TODO get code from func @@ -1037,7 +939,7 @@ void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool) { return Ptr; } - bucket_t *bucket = AllocImpl_findBucket(ai, Size); + bucket_t *bucket = AllocImpl_findBucket(pool, Size); if (Size > bucket_chunk_cut_off(bucket)) { Ptr = bucket_get_slab(bucket, FromPool); @@ -1051,18 +953,18 @@ void *AllocImpl_allocate(AllocImpl *ai, size_t Size, bool *FromPool) { return NULL; } - if (AllocImpl_getParams(ai)->PoolTrace > 1) { + if (pool->params.PoolTrace > 1) { bucket_count_alloc(bucket, FromPool); } - VALGRIND_DO_MEMPOOL_ALLOC(ai, Ptr, Size); + VALGRIND_DO_MEMPOOL_ALLOC(pool, Ptr, Size); annotate_memory_undefined(Ptr, bucket_get_size(bucket)); return Ptr; } -void *AllocImpl_allocate_align(AllocImpl *ai, size_t Size, size_t Alignment, - bool *FromPool) { +void *AllocImpl_allocate_align(disjoint_pool_t *pool, size_t Size, + size_t Alignment, bool *FromPool) { void *Ptr; if (Size == 0) { @@ -1070,11 +972,11 @@ void *AllocImpl_allocate_align(AllocImpl *ai, size_t Size, size_t Alignment, } if (Alignment <= 1) { - return AllocImpl_allocate(ai, Size, FromPool); + return AllocImpl_allocate(pool, Size, FromPool); } size_t AlignedSize; - if (Alignment <= ai->ProviderMinPageSize) { + if (Alignment <= pool->ProviderMinPageSize) { // This allocation will be served from a Bucket which size is multiple // of Alignment and Slab address is aligned to ProviderMinPageSize // so the address will be properly aligned. @@ -1089,14 +991,14 @@ void *AllocImpl_allocate_align(AllocImpl *ai, size_t Size, size_t Alignment, // Check if requested allocation size is within pooling limit. // If not, just request aligned pointer from the system. 
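To make the index arithmetic in AllocImpl_sizeToIdx above concrete, here is a self-contained version with a few worked inputs. get_leftmost_set_bit_pos() is a local stand-in for the getLeftmostSetBitPos/log2Utils helpers, and MinBucketSizeExp == 6 (a 64-byte minimum bucket) is an example value.

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>
    #include <stdbool.h>

    static size_t get_leftmost_set_bit_pos(size_t x) {
        size_t pos = 0;
        while (x >>= 1) {
            pos++;
        }
        return pos;
    }

    static size_t size_to_idx(size_t size, size_t min_bucket_size_exp) {
        size_t min_bucket_size = (size_t)1 << min_bucket_size_exp;
        if (size < min_bucket_size) {
            return 0;
        }
        size_t position = get_leftmost_set_bit_pos(size);
        bool is_power_of_2 = (size & (size - 1)) == 0;
        bool above_midpoint =
            !is_power_of_2 && ((size - 1) & ((uint64_t)1 << (position - 1)));
        return (position - min_bucket_size_exp) * 2 + (size_t)!is_power_of_2 +
               (size_t)above_midpoint;
    }

    int main(void) {
        /* 64 -> 0 (64), 65 -> 1 (96), 97 -> 2 (128),
           128 -> 2 (128), 129 -> 3 (192), 200 -> 4 (256) */
        size_t sizes[] = {64, 65, 97, 128, 129, 200};
        for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
            printf("size %zu -> bucket index %zu\n", sizes[i],
                   size_to_idx(sizes[i], 6));
        }
        return 0;
    }

Even indices land on power-of-two buckets and odd indices on the halfway buckets, matching the series generated at pool creation.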
*FromPool = false; - if (AlignedSize > AllocImpl_getParams(ai)->MaxPoolableSize) { - Ptr = memoryProviderAlloc(AllocImpl_getMemHandle(ai), Size, Alignment); + if (AlignedSize > pool->params.MaxPoolableSize) { + Ptr = memoryProviderAlloc(pool->MemHandle, Size, Alignment); assert(Ptr); annotate_memory_undefined(Ptr, Size); return Ptr; } - bucket_t *bucket = AllocImpl_findBucket(ai, AlignedSize); + bucket_t *bucket = AllocImpl_findBucket(pool, AlignedSize); if (AlignedSize > bucket_chunk_cut_off(bucket)) { Ptr = bucket_get_slab(bucket, FromPool); @@ -1105,32 +1007,33 @@ void *AllocImpl_allocate_align(AllocImpl *ai, size_t Size, size_t Alignment, } assert(Ptr); - if (AllocImpl_getParams(ai)->PoolTrace > 1) { + if (pool->params.PoolTrace > 1) { bucket_count_alloc(bucket, FromPool); } - VALGRIND_DO_MEMPOOL_ALLOC(ai, ALIGN_UP((size_t)Ptr, Alignment), Size); + VALGRIND_DO_MEMPOOL_ALLOC(pool, ALIGN_UP((size_t)Ptr, Alignment), Size); annotate_memory_undefined((void *)ALIGN_UP((size_t)Ptr, Alignment), Size); return (void *)ALIGN_UP((size_t)Ptr, Alignment); } -umf_result_t AllocImpl_deallocate(AllocImpl *ai, void *Ptr, bool *ToPool) { +umf_result_t AllocImpl_deallocate(disjoint_pool_t *pool, void *Ptr, + bool *ToPool) { if (Ptr == NULL) { return UMF_RESULT_SUCCESS; } - void *SlabPtr = (void *)ALIGN_DOWN((size_t)Ptr, AllocImpl_SlabMinSize(ai)); + void *SlabPtr = (void *)ALIGN_DOWN((size_t)Ptr, pool->params.SlabMinSize); // Lock the map on read - utils_mutex_lock(AllocImpl_getKnownSlabsMapLock(ai)); + utils_mutex_lock(&pool->known_slabs_map_lock); *ToPool = false; - slab_t *slab = (slab_t *)critnib_get(ai->known_slabs, (uintptr_t)SlabPtr); + slab_t *slab = (slab_t *)critnib_get(pool->known_slabs, (uintptr_t)SlabPtr); //auto Slabs = getKnownSlabs().equal_range(SlabPtr); if (slab == NULL) { - utils_mutex_unlock(AllocImpl_getKnownSlabsMapLock(ai)); - umf_result_t ret = memoryProviderFree(AllocImpl_getMemHandle(ai), Ptr); + utils_mutex_unlock(&pool->known_slabs_map_lock); + umf_result_t ret = memoryProviderFree(pool->MemHandle, Ptr); return ret; } @@ -1142,14 +1045,14 @@ umf_result_t AllocImpl_deallocate(AllocImpl *ai, void *Ptr, bool *ToPool) { if (Ptr >= slab_get(slab) && Ptr < slab_get_end(slab)) { // Unlock the map before freeing the chunk, it may be locked on write // there - utils_mutex_unlock(AllocImpl_getKnownSlabsMapLock(ai)); + utils_mutex_unlock(&pool->known_slabs_map_lock); bucket_t *bucket = slab_get_bucket(slab); - if (AllocImpl_getParams(ai)->PoolTrace > 1) { + if (pool->params.PoolTrace > 1) { bucket_count_free(bucket); } - VALGRIND_DO_MEMPOOL_FREE(ai, Ptr); + VALGRIND_DO_MEMPOOL_FREE(pool, Ptr); annotate_memory_inaccessible(Ptr, bucket_get_size(bucket)); if (bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket)) { bucket_free_chunk(bucket, Ptr, slab, ToPool); @@ -1161,11 +1064,11 @@ umf_result_t AllocImpl_deallocate(AllocImpl *ai, void *Ptr, bool *ToPool) { } //} // for multimap - utils_mutex_unlock(AllocImpl_getKnownSlabsMapLock(ai)); + utils_mutex_unlock(&pool->known_slabs_map_lock); // There is a rare case when we have a pointer from system allocation next // to some slab with an entry in the map. So we find a slab // but the range checks fail. 
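The alignment handling in AllocImpl_allocate_align above relies on a classic over-allocation trick when Alignment exceeds the provider's minimum page size: request Size + Alignment - 1 bytes, then return the first Alignment-aligned address inside the block. A minimal standalone illustration, with malloc standing in for the bucket/provider allocation:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>
    #include <assert.h>

    #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uintptr_t)(a) - 1))

    int main(void) {
        size_t size = 1000;
        size_t alignment = 256;          /* larger than the assumed min page size */
        size_t aligned_size = size + alignment - 1;

        void *raw = malloc(aligned_size);
        assert(raw != NULL);

        uintptr_t user = ALIGN_UP((uintptr_t)raw, alignment);
        assert(user % alignment == 0);
        assert(user + size <= (uintptr_t)raw + aligned_size); /* still in bounds */

        printf("raw=%p user=%p slack=%zu\n", raw, (void *)user,
               (size_t)(user - (uintptr_t)raw));
        free(raw);
        return 0;
    }

This is also why slab_free_chunk recovers the chunk index by dividing the offset by the chunk size: an aligned pointer handed back by this path still lies inside its original chunk.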
- memoryProviderFree(AllocImpl_getMemHandle(ai), Ptr); + memoryProviderFree(pool->MemHandle, Ptr); return UMF_RESULT_SUCCESS; } @@ -1221,7 +1124,57 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - disjoint_pool->impl = create_AllocImpl(provider, dp_params); + VALGRIND_DO_CREATE_MEMPOOL(disjoint_pool, 0, 0); + + disjoint_pool->MemHandle = provider; + disjoint_pool->params = *dp_params; + + utils_mutex_init(&disjoint_pool->known_slabs_map_lock); + disjoint_pool->known_slabs = critnib_new(); + + // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. + // Powers of 2 and the value halfway between the powers of 2. + size_t Size1 = disjoint_pool->params.MinBucketSize; + + // MinBucketSize cannot be larger than CutOff. + Size1 = utils_min(Size1, CutOff); + + // Buckets sized smaller than the bucket default size- 8 aren't needed. + Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); + + // Calculate the exponent for MinBucketSize used for finding buckets. + disjoint_pool->MinBucketSizeExp = (size_t)log2Utils(Size1); + disjoint_pool->DefaultSharedLimits = + umfDisjointPoolSharedLimitsCreate(SIZE_MAX); + + // count number of buckets, start from 1 + disjoint_pool->buckets_num = 1; + size_t Size2 = Size1 + Size1 / 2; + size_t ts2 = Size2, ts1 = Size1; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { + disjoint_pool->buckets_num += 2; + } + disjoint_pool->buckets = (bucket_t **)umf_ba_global_alloc( + sizeof(bucket_t *) * disjoint_pool->buckets_num); + + int i = 0; + Size1 = ts1; + Size2 = ts2; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2, i += 2) { + disjoint_pool->buckets[i] = create_bucket( + Size1, disjoint_pool, AllocImpl_getLimits(disjoint_pool)); + disjoint_pool->buckets[i + 1] = create_bucket( + Size2, disjoint_pool, AllocImpl_getLimits(disjoint_pool)); + } + disjoint_pool->buckets[i] = create_bucket( + CutOff, disjoint_pool, AllocImpl_getLimits(disjoint_pool)); + + umf_result_t ret = umfMemoryProviderGetMinPageSize( + provider, NULL, &disjoint_pool->ProviderMinPageSize); + if (ret != UMF_RESULT_SUCCESS) { + disjoint_pool->ProviderMinPageSize = 0; + } + *ppPool = (void *)disjoint_pool; return UMF_RESULT_SUCCESS; @@ -1234,10 +1187,10 @@ void *disjoint_pool_malloc(void *pool, size_t size) { disjoint_pool_t *hPool = (disjoint_pool_t *)pool; bool FromPool; - void *Ptr = AllocImpl_allocate(hPool->impl, size, &FromPool); + void *Ptr = AllocImpl_allocate(hPool, size, &FromPool); - if (AllocImpl_getParams(hPool->impl)->PoolTrace > 2) { - const char *MT = AllocImpl_getParams(hPool->impl)->Name; + if (hPool->params.PoolTrace > 2) { + const char *MT = hPool->params.Name; (void)MT; //std::cout << "Allocated " << std::setw(8) << size << " " << MT // << " bytes from " << (FromPool ? 
"Pool" : "Provider") << " ->" @@ -1270,11 +1223,10 @@ void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { disjoint_pool_t *hPool = (disjoint_pool_t *)pool; bool FromPool; - void *Ptr = - AllocImpl_allocate_align(hPool->impl, size, alignment, &FromPool); + void *Ptr = AllocImpl_allocate_align(hPool, size, alignment, &FromPool); - if (AllocImpl_getParams(hPool->impl)->PoolTrace > 2) { - const char *MT = AllocImpl_getParams(hPool->impl)->Name; + if (hPool->params.PoolTrace > 2) { + const char *MT = hPool->params.Name; (void)MT; //std::cout << "Allocated " << std::setw(8) << size << " " << MT // << " bytes aligned at " << alignment << " from " @@ -1297,7 +1249,7 @@ umf_result_t disjoint_pool_free(void *pool, void *ptr) { disjoint_pool_t *hPool = (disjoint_pool_t *)pool; bool ToPool; - umf_result_t ret = AllocImpl_deallocate(hPool->impl, ptr, &ToPool); + umf_result_t ret = AllocImpl_deallocate(hPool, ptr, &ToPool); /* if (ret == UMF_RESULT_SUCCESS) { @@ -1324,7 +1276,19 @@ umf_result_t disjoint_pool_get_last_allocation_error(void *pool) { void disjoint_pool_finalize(void *pool) { disjoint_pool_t *hPool = (disjoint_pool_t *)pool; - destroy_AllocImpl(hPool->impl); + + for (size_t i = 0; i < hPool->buckets_num; i++) { + destroy_bucket(hPool->buckets[i]); + } + + VALGRIND_DO_DESTROY_MEMPOOL(hPool); + + umfDisjointPoolSharedLimitsDestroy(hPool->DefaultSharedLimits); + critnib_delete(hPool->known_slabs); + + utils_mutex_destroy_not_free(&hPool->known_slabs_map_lock); + + umf_ba_global_free(hPool); /* if (impl->getParams().PoolTrace > 1) { From 438178e4cf3842df067e231e7616045bf4d694cc Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 14 Nov 2024 14:18:30 +0100 Subject: [PATCH 26/26] cleanup --- src/pool/pool_disjoint.c | 374 ++++++++++++++++----------------------- 1 file changed, 151 insertions(+), 223 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index cb4ec6828..20e315656 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -36,11 +36,7 @@ void destroy_slab(slab_t *slab); void *slab_get(const slab_t *slab); void *slab_get_end(const slab_t *slab); -bucket_t *slab_get_bucket(slab_t *slab); void *slab_get_chunk(slab_t *slab); -size_t slab_get_num_chunks(const slab_t *slab); -size_t slab_get_chunk_size(const slab_t *slab); -size_t slab_get_num_allocated(const slab_t *slab); bool slab_has_avail(const slab_t *slab); void slab_free_chunk(slab_t *slab, void *ptr); @@ -59,13 +55,11 @@ bool bucket_can_pool(bucket_t *bucket, bool *to_pool); void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *to_pool); void bucket_decrement_pool(bucket_t *bucket, bool *from_pool); void *bucket_get_chunk(bucket_t *bucket, bool *from_pool); -size_t bucket_get_size(bucket_t *bucket); size_t bucket_chunk_cut_off(bucket_t *bucket); size_t bucket_capacity(bucket_t *bucket); void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, bool *to_pool); void bucket_count_alloc(bucket_t *bucket, bool from_pool); -void bucket_count_free(bucket_t *bucket); void *bucket_get_slab(bucket_t *bucket, bool *from_pool); size_t bucket_slab_alloc_size(bucket_t *bucket); @@ -74,28 +68,6 @@ slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, bool *from_pool); void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool); -const umf_disjoint_pool_shared_limits_t *bucket_get_limits(bucket_t *bucket); -umf_disjoint_pool_params_t *bucket_get_params(bucket_t 
*bucket); -umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket); -utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket); -critnib *bucket_get_known_slabs(bucket_t *bucket); - -bucket_t *disjoint_pool_findBucket(disjoint_pool_t *pool, size_t Size); -umf_result_t disjoint_pool_deallocate(disjoint_pool_t *pool, void *Ptr, - bool *ToPool); -umf_disjoint_pool_shared_limits_t * -disjoint_pool_getLimits(disjoint_pool_t *pool); -void *disjoint_pool_allocate(disjoint_pool_t *pool, size_t Size, - bool *FromPool); -void *disjoint_pool_allocate_align(disjoint_pool_t *pool, size_t Size, - size_t Alignment, bool *FromPool); - -umf_memory_provider_handle_t disjoint_pool_getMemHandle(disjoint_pool_t *pool); -utils_mutex_t *disjoint_pool_getKnownSlabsMapLock(disjoint_pool_t *pool); -critnib *disjoint_pool_getKnownSlabs(disjoint_pool_t *pool); -size_t disjoint_pool_SlabMinSize(disjoint_pool_t *pool); -umf_disjoint_pool_params_t *disjoint_pool_getParams(disjoint_pool_t *pool); - static __TLS umf_result_t TLS_last_allocation_error; // Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is @@ -147,13 +119,13 @@ void annotate_memory_undefined(void *ptr, size_t size); typedef struct slab_list_item_t slab_list_item_t; typedef struct bucket_t { - size_t Size; + size_t size; - // List of slabs which have at least 1 available chunk. - slab_list_item_t *AvailableSlabs; + // Linked list of slabs which have at least 1 available chunk. + slab_list_item_t *available_slabs; - // List of slabs with 0 available chunk. - slab_list_item_t *UnavailableSlabs; + // Linked list of slabs with 0 available chunk. + slab_list_item_t *unavailable_slabs; // Protects the bucket and all the corresponding slabs utils_mutex_t bucket_lock; @@ -181,19 +153,16 @@ typedef struct bucket_t { // if any of them is entirely free. Instead we keep a counter of entirely // empty slabs within the Available list to speed up the process of checking // if a slab in this bucket is already pooled. - size_t chunkedSlabsInPool; - - // Statistics - size_t allocPoolCount; - size_t freeCount; - size_t currSlabsInUse; - size_t currSlabsInPool; - size_t maxSlabsInPool; + size_t chunked_slabs_in_pool; // Statistics - size_t allocCount; - size_t maxSlabsInUse; - + size_t alloc_pool_count; + size_t free_count; + size_t curr_slabs_in_use; + size_t curr_slabs_in_pool; + size_t max_slabs_in_pool; + size_t alloc_count; + size_t max_slabs_in_use; } bucket_t; // Represents the allocated memory block of size 'slab_min_size' @@ -254,11 +223,11 @@ typedef struct disjoint_pool_t { // slabs This is because slab's destructor removes the object from the map. critnib *known_slabs; // (void *, slab_t *) - // prev std::shared_timed_mutex - ok? + // TODO: prev std::shared_timed_mutex - ok? 
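The renamed available_slabs/unavailable_slabs fields above are plain utlist doubly linked lists, so a slab changes state simply by moving its embedded list node between the two heads. A minimal sketch of that pattern, assuming utlist.h (the uthash list macros already used by this file) is on the include path and using a simplified node in place of slab_list_item_t:

    #include <stdio.h>
    #include <utlist.h>

    typedef struct node_t {
        int id;                      /* stands in for slab_t *val */
        struct node_t *prev, *next;  /* required by the DL_* macros */
    } node_t;

    int main(void) {
        node_t *available = NULL, *unavailable = NULL;
        node_t slab_node = {42, NULL, NULL};

        DL_PREPEND(available, &slab_node);   /* freshly created slab */

        DL_DELETE(available, &slab_node);    /* slab became full */
        slab_node.prev = NULL;               /* the patch resets prev before re-prepending */
        DL_PREPEND(unavailable, &slab_node);

        node_t *it;
        DL_FOREACH(unavailable, it) { printf("slab %d is full\n", it->id); }
        return 0;
    }

bucket_get_chunk and bucket_on_free_chunk in this patch do exactly this dance under bucket_lock, prepending to one list and deleting from the other as slabs fill up or regain a free chunk.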
utils_mutex_t known_slabs_map_lock; // Handle to the memory provider - umf_memory_provider_handle_t MemHandle; + umf_memory_provider_handle_t provider; // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) bucket_t **buckets; @@ -267,24 +236,20 @@ typedef struct disjoint_pool_t { // Configuration for this instance umf_disjoint_pool_params_t params; - umf_disjoint_pool_shared_limits_t *DefaultSharedLimits; + umf_disjoint_pool_shared_limits_t *default_shared_limits; // Used in algorithm for finding buckets - size_t MinBucketSizeExp; + size_t min_bucket_size_exp; // Coarse-grain allocation min alignment - size_t ProviderMinPageSize; + size_t provider_min_page_size; } disjoint_pool_t; -size_t bucket_get_size(bucket_t *bucket); - -void slab_reg(slab_t *slab); -void slab_unreg(slab_t *slab); - slab_t *create_slab(bucket_t *bucket) { // In case bucket size is not a multiple of SlabMinSize, we would have // some padding at the end of the slab. slab_t *slab = umf_ba_global_alloc(sizeof(slab_t)); + // TODO check res and errors here and everywhere // TODO use logger slab->num_allocated = 0; @@ -296,7 +261,7 @@ slab_t *create_slab(bucket_t *bucket) { slab->iter->val = slab; slab->iter->prev = slab->iter->next = NULL; - slab->num_chunks = bucket_slab_min_size(bucket) / bucket_get_size(bucket); + slab->num_chunks = bucket_slab_min_size(bucket) / bucket->size; slab->chunks = umf_ba_global_alloc(sizeof(bool) * slab->num_chunks); memset(slab->chunks, 0, sizeof(bool) * slab->num_chunks); @@ -304,8 +269,9 @@ slab_t *create_slab(bucket_t *bucket) { // NOTE: originally slabs memory were allocated without alignment // with this registering a slab is simpler and doesn't require multimap + umf_memory_provider_handle_t provider = bucket->pool->provider; umf_result_t res = - umfMemoryProviderAlloc(bucket_get_mem_handle(bucket), slab->slab_size, + umfMemoryProviderAlloc(provider, slab->slab_size, bucket_slab_min_size(bucket), &slab->mem_ptr); if (res == UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY) { @@ -324,8 +290,9 @@ void destroy_slab(slab_t *slab) { fprintf(stderr, "[DP destroy_slab] bucket: %p, slab_size: %zu\n", (void *)slab->bucket, slab->slab_size); - umf_result_t res = umfMemoryProviderFree( - bucket_get_mem_handle(slab->bucket), slab->mem_ptr, slab->slab_size); + umf_memory_provider_handle_t provider = slab->bucket->pool->provider; + umf_result_t res = + umfMemoryProviderFree(provider, slab->mem_ptr, slab->slab_size); assert(res == UMF_RESULT_SUCCESS); (void)res; @@ -334,12 +301,6 @@ void destroy_slab(slab_t *slab) { umf_ba_global_free(slab); } -size_t slab_get_num_allocated(const slab_t *slab) { - return slab->num_allocated; -} - -size_t slab_get_num_chunks(const slab_t *slab) { return slab->num_chunks; } - // Return the index of the first available chunk, SIZE_MAX otherwise size_t slab_find_first_available_chunk_idx(const slab_t *slab) { // Use the first free chunk index as a hint for the search. @@ -367,7 +328,7 @@ void *slab_get_chunk(slab_t *slab) { assert(chunk_idx != SIZE_MAX); void *free_chunk = - (uint8_t *)slab->mem_ptr + chunk_idx * slab_get_chunk_size(slab); + (uint8_t *)slab->mem_ptr + chunk_idx * slab->bucket->size; // mark as used slab->chunks[chunk_idx] = true; slab->num_allocated += 1; @@ -381,15 +342,10 @@ void *slab_get_chunk(slab_t *slab) { return free_chunk; } -void *slab_get_end(const slab_t *slab) { - return (uint8_t *)slab->mem_ptr + bucket_slab_min_size(slab->bucket); -} - // TODO remove? why need getter/setter? 
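The chunk bookkeeping used by slab_get_chunk above (and slab_free_chunk below) is a bool-per-chunk map plus a first-free-index hint, with pointer/index conversions done via the bucket's chunk size. Here is a self-contained miniature of the same idea; the chunk count and size are example values.

    #include <stdio.h>
    #include <stdbool.h>
    #include <stddef.h>

    #define CHUNK_SIZE 64
    #define NUM_CHUNKS 8

    static char mem[CHUNK_SIZE * NUM_CHUNKS];
    static bool used[NUM_CHUNKS];
    static size_t first_free_hint;

    static void *get_chunk(void) {
        for (size_t i = first_free_hint; i < NUM_CHUNKS; i++) {
            if (!used[i]) {
                used[i] = true;
                first_free_hint = i;
                return mem + i * CHUNK_SIZE;
            }
        }
        return NULL; /* slab full: the bucket moves it to unavailable_slabs */
    }

    static void free_chunk(void *ptr) {
        size_t idx = (size_t)((char *)ptr - mem) / CHUNK_SIZE;
        used[idx] = false;
        if (idx < first_free_hint) {
            first_free_hint = idx; /* keep the hint at a known-free slot */
        }
    }

    int main(void) {
        void *a = get_chunk(), *b = get_chunk();
        printf("a=%p b=%p\n", a, b);
        free_chunk(a);
        printf("reused=%p\n", get_chunk()); /* gets 'a' back thanks to the hint */
        return 0;
    }

The hint is only a search start heuristic, which is why the real code can set it to the just-allocated index after a successful allocation and lower it again on free.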
void *slab_get(const slab_t *slab) { return slab->mem_ptr; } -bucket_t *slab_get_bucket(slab_t *slab) { return slab->bucket; } -size_t slab_get_chunk_size(const slab_t *slab) { - return bucket_get_size(slab->bucket); +void *slab_get_end(const slab_t *slab) { + return (uint8_t *)slab->mem_ptr + bucket_slab_min_size(slab->bucket); } void slab_free_chunk(slab_t *slab, void *ptr) { @@ -402,7 +358,7 @@ void slab_free_chunk(slab_t *slab, void *ptr) { // Even if the pointer p was previously aligned, it's still inside the // corresponding chunk, so we get the correct index here. size_t chunk_idx = - ((uint8_t *)ptr - (uint8_t *)slab->mem_ptr) / slab_get_chunk_size(slab); + ((uint8_t *)ptr - (uint8_t *)slab->mem_ptr) / slab->bucket->size; // Make sure that the chunk was allocated assert(slab->chunks[chunk_idx] && "double free detected"); @@ -424,7 +380,7 @@ bool slab_has_avail(const slab_t *slab) { } void slab_reg(slab_t *slab) { - bucket_t *bucket = slab_get_bucket(slab); + bucket_t *bucket = slab->bucket; // NOTE: changed vs original - slab is already aligned to bucket_slab_min_size // I also decr end_addr by 1 void *start_addr = (void *)ALIGN_DOWN((size_t)slab_get(slab), @@ -439,7 +395,7 @@ void slab_reg(slab_t *slab) { } void slab_unreg(slab_t *slab) { - bucket_t *bucket = slab_get_bucket(slab); + bucket_t *bucket = slab->bucket; // NOTE: changed vs original - slab is already aligned to bucket_slab_min_size // I also decr end_addr by 1 void *start_addr = (void *)ALIGN_DOWN((size_t)slab_get(slab), @@ -455,23 +411,22 @@ void slab_unreg(slab_t *slab) { bucket_t *create_bucket(size_t Sz, disjoint_pool_t *pool, umf_disjoint_pool_shared_limits_t *shared_limits) { + bucket_t *bucket = (bucket_t *)umf_ba_global_alloc(sizeof(bucket_t)); - bucket->Size = Sz; + bucket->size = Sz; bucket->pool = pool; - bucket->AvailableSlabs = NULL; - bucket->UnavailableSlabs = NULL; - bucket->chunkedSlabsInPool = 0; - bucket->allocPoolCount = 0; - bucket->freeCount = 0; - bucket->currSlabsInUse = 0; - bucket->currSlabsInPool = 0; - bucket->maxSlabsInPool = 0; - bucket->allocCount = 0; - bucket->maxSlabsInUse = 0; - + bucket->available_slabs = NULL; + bucket->unavailable_slabs = NULL; + bucket->chunked_slabs_in_pool = 0; + bucket->alloc_pool_count = 0; + bucket->free_count = 0; + bucket->curr_slabs_in_use = 0; + bucket->curr_slabs_in_pool = 0; + bucket->max_slabs_in_pool = 0; + bucket->alloc_count = 0; + bucket->max_slabs_in_use = 0; bucket->shared_limits = shared_limits; - assert(shared_limits); utils_mutex_init(&bucket->bucket_lock); @@ -479,78 +434,69 @@ bucket_t *create_bucket(size_t Sz, disjoint_pool_t *pool, } void destroy_bucket(bucket_t *bucket) { + // use an extra tmp to store the next iterator before destroying the slab slab_list_item_t *it = NULL, *tmp = NULL; - // TODO check eng - // use extra tmp to store next iterator before the slab is destroyed - LL_FOREACH_SAFE(bucket->AvailableSlabs, it, tmp) { destroy_slab(it->val); } - LL_FOREACH_SAFE(bucket->UnavailableSlabs, it, tmp) { + LL_FOREACH_SAFE(bucket->available_slabs, it, tmp) { destroy_slab(it->val); } + LL_FOREACH_SAFE(bucket->unavailable_slabs, it, tmp) { destroy_slab(it->val); } utils_mutex_destroy_not_free(&bucket->bucket_lock); - umf_ba_global_free(bucket); } // The lock must be acquired before calling this method -void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *ToPool) { - *ToPool = true; +void bucket_on_free_chunk(bucket_t *bucket, slab_t *slab, bool *to_pool) { + *to_pool = true; // In case if the slab was previously full and now 
has 1 available // chunk, it should be moved to the list of available slabs - if (slab_get_num_allocated(slab) == (slab_get_num_chunks(slab) - 1)) { + if (slab->num_allocated == (slab->num_chunks - 1)) { slab_list_item_t *slab_it = slab->iter; assert(slab_it->val != NULL); - DL_DELETE(bucket->UnavailableSlabs, slab_it); - DL_PREPEND(bucket->AvailableSlabs, slab_it); + DL_DELETE(bucket->unavailable_slabs, slab_it); + DL_PREPEND(bucket->available_slabs, slab_it); } // Check if slab is empty, and pool it if we can. - if (slab_get_num_allocated(slab) == 0) { + if (slab->num_allocated == 0) { // The slab is now empty. // If pool has capacity then put the slab in the pool. // The ToPool parameter indicates whether the Slab will be put in the // pool or freed. - if (!bucket_can_pool(bucket, ToPool)) { + if (!bucket_can_pool(bucket, to_pool)) { // Note: since the slab is stored as unique_ptr, just remove it from // the list to destroy the object. slab_list_item_t *slab_it = slab->iter; assert(slab_it->val != NULL); slab_unreg(slab_it->val); - DL_DELETE(bucket->AvailableSlabs, slab_it); + DL_DELETE(bucket->available_slabs, slab_it); destroy_slab(slab_it->val); } } } -// Return the allocation size of this bucket. -size_t bucket_get_size(bucket_t *bucket) { return bucket->Size; } - -disjoint_pool_t *bucket_get_alloc_ctx(bucket_t *bucket) { return bucket->pool; } - -void bucket_count_free(bucket_t *bucket) { ++bucket->freeCount; } - -void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *Slab, - bool *ToPool) { +void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, + bool *to_pool) { utils_mutex_lock(&bucket->bucket_lock); - slab_free_chunk(Slab, ptr); - bucket_on_free_chunk(bucket, Slab, ToPool); + slab_free_chunk(slab, ptr); + bucket_on_free_chunk(bucket, slab, to_pool); utils_mutex_unlock(&bucket->bucket_lock); } -void bucket_count_alloc(bucket_t *bucket, bool FromPool) { - ++bucket->allocCount; - if (FromPool) { - ++bucket->allocPoolCount; +void bucket_count_alloc(bucket_t *bucket, bool from_pool) { + ++bucket->alloc_count; + if (from_pool) { + ++bucket->alloc_pool_count; } } -void *bucket_get_chunk(bucket_t *bucket, bool *FromPool) { +void *bucket_get_chunk(bucket_t *bucket, bool *from_pool) { utils_mutex_lock(&bucket->bucket_lock); - slab_list_item_t *slab_it = bucket_get_avail_slab(bucket, FromPool); + slab_list_item_t *slab_it = bucket_get_avail_slab(bucket, from_pool); if (slab_it == NULL) { utils_mutex_unlock(&bucket->bucket_lock); return NULL; @@ -560,8 +506,8 @@ void *bucket_get_chunk(bucket_t *bucket, bool *FromPool) { // If the slab is full, move it to unavailable slabs and update its iterator if (!(slab_has_avail(slab_it->val))) { - DL_DELETE(bucket->AvailableSlabs, slab_it); - DL_PREPEND(bucket->UnavailableSlabs, slab_it); + DL_DELETE(bucket->available_slabs, slab_it); + DL_PREPEND(bucket->unavailable_slabs, slab_it); } utils_mutex_unlock(&bucket->bucket_lock); @@ -573,20 +519,17 @@ size_t bucket_chunk_cut_off(bucket_t *bucket) { } size_t bucket_slab_alloc_size(bucket_t *bucket) { - // return max - return (bucket_get_size(bucket) > bucket_slab_min_size(bucket)) - ? bucket_get_size(bucket) - : bucket_slab_min_size(bucket); + return utils_max(bucket->size, bucket_slab_min_size(bucket)); } size_t bucket_slab_min_size(bucket_t *bucket) { - return bucket_get_params(bucket)->SlabMinSize; + return bucket->pool->params.SlabMinSize; } slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, bool *from_pool) { // Return a slab that will be used for a single allocation. 
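For a feel of the sizes involved, the sketch below evaluates bucket_slab_alloc_size above for a few bucket sizes, assuming an example SlabMinSize of 64 KiB: small buckets carve one 64 KiB slab into many chunks, while a 2 MiB bucket gets a 2 MiB slab per allocation.

    #include <stdio.h>
    #include <stddef.h>

    static size_t max_sz(size_t a, size_t b) { return a > b ? a : b; }

    int main(void) {
        size_t slab_min_size = 64 * 1024;  /* example SlabMinSize */
        size_t bucket_sizes[] = {64, 256, 4096, 2 * 1024 * 1024};

        for (size_t i = 0; i < sizeof(bucket_sizes) / sizeof(bucket_sizes[0]); i++) {
            size_t size = bucket_sizes[i];
            size_t slab_alloc_size = max_sz(size, slab_min_size); /* bucket_slab_alloc_size */
            size_t num_chunks = slab_min_size / size;             /* as in create_slab */
            printf("bucket %8zu B -> slab %8zu B, chunks per slab %5zu\n",
                   size, slab_alloc_size, num_chunks);
        }
        return 0;
    }

A chunk count of 0 in the last row reflects that such buckets serve whole slabs (Size above the chunk cut-off), so the per-chunk map is never consulted for them.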
- if (bucket->AvailableSlabs == NULL) { + if (bucket->available_slabs == NULL) { slab_t *slab = create_slab(bucket); if (slab == NULL) { //assert(0); @@ -594,14 +537,14 @@ slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, } slab_reg(slab); - DL_PREPEND(bucket->AvailableSlabs, slab->iter); + DL_PREPEND(bucket->available_slabs, slab->iter); *from_pool = false; bucket_update_stats(bucket, 1, 0); } else { bucket_decrement_pool(bucket, from_pool); } - return bucket->AvailableSlabs; + return bucket->available_slabs; } void *bucket_get_slab(bucket_t *bucket, bool *from_pool) { @@ -615,8 +558,9 @@ void *bucket_get_slab(bucket_t *bucket, bool *from_pool) { slab_t *slab = slab_it->val; void *ptr = slab_get(slab); - DL_DELETE(bucket->AvailableSlabs, slab_it); - DL_PREPEND(bucket->UnavailableSlabs, slab_it); + DL_DELETE(bucket->available_slabs, slab_it); + slab_it->prev = NULL; + DL_PREPEND(bucket->unavailable_slabs, slab_it); utils_mutex_unlock(&bucket->bucket_lock); return ptr; @@ -628,18 +572,19 @@ void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool) { slab_list_item_t *slab_it = slab->iter; assert(slab_it->val != NULL); if (bucket_can_pool(bucket, to_pool)) { - DL_DELETE(bucket->UnavailableSlabs, slab_it); - DL_PREPEND(bucket->AvailableSlabs, slab_it); + DL_DELETE(bucket->unavailable_slabs, slab_it); + slab_it->prev = NULL; + DL_PREPEND(bucket->available_slabs, slab_it); } else { slab_unreg(slab_it->val); - DL_DELETE(bucket->UnavailableSlabs, slab_it); + DL_DELETE(bucket->unavailable_slabs, slab_it); destroy_slab(slab_it->val); } utils_mutex_unlock(&bucket->bucket_lock); } slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool) { - if (bucket->AvailableSlabs == NULL) { + if (bucket->available_slabs == NULL) { slab_t *slab = create_slab(bucket); if (slab == NULL) { // TODO log @@ -648,14 +593,15 @@ slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool) { } slab_reg(slab); - DL_PREPEND(bucket->AvailableSlabs, slab->iter); + DL_PREPEND(bucket->available_slabs, slab->iter); bucket_update_stats(bucket, 1, 0); *from_pool = false; } else { - if (slab_get_num_allocated(bucket->AvailableSlabs->val) == 0) { + slab_t *slab = bucket->available_slabs->val; + if (slab->num_allocated == 0) { // If this was an empty slab, it was in the pool. // Now it is no longer in the pool, so update count. - --bucket->chunkedSlabsInPool; + --bucket->chunked_slabs_in_pool; bucket_decrement_pool(bucket, from_pool); } else { // Allocation from existing slab is treated as from pool for statistics. @@ -663,38 +609,34 @@ slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool) { } } - return bucket->AvailableSlabs; + return bucket->available_slabs; } size_t bucket_capacity(bucket_t *bucket) { // For buckets used in chunked mode, just one slab in pool is sufficient. // For larger buckets, the capacity could be more and is adjustable. 
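The pooling decision that follows this hunk (bucket_capacity/bucket_can_pool) boils down to a bounded, atomically maintained byte budget shared across buckets: a slab is only kept if bumping total_size stays within MaxSize, retried with a compare-exchange. A self-contained sketch of that pattern, with C11 atomics standing in for the utils_atomic_* wrappers and example limit values:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stddef.h>

    typedef struct {
        size_t max_size;           /* limits->MaxSize    */
        _Atomic size_t total_size; /* limits->total_size */
    } shared_limits_t;

    static bool try_pool(shared_limits_t *limits, size_t slab_bytes) {
        size_t cur =
            atomic_load_explicit(&limits->total_size, memory_order_acquire);
        while (cur + slab_bytes <= limits->max_size) {
            if (atomic_compare_exchange_weak(&limits->total_size, &cur,
                                             cur + slab_bytes)) {
                return true;  /* slab is kept in the pool */
            }
            /* cur was reloaded by the failed CAS; re-check the limit */
        }
        return false;         /* over the limit: the slab is freed instead */
    }

    int main(void) {
        shared_limits_t limits = {.max_size = 128 * 1024};
        atomic_init(&limits.total_size, 0);

        printf("pool 64K: %d\n", try_pool(&limits, 64 * 1024)); /* 1 */
        printf("pool 64K: %d\n", try_pool(&limits, 64 * 1024)); /* 1 */
        printf("pool 64K: %d\n", try_pool(&limits, 64 * 1024)); /* 0, limit hit */
        return 0;
    }

bucket_can_pool additionally counts the bucket's free slabs against bucket_capacity() before it even attempts the shared-limit update.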
- if (bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket)) { + if (bucket->size <= bucket_chunk_cut_off(bucket)) { return 1; } else { - return bucket_get_params(bucket)->Capacity; + return bucket->pool->params.Capacity; } } -size_t bucket_max_poolable_size(bucket_t *bucket) { - return bucket_get_params(bucket)->MaxPoolableSize; -} - void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { - if (bucket_get_params(bucket)->PoolTrace == 0) { + if (bucket->pool->params.PoolTrace == 0) { return; } - bucket->currSlabsInUse += in_use; - bucket->maxSlabsInUse = - utils_max(bucket->currSlabsInUse, bucket->maxSlabsInUse); - bucket->currSlabsInPool += in_pool; - bucket->maxSlabsInPool = - utils_max(bucket->currSlabsInPool, bucket->maxSlabsInPool); + bucket->curr_slabs_in_use += in_use; + bucket->max_slabs_in_use = + utils_max(bucket->curr_slabs_in_use, bucket->max_slabs_in_use); + bucket->curr_slabs_in_pool += in_pool; + bucket->max_slabs_in_pool = + utils_max(bucket->curr_slabs_in_pool, bucket->max_slabs_in_pool); // Increment or decrement current pool sizes based on whether // slab was added to or removed from pool. - bucket_get_params(bucket)->CurPoolSize += + bucket->pool->params.CurPoolSize += in_pool * bucket_slab_alloc_size(bucket); } @@ -708,22 +650,21 @@ void bucket_decrement_pool(bucket_t *bucket, bool *from_pool) { } bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { - size_t NewFreeSlabsInBucket; + size_t new_free_slabs_in_bucket; // Check if this bucket is used in chunked form or as full slabs. - bool chunkedBucket = - bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket); + bool chunkedBucket = bucket->size <= bucket_chunk_cut_off(bucket); if (chunkedBucket) { - NewFreeSlabsInBucket = bucket->chunkedSlabsInPool + 1; + new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; } else { // TODO optimize size_t avail_num = 0; slab_list_item_t *it = NULL; - DL_FOREACH(bucket->AvailableSlabs, it) { avail_num++; } - NewFreeSlabsInBucket = avail_num + 1; + DL_FOREACH(bucket->available_slabs, it) { avail_num++; } + new_free_slabs_in_bucket = avail_num + 1; } - if (bucket_capacity(bucket) >= NewFreeSlabsInBucket) { + if (bucket_capacity(bucket) >= new_free_slabs_in_bucket) { size_t pool_size = 0; utils_atomic_load_acquire(&bucket->shared_limits->total_size, &pool_size); @@ -743,7 +684,7 @@ bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { &pool_size, &new_pool_size)) { #endif if (chunkedBucket) { - ++bucket->chunkedSlabsInPool; + ++bucket->chunked_slabs_in_pool; } bucket_update_stats(bucket, -1, 1); @@ -758,26 +699,15 @@ bool bucket_can_pool(bucket_t *bucket, bool *to_pool) { return false; } -umf_disjoint_pool_params_t *bucket_get_params(bucket_t *bucket) { - return &bucket->pool->params; -} - -umf_memory_provider_handle_t bucket_get_mem_handle(bucket_t *bucket) { - return bucket->pool->MemHandle; -} - -critnib *bucket_get_known_slabs(bucket_t *bucket) { - return bucket->pool->known_slabs; -} - utils_mutex_t *bucket_get_known_slabs_map_lock(bucket_t *bucket) { return &bucket->pool->known_slabs_map_lock; } void slab_reg_by_addr(void *addr, slab_t *slab) { - bucket_t *bucket = slab_get_bucket(slab); - utils_mutex_t *lock = bucket_get_known_slabs_map_lock(bucket); - critnib *slabs = bucket_get_known_slabs(bucket); + bucket_t *bucket = slab->bucket; + disjoint_pool_t *pool = bucket->pool; + utils_mutex_t *lock = &pool->known_slabs_map_lock; + critnib *slabs = pool->known_slabs; utils_mutex_lock(lock); @@ -790,18 +720,14 @@ void slab_reg_by_addr(void *addr, slab_t 
*slab) { (void *)slab); critnib_insert(slabs, (uintptr_t)addr, slab, 0); - // debug - slab_t *s = (slab_t *)critnib_get(slabs, (uintptr_t)addr); - assert(s != NULL); - (void)s; - utils_mutex_unlock(lock); } void slab_unreg_by_addr(void *addr, slab_t *slab) { - bucket_t *bucket = slab_get_bucket(slab); - utils_mutex_t *lock = bucket_get_known_slabs_map_lock(bucket); - critnib *slabs = bucket_get_known_slabs(bucket); + bucket_t *bucket = slab->bucket; + disjoint_pool_t *pool = bucket->pool; + utils_mutex_t *lock = &pool->known_slabs_map_lock; + critnib *slabs = pool->known_slabs; utils_mutex_lock(lock); @@ -823,19 +749,21 @@ size_t AllocImpl_sizeToIdx(disjoint_pool_t *pool, size_t size) { assert(size <= CutOff && "Unexpected size"); assert(size > 0 && "Unexpected size"); - size_t MinBucketSize = (size_t)1 << pool->MinBucketSizeExp; - if (size < MinBucketSize) { + size_t min_bucket_size = (size_t)1 << pool->min_bucket_size_exp; + if (size < min_bucket_size) { return 0; } // Get the position of the leftmost set bit. size_t position = getLeftmostSetBitPos(size); - bool isPowerOf2 = 0 == (size & (size - 1)); - bool largerThanHalfwayBetweenPowersOf2 = - !isPowerOf2 && (bool)((size - 1) & ((uint64_t)(1) << (position - 1))); - size_t index = (position - pool->MinBucketSizeExp) * 2 + - (int)(!isPowerOf2) + (int)largerThanHalfwayBetweenPowersOf2; + bool is_power_of_2 = 0 == (size & (size - 1)); + bool larger_than_halfway_between_powers_of_2 = + !is_power_of_2 && + (bool)((size - 1) & ((uint64_t)(1) << (position - 1))); + size_t index = (position - pool->min_bucket_size_exp) * 2 + + (int)(!is_power_of_2) + + (int)larger_than_halfway_between_powers_of_2; return index; } @@ -844,19 +772,19 @@ umf_disjoint_pool_shared_limits_t *AllocImpl_getLimits(disjoint_pool_t *pool) { if (pool->params.SharedLimits) { return pool->params.SharedLimits; } else { - return pool->DefaultSharedLimits; + return pool->default_shared_limits; } } bucket_t *AllocImpl_findBucket(disjoint_pool_t *pool, size_t Size) { size_t calculatedIdx = AllocImpl_sizeToIdx(pool, Size); bucket_t *bucket = pool->buckets[calculatedIdx]; - assert(bucket_get_size(bucket) >= Size); + assert(bucket->size >= Size); (void)bucket; if (calculatedIdx > 0) { bucket_t *bucket_prev = pool->buckets[calculatedIdx - 1]; - assert(bucket_get_size(bucket_prev) < Size); + assert(bucket_prev->size < Size); (void)bucket_prev; } @@ -876,8 +804,8 @@ void AllocImpl_printStats(disjoint_pool_t *pool, bool *TitlePrinted, //(*B).printStats(TitlePrinted, MTName); bucket_t *bucket = pool->buckets[i]; *HighPeakSlabsInUse = - utils_max(bucket->maxSlabsInUse, *HighPeakSlabsInUse); - if (bucket->allocCount) { + utils_max(bucket->max_slabs_in_use, *HighPeakSlabsInUse); + if (bucket->alloc_count) { *HighBucketSize = utils_max(bucket_slab_alloc_size(bucket), *HighBucketSize); } @@ -927,7 +855,7 @@ void *AllocImpl_allocate(disjoint_pool_t *pool, size_t Size, bool *FromPool) { *FromPool = false; if (Size > pool->params.MaxPoolableSize) { - Ptr = memoryProviderAlloc(pool->MemHandle, Size, 0); + Ptr = memoryProviderAlloc(pool->provider, Size, 0); if (Ptr == NULL) { // TODO get code from func @@ -958,7 +886,7 @@ void *AllocImpl_allocate(disjoint_pool_t *pool, size_t Size, bool *FromPool) { } VALGRIND_DO_MEMPOOL_ALLOC(pool, Ptr, Size); - annotate_memory_undefined(Ptr, bucket_get_size(bucket)); + annotate_memory_undefined(Ptr, bucket->size); return Ptr; } @@ -976,13 +904,13 @@ void *AllocImpl_allocate_align(disjoint_pool_t *pool, size_t Size, } size_t AlignedSize; - if (Alignment <= 
pool->ProviderMinPageSize) { + if (Alignment <= pool->provider_min_page_size) { // This allocation will be served from a Bucket which size is multiple - // of Alignment and Slab address is aligned to ProviderMinPageSize + // of Alignment and Slab address is aligned to provider_min_page_size // so the address will be properly aligned. AlignedSize = (Size > 1) ? ALIGN_UP(Size, Alignment) : Alignment; } else { - // Slabs are only aligned to ProviderMinPageSize, we need to compensate + // Slabs are only aligned to provider_min_page_size, we need to compensate // for that in case the allocation is within pooling limit. // TODO: consider creating properly-aligned Slabs on demand AlignedSize = Size + Alignment - 1; @@ -992,7 +920,7 @@ void *AllocImpl_allocate_align(disjoint_pool_t *pool, size_t Size, // If not, just request aligned pointer from the system. *FromPool = false; if (AlignedSize > pool->params.MaxPoolableSize) { - Ptr = memoryProviderAlloc(pool->MemHandle, Size, Alignment); + Ptr = memoryProviderAlloc(pool->provider, Size, Alignment); assert(Ptr); annotate_memory_undefined(Ptr, Size); return Ptr; @@ -1017,7 +945,7 @@ void *AllocImpl_allocate_align(disjoint_pool_t *pool, size_t Size, } umf_result_t AllocImpl_deallocate(disjoint_pool_t *pool, void *Ptr, - bool *ToPool) { + bool *to_pool) { if (Ptr == NULL) { return UMF_RESULT_SUCCESS; } @@ -1027,13 +955,13 @@ umf_result_t AllocImpl_deallocate(disjoint_pool_t *pool, void *Ptr, // Lock the map on read utils_mutex_lock(&pool->known_slabs_map_lock); - *ToPool = false; + *to_pool = false; slab_t *slab = (slab_t *)critnib_get(pool->known_slabs, (uintptr_t)SlabPtr); //auto Slabs = getKnownSlabs().equal_range(SlabPtr); if (slab == NULL) { utils_mutex_unlock(&pool->known_slabs_map_lock); - umf_result_t ret = memoryProviderFree(pool->MemHandle, Ptr); + umf_result_t ret = memoryProviderFree(pool->provider, Ptr); return ret; } @@ -1046,18 +974,18 @@ umf_result_t AllocImpl_deallocate(disjoint_pool_t *pool, void *Ptr, // Unlock the map before freeing the chunk, it may be locked on write // there utils_mutex_unlock(&pool->known_slabs_map_lock); - bucket_t *bucket = slab_get_bucket(slab); + bucket_t *bucket = slab->bucket; if (pool->params.PoolTrace > 1) { - bucket_count_free(bucket); + bucket->free_count++; } VALGRIND_DO_MEMPOOL_FREE(pool, Ptr); - annotate_memory_inaccessible(Ptr, bucket_get_size(bucket)); - if (bucket_get_size(bucket) <= bucket_chunk_cut_off(bucket)) { - bucket_free_chunk(bucket, Ptr, slab, ToPool); + annotate_memory_inaccessible(Ptr, bucket->size); + if (bucket->size <= bucket_chunk_cut_off(bucket)) { + bucket_free_chunk(bucket, Ptr, slab, to_pool); } else { - bucket_free_slab(bucket, slab, ToPool); + bucket_free_slab(bucket, slab, to_pool); } return UMF_RESULT_SUCCESS; @@ -1068,7 +996,7 @@ umf_result_t AllocImpl_deallocate(disjoint_pool_t *pool, void *Ptr, // There is a rare case when we have a pointer from system allocation next // to some slab with an entry in the map. So we find a slab // but the range checks fail. - memoryProviderFree(pool->MemHandle, Ptr); + memoryProviderFree(pool->provider, Ptr); return UMF_RESULT_SUCCESS; } @@ -1076,7 +1004,7 @@ umf_result_t AllocImpl_deallocate(disjoint_pool_t *pool, void *Ptr, // TODO? 
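Several of the paths above return NULL to the caller while recording the underlying provider status in TLS_last_allocation_error, which the public get_last_allocation_error entry point reports later. A minimal sketch of that thread-local error pattern, with _Thread_local standing in for the __TLS macro and simplified names:

    #include <stdio.h>
    #include <stddef.h>

    typedef enum { RESULT_SUCCESS = 0, RESULT_OUT_OF_MEMORY = 1 } result_t;

    static _Thread_local result_t tls_last_allocation_error = RESULT_SUCCESS;

    static void *pool_malloc(size_t size) {
        (void)size;
        /* pretend the provider ran out of memory */
        tls_last_allocation_error = RESULT_OUT_OF_MEMORY;
        return NULL;
    }

    static result_t pool_get_last_allocation_error(void) {
        return tls_last_allocation_error;
    }

    int main(void) {
        void *p = pool_malloc(1 << 20);
        if (p == NULL) {
            printf("allocation failed, last error = %d\n",
                   pool_get_last_allocation_error());
        }
        return 0;
    }

Keeping the status thread-local means concurrent allocations on other threads cannot clobber the error code the caller is about to query.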
std::ostream &operator<<(std::ostream &Os, slab_t &Slab) { Os << "Slab<" << slab_get(&Slab) << ", " << slab_get_end(&Slab) << ", " - << slab_get_bucket(&Slab)->getSize() << ">"; + << slab->bucket->getSize() << ">"; return Os; } */ @@ -1084,7 +1012,7 @@ std::ostream &operator<<(std::ostream &Os, slab_t &Slab) { /* // TODO move void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { - if (allocCount) { + if (alloc_count) { if (!TitlePrinted) { std::cout << Label << " memory statistics\n"; std::cout << std::setw(14) << "Bucket Size" << std::setw(12) @@ -1094,10 +1022,10 @@ void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { << "Peak Slabs in Pool" << std::endl; TitlePrinted = true; } - std::cout << std::setw(14) << getSize() << std::setw(12) << allocCount - << std::setw(12) << freeCount << std::setw(18) - << allocPoolCount << std::setw(20) << maxSlabsInUse - << std::setw(21) << maxSlabsInPool << std::endl; + std::cout << std::setw(14) << getSize() << std::setw(12) << alloc_count + << std::setw(12) << free_count << std::setw(18) + << allocPoolCount << std::setw(20) << max_slabs_in_use + << std::setw(21) << max_slabs_in_pool << std::endl; } } */ @@ -1126,7 +1054,7 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, VALGRIND_DO_CREATE_MEMPOOL(disjoint_pool, 0, 0); - disjoint_pool->MemHandle = provider; + disjoint_pool->provider = provider; disjoint_pool->params = *dp_params; utils_mutex_init(&disjoint_pool->known_slabs_map_lock); @@ -1143,8 +1071,8 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); // Calculate the exponent for MinBucketSize used for finding buckets. - disjoint_pool->MinBucketSizeExp = (size_t)log2Utils(Size1); - disjoint_pool->DefaultSharedLimits = + disjoint_pool->min_bucket_size_exp = (size_t)log2Utils(Size1); + disjoint_pool->default_shared_limits = umfDisjointPoolSharedLimitsCreate(SIZE_MAX); // count number of buckets, start from 1 @@ -1170,9 +1098,9 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, CutOff, disjoint_pool, AllocImpl_getLimits(disjoint_pool)); umf_result_t ret = umfMemoryProviderGetMinPageSize( - provider, NULL, &disjoint_pool->ProviderMinPageSize); + provider, NULL, &disjoint_pool->provider_min_page_size); if (ret != UMF_RESULT_SUCCESS) { - disjoint_pool->ProviderMinPageSize = 0; + disjoint_pool->provider_min_page_size = 0; } *ppPool = (void *)disjoint_pool; @@ -1283,7 +1211,7 @@ void disjoint_pool_finalize(void *pool) { VALGRIND_DO_DESTROY_MEMPOOL(hPool); - umfDisjointPoolSharedLimitsDestroy(hPool->DefaultSharedLimits); + umfDisjointPoolSharedLimitsDestroy(hPool->default_shared_limits); critnib_delete(hPool->known_slabs); utils_mutex_destroy_not_free(&hPool->known_slabs_map_lock);
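For context on how these entry points are exposed (not part of this patch): the original pool_disjoint.cpp publishes them through a umf_memory_pool_ops_t table returned by umfDisjointPoolOps(). The sketch below follows that shape; the exact field names, the UMF_VERSION_CURRENT macro, and the calloc/realloc/malloc_usable_size handlers are assumptions to be checked against umf/memory_pool_ops.h and the rest of this file.

    /* Hedged sketch: ops-table wiring as done in the original C++ file.
       Field names and the version macro are assumptions, not taken from this patch. */
    static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = {
        .version = UMF_VERSION_CURRENT,
        .initialize = disjoint_pool_initialize,
        .finalize = disjoint_pool_finalize,
        .malloc = disjoint_pool_malloc,
        .calloc = disjoint_pool_calloc,               /* assumed to exist elsewhere */
        .realloc = disjoint_pool_realloc,             /* assumed to exist elsewhere */
        .aligned_malloc = disjoint_pool_aligned_malloc,
        .malloc_usable_size = disjoint_pool_malloc_usable_size, /* assumed */
        .free = disjoint_pool_free,
        .get_last_allocation_error = disjoint_pool_get_last_allocation_error,
    };

    umf_memory_pool_ops_t *umfDisjointPoolOps(void) {
        return &UMF_DISJOINT_POOL_OPS;
    }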