Skip to content

Commit

Permalink
Hkv code improve, fix bugs in the draft version, and enhance document…
Browse files Browse the repository at this point in the history
…ation and testing.
  • Loading branch information
LinGeLin committed Nov 15, 2023
1 parent 5ab4cec commit 045f5cb
Show file tree
Hide file tree
Showing 44 changed files with 2,612 additions and 7,336 deletions.
40 changes: 40 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,46 @@ http_archive(
url = "https://github.com/sewenew/redis-plus-plus/archive/refs/tags/1.2.3.zip",
)

# HierarchicalKV (HKV) sources, fetched from the v0.1.0-beta.6 release tarball
# and patched in place with sed so that thrust calls in merlin_hashtable.cuh
# receive a ThrustAllocator that forwards to HKV's BaseAllocator.
#
# NOTE: every insertion below is addressed by absolute line number, so the
# patch_cmds are tied to this exact release; re-check them whenever url,
# strip_prefix or sha256 change.
http_archive(
name = "hkv",
build_file = "//build_deps/toolchains/hkv:hkv.BUILD",
patch_cmds = [
# merlin_hashtable.cuh: declare the thrust_allocator_ member, bind it to
# allocator_, then pass it to the sort_by_key/reduce execution policies.
# The line-1772 insertion runs before the line-225 one, presumably so the
# lower insertion cannot shift the higher target line.
"""sed -i.bak '1772i\\'$'\\n ThrustAllocator<uint8_t> thrust_allocator_;\\n' include/merlin_hashtable.cuh""",
"""sed -i.bak '225i\\'$'\\n thrust_allocator_.set_allocator(allocator_);\\n' include/merlin_hashtable.cuh""",
"sed -i.bak 's/thrust::sort_by_key(thrust_par.on(stream)/thrust::sort_by_key(thrust_par(thrust_allocator_).on(stream)/' include/merlin_hashtable.cuh",
"sed -i.bak 's/reduce(thrust_par.on(stream)/reduce(thrust_par(thrust_allocator_).on(stream)/' include/merlin_hashtable.cuh",
# merlin/allocator.cuh: build the ThrustAllocator<T> struct one source line
# per command, at target lines 125 through 149 in ascending order.
"""sed -i.bak '125i\\'$'\\n template <typename T>\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '126i\\'$'\\n struct ThrustAllocator : thrust::device_malloc_allocator<T> {\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '127i\\'$'\\n public:\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '128i\\'$'\\n typedef thrust::device_malloc_allocator<T> super_t;\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '129i\\'$'\\n typedef typename super_t::pointer pointer;\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '130i\\'$'\\n typedef typename super_t::size_type size_type;\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '131i\\'$'\\n public:\\n' include/merlin/allocator.cuh""",
# allocate(): checks that set_allocator() was called, then requests
# sizeof(T) * n bytes of MemoryType::Device memory from allocator_.
"""sed -i.bak '132i\\'$'\\n pointer allocate(size_type n) {\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '133i\\'$'\\n void* ptr = nullptr;\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '134i\\'$'\\n MERLIN_CHECK(\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '135i\\'$'\\n allocator_ != nullptr,\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '136i\\'$'\\n "[ThrustAllocator] set_allocator should be called in advance!");\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '137i\\'$'\\n allocator_->alloc(MemoryType::Device, &ptr, sizeof(T) * n);\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '138i\\'$'\\n return pointer(reinterpret_cast<T*>(ptr));\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '139i\\'$'\\n }\\n' include/merlin/allocator.cuh""",
# deallocate(): same set_allocator() check, then frees through allocator_.
"""sed -i.bak '140i\\'$'\\n void deallocate(pointer p, size_type n) {\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '141i\\'$'\\n MERLIN_CHECK(\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '142i\\'$'\\n allocator_ != nullptr,\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '143i\\'$'\\n "[ThrustAllocator] set_allocator should be called in advance!");\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '144i\\'$'\\n allocator_->free(MemoryType::Device, reinterpret_cast<void*>(p.get()));\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '145i\\'$'\\n }\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '146i\\'$'\\n void set_allocator(BaseAllocator* allocator) { allocator_ = allocator; }\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '147i\\'$'\\n public:\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '148i\\'$'\\n BaseAllocator* allocator_ = nullptr;\\n' include/merlin/allocator.cuh""",
"""sed -i.bak '149i\\'$'\\n };\\n' include/merlin/allocator.cuh""",
# The #include is inserted at line 20 only after the struct is in place,
# presumably so it does not shift the 125-149 targets above.
"""sed -i.bak '20i\\'$'\\n #include <thrust/device_malloc_allocator.h>\\n' include/merlin/allocator.cuh""",
],
# Checksum of the release tarball named in url; update it together with
# url and strip_prefix.
sha256 = "f8179c445a06a558262946cda4d8ae7252d313e73f792586be9b1bc0c993b1cf",
strip_prefix = "HierarchicalKV-0.1.0-beta.6",
url = "https://github.com/NVIDIA-Merlin/HierarchicalKV/archive/refs/tags/v0.1.0-beta.6.tar.gz",
)

# Instantiates tf_configure under the repository name "local_config_tf".
# NOTE(review): presumably this exposes the locally installed TensorFlow to the
# build; the rule definition is not visible here, so confirm against it.
tf_configure(
name = "local_config_tf",
)
Expand Down
4 changes: 2 additions & 2 deletions build_deps/toolchains/gpu/cuda_configure.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ _DEFAULT_CUDA_COMPUTE_CAPABILITIES = {

_DEFAULT_CUDA_COMPUTE_CAPABILITIES.update(
{"11.{}".format(v): [
"6.0",
"6.1",
# "6.0",
# "6.1",
"7.0",
"7.5",
"8.0",
Expand Down
Empty file added build_deps/toolchains/hkv/BUILD
Empty file.
18 changes: 18 additions & 0 deletions build_deps/toolchains/hkv/hkv.BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda", "if_cuda_is_configured")

package(default_visibility = ["//visibility:public"])

# Header-only target for the HierarchicalKV (merlin) sources: it lists only
# hdrs and has no srcs.
cc_library(
name = "hkv",
hdrs = glob([
"include/merlin/core_kernels/*.cuh",
"include/merlin/*.cuh",
"include/merlin_hashtable.cuh",
"include/merlin_localfile.hpp",
]),
# NOTE(review): this target has no srcs, so these copts presumably never reach
# a compile action; confirm whether -Ofast was meant for the dependents.
copts = [
"-Ofast",
],
# NOTE(review): include_prefix and includes are both set to "include";
# confirm which #include spelling the dependents actually rely on.
include_prefix = "include",
includes = ["include"],
)
6 changes: 6 additions & 0 deletions docs/api_docs/tfra/dynamic_embedding.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ Export dynamic_embedding APIs.

[`class ModelMode`](../tfra/dynamic_embedding/ModelMode.md): The global config of model modes.

[`class HkvHashTable`](../tfra/dynamic_embedding/HkvHashTable.md): A generic mutable hash table implementation.

[`class HkvHashTableConfig`](../tfra/dynamic_embedding/HkvHashTableConfig.md): Configures the `init_capacity`, `max_capacity`, and `max_hbm_for_values` of an `HkvHashTable`.

[`class HkvHashTableCreator`](../tfra/dynamic_embedding/HkvHashTableCreator.md): A generic KV table creator.

[`class RedisTable`](../tfra/dynamic_embedding/RedisTable.md): A generic mutable hash table implementation.

[`class RedisTableConfig`](../tfra/dynamic_embedding/RedisTableConfig.md): RedisTableConfig config json file for connecting Redis service and
Expand Down
7 changes: 6 additions & 1 deletion docs/api_docs/tfra/dynamic_embedding/CuckooHashTable.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ remove method. It does not support initialization via the init method.
```python
table = tfra.dynamic_embedding.CuckooHashTable(key_dtype=tf.string,
value_dtype=tf.int64,
default_value=-1)
default_value=-1,
device=['/GPU:0'])
sess.run(table.insert(keys, values))
out = table.lookup(query_keys)
print(out.eval())
Expand Down Expand Up @@ -106,6 +107,10 @@ A `CuckooHashTable` object.
* <b>`ValueError`</b>: If checkpoint is True and no name was specified.


## <b>`Important update!!`</b>

We have updated the underlying implementation of `CuckooHashTable`. The original CPU table remains unchanged, but the GPU table now uses the HKV implementation instead of nvhash. To keep the interface consistent, both `init_capacity` and `max_capacity` of HKV are set to the `init_size` value you pass in. Note that with this setting the GPU hash table does not resize automatically, so its final capacity equals `init_size`. The `max_hbm_for_values` parameter of HKV is set to a sufficiently large number to ensure that all of your data is stored in the GPU table. Additionally, HKV requires a GPU compute capability of 8.0 or above. For more details, please refer to the HKV documentation.


## Properties

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,10 @@ class for creating the real KV table backend(TF resource).

#### Example usage:


Because `CuckooHashTableConfig` has no parameters that need to be changed from their defaults, setting the `saver` parameter is enough.

```python
redis_config1=tfra.dynamic_embedding.RedisTableConfig(
redis_config_abs_dir="xx/yy.json"
)
redis_creator1=tfra.dynamic_embedding.RedisTableCreator(redis_config1)
cuckoo_creator=tfra.dynamic_embedding.CuckooHashTableCreator(saver=de.FileSystemSaver())
```

<h2 id="__init__"><code>__init__</code></h2>
Expand Down
Loading

0 comments on commit 045f5cb

Please sign in to comment.