From 743bb3e13ed35548dfddbfab8f8d0bffd634b834 Mon Sep 17 00:00:00 2001 From: Micka Date: Fri, 5 May 2023 09:14:35 +0100 Subject: [PATCH] Add mdspan API to raft IVF functions (#810) * Adding mdspan calls to ivf raft Signed-off-by: Mickael Ide * Add pylibraft Signed-off-by: Mickael Ide * Remove pylibraft Signed-off-by: Mickael Ide * Fix style Signed-off-by: Mickael Ide --------- Signed-off-by: Mickael Ide --- CMakeLists.txt | 10 +++- README.md | 2 +- cmake/libs/libraft.cmake | 26 +++++---- cmake/utils/fetch_rapids.cmake | 8 +-- src/index/ivf_raft/ivf_raft.cuh | 98 ++++++++++++++++++--------------- 5 files changed, 83 insertions(+), 61 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f4ca3164..ab50f61e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -147,7 +147,15 @@ list(APPEND KNOWHERE_LINKER_LIBS prometheus-cpp::core prometheus-cpp::push) add_library(knowhere SHARED ${KNOWHERE_SRCS}) add_dependencies(knowhere ${KNOWHERE_LINKER_LIBS}) if(WITH_RAFT) - list(APPEND KNOWHERE_LINKER_LIBS raft::raft) + list(APPEND KNOWHERE_LINKER_LIBS raft::raft raft::compiled) + find_library(LIBRAFT_FOUND raft) + if (NOT LIBRAFT_FOUND) + message(WARNING "libraft not found") + else() + message(STATUS "libraft found") + list(APPEND KNOWHERE_LINKER_LIBS ${LIBRAFT_FOUND}) + add_definitions(-DRAFT_COMPILED) + endif() endif() target_link_libraries(knowhere PUBLIC ${KNOWHERE_LINKER_LIBS}) target_include_directories( diff --git a/README.md b/README.md index 51992f292..c6bd686c3 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ $ conan install .. --build=missing -o with_ut=True -o with_raft=True -s compiler #DISKANN SUPPORT $ conan install .. --build=missing -o with_ut=True -o with_diskann=True -s compiler.libcxx=libstdc++11 -s build_type=Debug/Release #build with conan -$conan build .. +$ conan build .. #verbose export VERBOSE=1 ``` diff --git a/cmake/libs/libraft.cmake b/cmake/libs/libraft.cmake index 75e11b5b0..e0f858ffe 100644 --- a/cmake/libs/libraft.cmake +++ b/cmake/libs/libraft.cmake @@ -32,10 +32,14 @@ set(RAFT_FORK "rapidsai") set(RAFT_PINNED_TAG "branch-${RAPIDS_VERSION}") function(find_and_configure_raft) - set(oneValueArgs VERSION FORK PINNED_TAG) + set(oneValueArgs VERSION FORK PINNED_TAG COMPILE_LIBRARY) cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(RAFT_COMPONENTS "") + if(PKG_COMPILE_LIBRARY) + string(APPEND RAFT_COMPONENTS " compiled") + endif() # ----------------------------------------------------- # Invoke CPM find_package() # ----------------------------------------------------- @@ -44,12 +48,8 @@ function(find_and_configure_raft) ${PKG_VERSION} GLOBAL_TARGETS raft::raft - BUILD_EXPORT_SET - faiss-exports - INSTALL_EXPORT_SET - faiss-exports COMPONENTS - "distance nn" + ${RAFT_COMPONENTS} CPM_ARGS GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git @@ -60,13 +60,17 @@ function(find_and_configure_raft) OPTIONS "BUILD_TESTS OFF" "BUILD_BENCH OFF" - "RAFT_COMPILE_LIBRARIES OFF" - "RAFT_COMPILE_NN_LIBRARY OFF" - "RAFT_USE_FAISS_STATIC OFF" # Turn this on to build FAISS into your binary - "RAFT_ENABLE_NN_DEPENDENCIES OFF") + "RAFT_COMPILE_LIBRARY ${PKG_COMPILE_LIBRARY}" + "RAFT_USE_FAISS_STATIC OFF") # Turn this on to build FAISS into your binary + + if(raft_ADDED) + message(VERBOSE "KNOWHERE: Using RAFT located in ${raft_SOURCE_DIR}") + else() + message(VERBOSE "KNOWHERE: Using RAFT located in ${raft_DIR}") + endif() endfunction() # Change pinned tag here to test a commit in CI To use a different RAFT locally, # set the CMake variable CPM_raft_SOURCE=/path/to/local/raft find_and_configure_raft(VERSION ${RAFT_VERSION}.00 FORK ${RAFT_FORK} PINNED_TAG - ${RAFT_PINNED_TAG}) + ${RAFT_PINNED_TAG} COMPILE_LIBRARY OFF) diff --git a/cmake/utils/fetch_rapids.cmake b/cmake/utils/fetch_rapids.cmake index e6f76f6e0..56899f2c5 100644 --- a/cmake/utils/fetch_rapids.cmake +++ b/cmake/utils/fetch_rapids.cmake @@ -13,12 +13,12 @@ # License for the specific language governing permissions and limitations under # the License. -set(RAPIDS_VERSION "23.02") +set(RAPIDS_VERSION "23.04") -if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/FAISS_RAPIDS.cmake) +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake) file( DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake - ${CMAKE_CURRENT_BINARY_DIR}/FAISS_RAPIDS.cmake) + ${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake) endif() -include(${CMAKE_CURRENT_BINARY_DIR}/FAISS_RAPIDS.cmake) +include(${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake) diff --git a/src/index/ivf_raft/ivf_raft.cuh b/src/index/ivf_raft/ivf_raft.cuh index c202ca5e5..8a5e1f184 100644 --- a/src/index/ivf_raft/ivf_raft.cuh +++ b/src/index/ivf_raft/ivf_raft.cuh @@ -38,6 +38,10 @@ #include "thrust/execution_policy.h" #include "thrust/sequence.h" +#ifdef RAFT_COMPILED +#include +#endif + namespace knowhere { __global__ void @@ -303,9 +307,9 @@ class RaftIvfIndexNode : public IndexNode { auto* data = reinterpret_cast(dataset.GetTensor()); auto stream = res_->get_stream(); - auto data_gpu = rmm::device_uvector(rows * dim, stream); - RAFT_CUDA_TRY(cudaMemcpyAsync(data_gpu.data(), data, data_gpu.size() * sizeof(float), cudaMemcpyDefault, - stream.value())); + auto data_gpu = raft::make_device_matrix(*res_, rows, dim); + RAFT_CUDA_TRY(cudaMemcpyAsync(data_gpu.data_handle(), data, data_gpu.size() * sizeof(float), + cudaMemcpyDefault, stream.value())); if constexpr (std::is_same_v) { auto build_params = raft::neighbors::ivf_flat::index_params{}; build_params.metric = metric.value(); @@ -313,8 +317,8 @@ class RaftIvfIndexNode : public IndexNode { build_params.kmeans_n_iters = ivf_raft_cfg.kmeans_n_iters; build_params.kmeans_trainset_fraction = ivf_raft_cfg.kmeans_trainset_fraction; build_params.adaptive_centers = ivf_raft_cfg.adaptive_centers; - gpu_index_ = raft::neighbors::ivf_flat::build(*res_, build_params, - data_gpu.data(), rows, dim); + gpu_index_ = + raft::neighbors::ivf_flat::build(*res_, build_params, data_gpu.view()); } else if constexpr (std::is_same_v) { auto build_params = raft::neighbors::ivf_pq::index_params{}; build_params.metric = metric.value(); @@ -330,8 +334,8 @@ class RaftIvfIndexNode : public IndexNode { } build_params.codebook_kind = codebook_kind.value(); build_params.force_random_rotation = ivf_raft_cfg.force_random_rotation; - gpu_index_ = raft::neighbors::ivf_pq::build(*res_, build_params, - data_gpu.data(), rows, dim); + gpu_index_ = + raft::neighbors::ivf_pq::build(*res_, build_params, data_gpu.view()); } else { static_assert(std::is_same_v); } @@ -366,19 +370,25 @@ class RaftIvfIndexNode : public IndexNode { auto stream = res_->get_stream(); // TODO(wphicks): Clean up transfer with raft // buffer objects when available - auto data_gpu = rmm::device_uvector(rows * dim, stream); - RAFT_CUDA_TRY(cudaMemcpyAsync(data_gpu.data(), data, data_gpu.size() * sizeof(float), cudaMemcpyDefault, - stream.value())); + auto data_gpu = raft::make_device_matrix(*res_, rows, dim); + RAFT_CUDA_TRY(cudaMemcpyAsync(data_gpu.data_handle(), data, data_gpu.size() * sizeof(float), + cudaMemcpyDefault, stream.value())); auto indices = rmm::device_uvector(rows, stream); thrust::sequence(thrust::device, indices.begin(), indices.end(), gpu_index_->size()); if constexpr (std::is_same_v) { - raft::neighbors::ivf_flat::extend(*res_, *gpu_index_, data_gpu.data(), - indices.data(), rows); + raft::neighbors::ivf_flat::extend( + *res_, raft::make_const_mdspan(data_gpu.view()), + std::make_optional( + raft::make_device_vector_view(indices.data(), rows)), + gpu_index_.value()); } else if constexpr (std::is_same_v) { - raft::neighbors::ivf_pq::extend(*res_, *gpu_index_, data_gpu.data(), - indices.data(), rows); + raft::neighbors::ivf_pq::extend( + *res_, raft::make_const_mdspan(data_gpu.view()), + std::make_optional( + raft::make_device_matrix_view(indices.data(), rows, 1)), + gpu_index_.value()); } else { static_assert(std::is_same_v); } @@ -410,20 +420,20 @@ class RaftIvfIndexNode : public IndexNode { auto stream = res_->get_stream(); // TODO(wphicks): Clean up transfer with raft // buffer objects when available - auto data_gpu = rmm::device_uvector(rows * dim, stream); - RAFT_CUDA_TRY(cudaMemcpyAsync(data_gpu.data(), data, data_gpu.size() * sizeof(float), cudaMemcpyDefault, - stream.value())); + auto data_gpu = raft::make_device_matrix(*res_, rows, dim); + RAFT_CUDA_TRY(cudaMemcpyAsync(data_gpu.data_handle(), data, data_gpu.size() * sizeof(float), + cudaMemcpyDefault, stream.value())); - auto ids_gpu = rmm::device_uvector(output_size, stream); - auto dis_gpu = rmm::device_uvector(output_size, stream); + auto ids_gpu = raft::make_device_matrix(*res_, rows, ivf_raft_cfg.k); + auto dis_gpu = raft::make_device_matrix(*res_, rows, ivf_raft_cfg.k); if constexpr (std::is_same_v) { auto search_params = raft::neighbors::ivf_flat::search_params{}; search_params.n_probes = ivf_raft_cfg.nprobe; if (bitset.empty()) { raft::neighbors::ivf_flat::search(*res_, search_params, *gpu_index_, - data_gpu.data(), rows, ivf_raft_cfg.k, - ids_gpu.data(), dis_gpu.data()); + raft::make_const_mdspan(data_gpu.view()), + ids_gpu.view(), dis_gpu.view()); } else { auto k1 = ivf_raft_cfg.k; auto k2 = k1; @@ -434,21 +444,21 @@ class RaftIvfIndexNode : public IndexNode { k2 |= k2 >> 14; k2 += 1; while (k2 <= 1024) { - auto ids_gpu_before = rmm::device_uvector(k2 * rows, stream); - auto dis_gpu_before = rmm::device_uvector(k2 * rows, stream); - auto bs_gpu = rmm::device_uvector(bitset.byte_size(), stream); - RAFT_CUDA_TRY(cudaMemcpyAsync(bs_gpu.data(), bitset.data(), bitset.byte_size(), + auto ids_gpu_before = raft::make_device_matrix(*res_, rows, k2); + auto dis_gpu_before = raft::make_device_matrix(*res_, rows, k2); + auto bs_gpu = raft::make_device_vector(*res_, bitset.byte_size()); + RAFT_CUDA_TRY(cudaMemcpyAsync(bs_gpu.data_handle(), bitset.data(), bitset.byte_size(), cudaMemcpyDefault, stream.value())); raft::neighbors::ivf_flat::search( - *res_, search_params, *gpu_index_, data_gpu.data(), rows, k2, ids_gpu_before.data(), - dis_gpu_before.data()); + *res_, search_params, *gpu_index_, raft::make_const_mdspan(data_gpu.view()), + ids_gpu_before.view(), dis_gpu_before.view()); filter<<>>( - k1, k2, rows, bs_gpu.data(), ids_gpu_before.data(), dis_gpu_before.data(), ids_gpu.data(), - dis_gpu.data()); + k1, k2, rows, bs_gpu.data_handle(), ids_gpu_before.data_handle(), + dis_gpu_before.data_handle(), ids_gpu.data_handle(), dis_gpu.data_handle()); std::int64_t is_fine = 0; - RAFT_CUDA_TRY(cudaMemcpyAsync(&is_fine, ids_gpu_before.data(), sizeof(std::int64_t), + RAFT_CUDA_TRY(cudaMemcpyAsync(&is_fine, ids_gpu_before.data_handle(), sizeof(std::int64_t), cudaMemcpyDefault, stream.value())); stream.synchronize(); if (is_fine != -1) @@ -485,8 +495,8 @@ class RaftIvfIndexNode : public IndexNode { search_params.preferred_shmem_carveout = search_params.preferred_shmem_carveout; if (bitset.empty()) { raft::neighbors::ivf_pq::search(*res_, search_params, *gpu_index_, - data_gpu.data(), rows, ivf_raft_cfg.k, - ids_gpu.data(), dis_gpu.data()); + raft::make_const_mdspan(data_gpu.view()), + ids_gpu.view(), dis_gpu.view()); } else { auto k1 = ivf_raft_cfg.k; auto k2 = k1; @@ -497,22 +507,22 @@ class RaftIvfIndexNode : public IndexNode { k2 |= k2 >> 14; k2 += 1; while (k2 <= 1024) { - auto ids_gpu_before = rmm::device_uvector(k2 * rows, stream); - auto dis_gpu_before = rmm::device_uvector(k2 * rows, stream); - auto bs_gpu = rmm::device_uvector(bitset.byte_size(), stream); - RAFT_CUDA_TRY(cudaMemcpyAsync(bs_gpu.data(), bitset.data(), bitset.byte_size(), + auto ids_gpu_before = raft::make_device_matrix(*res_, rows, k2); + auto dis_gpu_before = raft::make_device_matrix(*res_, rows, k2); + auto bs_gpu = raft::make_device_vector(*res_, bitset.byte_size()); + RAFT_CUDA_TRY(cudaMemcpyAsync(bs_gpu.data_handle(), bitset.data(), bitset.byte_size(), cudaMemcpyDefault, stream.value())); raft::neighbors::ivf_pq::search( - *res_, search_params, *gpu_index_, data_gpu.data(), rows, k2, ids_gpu_before.data(), - dis_gpu_before.data()); + *res_, search_params, *gpu_index_, raft::make_const_mdspan(data_gpu.view()), + ids_gpu_before.view(), dis_gpu_before.view()); filter<<>>( - k1, k2, rows, bs_gpu.data(), ids_gpu_before.data(), dis_gpu_before.data(), ids_gpu.data(), - dis_gpu.data()); + k1, k2, rows, bs_gpu.data_handle(), ids_gpu_before.data_handle(), + dis_gpu_before.data_handle(), ids_gpu.data_handle(), dis_gpu.data_handle()); std::int64_t is_fine = 0; - RAFT_CUDA_TRY(cudaMemcpyAsync(&is_fine, ids_gpu_before.data(), sizeof(std::int64_t), + RAFT_CUDA_TRY(cudaMemcpyAsync(&is_fine, ids_gpu_before.data_handle(), sizeof(std::int64_t), cudaMemcpyDefault, stream.value())); stream.synchronize(); if (is_fine != -1) @@ -524,10 +534,10 @@ class RaftIvfIndexNode : public IndexNode { } else { static_assert(std::is_same_v); } - RAFT_CUDA_TRY(cudaMemcpyAsync(ids.get(), ids_gpu.data(), ids_gpu.size() * sizeof(std::int64_t), + RAFT_CUDA_TRY(cudaMemcpyAsync(ids.get(), ids_gpu.data_handle(), ids_gpu.size() * sizeof(std::int64_t), + cudaMemcpyDefault, stream.value())); + RAFT_CUDA_TRY(cudaMemcpyAsync(dis.get(), dis_gpu.data_handle(), dis_gpu.size() * sizeof(float), cudaMemcpyDefault, stream.value())); - RAFT_CUDA_TRY(cudaMemcpyAsync(dis.get(), dis_gpu.data(), dis_gpu.size() * sizeof(float), cudaMemcpyDefault, - stream.value())); stream.synchronize(); } catch (std::exception& e) { LOG_KNOWHERE_WARNING_ << "RAFT inner error, " << e.what();