rapidsai · rapids-bot · Oct 17, 2024 · Oct 11, 2024 · Oct 16, 2024 · Oct 16, 2024
@@ -24,7 +24,7 @@ dependencies:
 - fmt>=11.0.2,<12
 - gcc_linux-64=11.*
 - graphviz
-- hdbscan>=0.8.38,<0.8.39
+- hdbscan>=0.8.39,<0.8.40
 - hypothesis>=6.0,<7
 - ipykernel
 - ipython
@@ -51,6 +51,7 @@ dependencies:
 - numpydoc
 - nvcc_linux-64=11.8
 - packaging
+- pip
 - pydata-sphinx-theme!=0.14.2
 - pylibraft==24.12.*,>=0.0.0a0
 - pynndescent
@@ -69,7 +70,6 @@ dependencies:
 - scikit-learn==1.5
 - scipy>=1.8.0
 - seaborn
-- setuptools
 - spdlog>=1.14.1,<1.15
 - sphinx-copybutton
 - sphinx-markdown-tables
@@ -78,4 +78,6 @@ dependencies:
 - sysroot_linux-64==2.17
 - treelite==4.3.0
 - umap-learn==0.5.6
+- pip:
+  - hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master
 name: all_cuda-118_arch-x86_64
@@ -26,7 +26,7 @@ dependencies:
 - fmt>=11.0.2,<12
 - gcc_linux-64=11.*
 - graphviz
-- hdbscan>=0.8.38,<0.8.39
+- hdbscan>=0.8.39,<0.8.40
 - hypothesis>=6.0,<7
 - ipykernel
 - ipython
@@ -47,6 +47,7 @@ dependencies:
 - numpy>=1.23,<3.0a0
 - numpydoc
 - packaging
+- pip
 - pydata-sphinx-theme!=0.14.2
 - pylibraft==24.12.*,>=0.0.0a0
 - pynndescent
@@ -65,7 +66,6 @@ dependencies:
 - scikit-learn==1.5
 - scipy>=1.8.0
 - seaborn
-- setuptools
 - spdlog>=1.14.1,<1.15
 - sphinx-copybutton
 - sphinx-markdown-tables
@@ -74,4 +74,6 @@ dependencies:
 - sysroot_linux-64==2.17
 - treelite==4.3.0
 - umap-learn==0.5.6
+- pip:
+  - hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master
 name: all_cuda-125_arch-x86_64
@@ -512,7 +512,7 @@ dependencies:
         packages:
           - *cython
           - dask-ml
-          - hdbscan>=0.8.38,<0.8.39
+          - hdbscan>=0.8.39,<0.8.40
           - hypothesis>=6.0,<7
           - nltk
           - numpydoc
@@ -526,7 +526,15 @@ dependencies:
           - statsmodels
           - umap-learn==0.5.6
           - pynndescent
-          - setuptools  # Needed on Python 3.12 for dask-glm, which requires pkg_resources but Python 3.12 doesn't have setuptools by default
+      - output_types: conda
+        packages:
+          - pip
+          - pip:
+              - hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master
+      - output_types: pyproject
+        packages:
+          - hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master
+
   test_notebooks:
     common:
       - output_types: [conda, requirements]

@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -166,19 +166,25 @@ def all_points_membership_vectors(clusterer, batch_size=4096):
 
         # trained on gpu
         if not hasattr(clusterer, "_cpu_model"):
-            # the reference HDBSCAN implementations uses @property
-            # for attributes without setters available for them,
-            # so they can't be transferred from the GPU model
-            # to the CPU model
-            raise ValueError("Inferring on CPU is not supported yet when the "
-                             "model has been trained on GPU")
+            clusterer.import_cpu_model()
+            clusterer.build_cpu_model()
+            clusterer.gpu_to_cpu()
+            # Assign the raw arrays (not the wrapper objects) to the CPU
+            # model: the reference HDBSCAN implementation reconstructs
+            # its tree objects from the raw arrays
+            clusterer._cpu_model.condensed_tree_ = \
+                clusterer.condensed_tree_._raw_tree
+            clusterer._cpu_model.single_linkage_tree_ = \
+                clusterer.single_linkage_tree_._linkage
+            clusterer._cpu_model.minimum_spanning_tree_ = \
+                clusterer.minimum_spanning_tree_._mst
 
         # this took a long debugging session to figure out, but
         # this method on cpu does not work without this copy for some reason
         clusterer._cpu_model.prediction_data_.raw_data = \
             clusterer._cpu_model.prediction_data_.raw_data.copy()
         return cpu_all_points_membership_vectors(clusterer._cpu_model)
-
+    # inference on GPU; the model may have been trained on CPU or GPU
     elif device_type == DeviceType.device:
         # trained on cpu
         if hasattr(clusterer, "_cpu_model"):

@@ -932,9 +932,6 @@ def test_nn_methods(train_device, infer_device):
 @pytest.mark.parametrize("infer_device", ["cpu", "gpu"])
 def test_hdbscan_methods(train_device, infer_device):
 
-    if train_device == "gpu" and infer_device == "cpu":
-        pytest.skip("Can't transfer attributes to cpu for now")
-
     ref_model = refHDBSCAN(
         prediction_data=True,
         approx_min_span_tree=False,
@@ -951,11 +948,13 @@ def test_hdbscan_methods(train_device, infer_device):
     ref_membership = cpu_all_points_membership_vectors(ref_model)
     ref_labels, ref_probs = cpu_approximate_predict(ref_model, X_test_blob)
 
+    gen_min_span_tree = train_device == "gpu" and infer_device == "cpu"
     model = HDBSCAN(
         prediction_data=True,
         approx_min_span_tree=False,
         max_cluster_size=0,
         min_cluster_size=30,
+        gen_min_span_tree=gen_min_span_tree,
     )
     with using_device_type(train_device):
         trained_labels = model.fit_predict(X_train_blob)

@@ -113,7 +113,8 @@ classifiers = [
 test = [
     "cython>=3.0.0",
     "dask-ml",
-    "hdbscan>=0.8.38,<0.8.39",
+    "hdbscan @ git+https://github.com/scikit-learn-contrib/hdbscan.git@master",
+    "hdbscan>=0.8.39,<0.8.40",
     "hypothesis>=6.0,<7",
     "nltk",
     "numpydoc",
@@ -125,7 +126,6 @@ test = [
     "pytest==7.*",
     "scikit-learn==1.5",
     "seaborn",
-    "setuptools",
     "statsmodels",
     "umap-learn==0.5.6",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.