oneDAL code: support float and double data types
Signed-off-by: minmingzhu <[email protected]>
minmingzhu committed Aug 9, 2023
1 parent c199710 commit a404670
Showing 8 changed files with 372 additions and 99 deletions.
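
Every file in this commit applies the same pattern: read the column data type from the table metadata at run time, then instantiate the float or double specialization of the algorithm descriptor inside a switch. A minimal self-contained sketch of that tag-dispatch idea (toy enum and descriptor, not the oneDAL API):

#include <iostream>
#include <stdexcept>

// Toy stand-ins for oneDAL's data_type tag and a templated descriptor.
enum class data_type { float32, float64 };

template <typename Float>
struct descriptor {
    void compute() const {
        std::cout << "computing with " << sizeof(Float) * 8
                  << "-bit floats\n";
    }
};

// Dispatch: pick the template instantiation matching the runtime tag,
// mirroring the switch (dtype) blocks added in this commit.
void compute_for(data_type dtype) {
    switch (dtype) {
    case data_type::float32:
        descriptor<float>{}.compute();
        break;
    case data_type::float64:
        descriptor<double>{}.compute();
        break;
    default:
        throw std::invalid_argument("unsupported data type");
    }
}

int main() {
    compute_for(data_type::float32); // computing with 32-bit floats
    compute_for(data_type::float64); // computing with 64-bit floats
}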
45 changes: 36 additions & 9 deletions mllib-dal/src/main/native/CorrelationImpl.cpp
@@ -153,20 +153,47 @@ static void doCorrelationOneAPICompute(
     const bool isRoot = (comm.get_rank() == ccl_root);
     homogen_table htable =
         *reinterpret_cast<const homogen_table *>(pNumTabData);
-
-    const auto cor_desc = covariance_gpu::descriptor{}.set_result_options(
-        covariance_gpu::result_options::cor_matrix |
-        covariance_gpu::result_options::means);
+    const auto &dtype = htable.get_metadata().get_data_type(0);
+    covariance_gpu::compute_result result_train;
     auto t1 = std::chrono::high_resolution_clock::now();
-    const auto result_train = preview::compute(comm, cor_desc, htable);
+    switch (dtype) {
+    case data_type::float32: {
+        const auto cor_desc =
+            covariance_gpu::descriptor<float>{}.set_result_options(
+                covariance_gpu::result_options::cor_matrix |
+                covariance_gpu::result_options::means);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train = preview::compute(comm, cor_desc, htable);
+        break;
+    }
+    case data_type::float64: {
+        const auto cor_desc =
+            covariance_gpu::descriptor<double>{}.set_result_options(
+                covariance_gpu::result_options::cor_matrix |
+                covariance_gpu::result_options::means);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train = preview::compute(comm, cor_desc, htable);
+        break;
+    }
+    default: {
+        std::cout << "unsupported data type: " << static_cast<int>(dtype)
+                  << std::endl;
+        exit(-1);
+    }
+    }
+    auto t2 = std::chrono::high_resolution_clock::now();
+    auto duration =
+        (float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
+            .count();
+    std::cout << "Correlation batch(native): computing step took "
+              << duration / 1000 << " secs." << std::endl;
     if (isRoot) {
         std::cout << "Mean:\n" << result_train.get_means() << std::endl;
         std::cout << "Correlation:\n"
                   << result_train.get_cor_matrix() << std::endl;
-        auto t2 = std::chrono::high_resolution_clock::now();
-        auto duration =
-            std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
-                .count();
+        t2 = std::chrono::high_resolution_clock::now();
+        duration =
+            (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                t2 - t1)
+                .count();
         std::cout << "Correlation batch(native): computing step took "
                   << duration / 1000 << " secs." << std::endl;
         // Return all covariance & mean
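The two arms of the switch above differ only in the descriptor's template parameter. A possible refactor, sketched under the assumption that this file's covariance_gpu and preview aliases and the SPMD communicator type are in scope (compute_correlation is a hypothetical helper, not part of this commit):

// Hypothetical helper (not in this commit): one templated function instead
// of two duplicated switch arms.
template <typename Float, typename Comm>
static auto compute_correlation(Comm &comm, const homogen_table &htable) {
    const auto cor_desc =
        covariance_gpu::descriptor<Float>{}.set_result_options(
            covariance_gpu::result_options::cor_matrix |
            covariance_gpu::result_options::means);
    return preview::compute(comm, cor_desc, htable);
}

// Each switch arm then shrinks to, e.g.:
// case data_type::float32:
//     result_train = compute_correlation<float>(comm, htable);
//     break;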
105 changes: 82 additions & 23 deletions mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp
@@ -225,28 +225,82 @@ static jobject doRFClassifierOneAPICompute(
               << hFeaturetable.get_column_count() << std::endl;
     std::cout << "doRFClassifierOneAPICompute classCount = " << classCount
               << std::endl;
-    const auto df_desc =
-        df::descriptor<float, df::method::hist, df::task::classification>{}
-            .set_class_count(classCount)
-            .set_tree_count(treeCount)
-            .set_features_per_node(numFeaturesPerNode)
-            .set_min_observations_in_leaf_node(minObservationsLeafNode)
-            .set_min_observations_in_split_node(minObservationsSplitNode)
-            .set_min_weight_fraction_in_leaf_node(minWeightFractionLeafNode)
-            .set_min_impurity_decrease_in_split_node(
-                minImpurityDecreaseSplitNode)
-            .set_error_metric_mode(df::error_metric_mode::out_of_bag_error)
-            .set_variable_importance_mode(df::variable_importance_mode::mdi)
-            .set_infer_mode(df::infer_mode::class_responses |
-                            df::infer_mode::class_probabilities)
-            .set_voting_mode(df::voting_mode::weighted)
-            .set_max_tree_depth(maxTreeDepth)
-            .set_max_bins(maxBins);
-
-    const auto result_train =
-        preview::train(comm, df_desc, hFeaturetable, hLabeltable);
-    const auto result_infer =
-        preview::infer(comm, df_desc, result_train.get_model(), hFeaturetable);
+    const auto &dtype = hFeaturetable.get_metadata().get_data_type(0);
+    df::train_result result_train;
+    df::infer_result result_infer;
+    auto t1 = std::chrono::high_resolution_clock::now();
+    auto t2 = std::chrono::high_resolution_clock::now();
+    auto duration =
+        (float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
+            .count();
+    switch (dtype) {
+    case data_type::float32: {
+        const auto df_desc =
+            df::descriptor<float, df::method::hist, df::task::classification>{}
+                .set_class_count(classCount)
+                .set_tree_count(treeCount)
+                .set_features_per_node(numFeaturesPerNode)
+                .set_min_observations_in_leaf_node(minObservationsLeafNode)
+                .set_min_observations_in_split_node(minObservationsSplitNode)
+                .set_min_weight_fraction_in_leaf_node(minWeightFractionLeafNode)
+                .set_min_impurity_decrease_in_split_node(
+                    minImpurityDecreaseSplitNode)
+                .set_error_metric_mode(df::error_metric_mode::out_of_bag_error)
+                .set_variable_importance_mode(df::variable_importance_mode::mdi)
+                .set_infer_mode(df::infer_mode::class_responses |
+                                df::infer_mode::class_probabilities)
+                .set_voting_mode(df::voting_mode::weighted)
+                .set_max_tree_depth(maxTreeDepth)
+                .set_max_bins(maxBins);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train =
+            preview::train(comm, df_desc, hFeaturetable, hLabeltable);
+        t2 = std::chrono::high_resolution_clock::now();
+        duration = (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                       t2 - t1)
+                       .count();
+        std::cout << "DF Classifier (native): training step took "
+                  << duration / 1000 << " secs." << std::endl;
+        result_infer = preview::infer(comm, df_desc, result_train.get_model(),
+                                      hFeaturetable);
+        break;
+    }
+    case data_type::float64: {
+        const auto df_desc =
+            df::descriptor<double, df::method::hist, df::task::classification>{}
+                .set_class_count(classCount)
+                .set_tree_count(treeCount)
+                .set_features_per_node(numFeaturesPerNode)
+                .set_min_observations_in_leaf_node(minObservationsLeafNode)
+                .set_min_observations_in_split_node(minObservationsSplitNode)
+                .set_min_weight_fraction_in_leaf_node(minWeightFractionLeafNode)
+                .set_min_impurity_decrease_in_split_node(
+                    minImpurityDecreaseSplitNode)
+                .set_error_metric_mode(df::error_metric_mode::out_of_bag_error)
+                .set_variable_importance_mode(df::variable_importance_mode::mdi)
+                .set_infer_mode(df::infer_mode::class_responses |
+                                df::infer_mode::class_probabilities)
+                .set_voting_mode(df::voting_mode::weighted)
+                .set_max_tree_depth(maxTreeDepth)
+                .set_max_bins(maxBins);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train =
+            preview::train(comm, df_desc, hFeaturetable, hLabeltable);
+        t2 = std::chrono::high_resolution_clock::now();
+        duration = (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                       t2 - t1)
+                       .count();
+        std::cout << "DF Classifier (native): training step took "
+                  << duration / 1000 << " secs." << std::endl;
+        result_infer = preview::infer(comm, df_desc, result_train.get_model(),
+                                      hFeaturetable);
+        break;
+    }
+    default: {
+        std::cout << "unsupported data type: " << static_cast<int>(dtype)
+                  << std::endl;
+        exit(-1);
+    }
+    }
     jobject trees = nullptr;
     if (isRoot) {
         std::cout << "Variable importance results:\n"
@@ -256,7 +310,12 @@ static jobject doRFClassifierOneAPICompute(
                   << result_infer.get_responses() << std::endl;
         std::cout << "Probabilities results:\n"
                   << result_infer.get_probabilities() << std::endl;
-
+        t2 = std::chrono::high_resolution_clock::now();
+        duration = (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                       t2 - t1)
+                       .count();
+        std::cout << "DF Classifier (native): training step took "
+                  << duration / 1000 << " secs." << std::endl;
         // convert to java hashmap
         trees = collect_model(env, result_train.get_model(), classCount);

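Both switch arms in this file configure an identical hyper-parameter chain and differ only in the descriptor's first template argument. The single result_train variable works across both arms because df::train_result is parameterized on the task, not the float type. One way to avoid the duplication, sketched assuming this file's df and preview aliases are in scope (dispatch_on_dtype is a hypothetical helper, not part of this commit):

// Hypothetical helper (not in this commit): run `body` with a float or
// double tag chosen from the runtime data_type. Both instantiations must
// return the same type, which holds for oneDAL train/infer results.
template <typename Body>
static auto dispatch_on_dtype(data_type dtype, Body &&body) {
    switch (dtype) {
    case data_type::float32:
        return body(float{});
    case data_type::float64:
        return body(double{});
    default:
        std::cout << "unsupported data type" << std::endl;
        exit(-1); // std::exit is [[noreturn]], so no fall-through
    }
}

// Each algorithm body would then be written once, e.g.:
// result_train = dispatch_on_dtype(dtype, [&](auto tag) {
//     using Float = decltype(tag);
//     const auto df_desc =
//         df::descriptor<Float, df::method::hist,
//                        df::task::classification>{} /* ...setters... */;
//     return preview::train(comm, df_desc, hFeaturetable, hLabeltable);
// });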
82 changes: 66 additions & 16 deletions mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp
@@ -220,22 +220,72 @@ static jobject doRFRegressorOneAPICompute(
         *reinterpret_cast<const homogen_table *>(pNumTabLabel);
     std::cout << "doRFRegressorOneAPICompute get_column_count = "
               << hFeaturetable.get_column_count() << std::endl;
-    const auto df_desc =
-        df::descriptor<float, df::method::hist, df::task::regression>{}
-            .set_tree_count(treeCount)
-            .set_features_per_node(numFeaturesPerNode)
-            .set_min_observations_in_leaf_node(minObservationsLeafNode)
-            .set_max_tree_depth(maxTreeDepth)
-            .set_max_bins(maxbins)
-            .set_error_metric_mode(
-                df::error_metric_mode::out_of_bag_error |
-                df::error_metric_mode::out_of_bag_error_per_observation)
-            .set_variable_importance_mode(df::variable_importance_mode::mdi);
-
-    const auto result_train =
-        preview::train(comm, df_desc, hFeaturetable, hLabeltable);
-    const auto result_infer =
-        preview::infer(comm, df_desc, result_train.get_model(), hFeaturetable);
+    const auto &dtype = hFeaturetable.get_metadata().get_data_type(0);
+    df::train_result result_train;
+    df::infer_result result_infer;
+    auto t1 = std::chrono::high_resolution_clock::now();
+    auto t2 = std::chrono::high_resolution_clock::now();
+    auto duration =
+        (float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
+            .count();
+    switch (dtype) {
+    case data_type::float32: {
+        const auto df_desc =
+            df::descriptor<float, df::method::hist, df::task::regression>{}
+                .set_tree_count(treeCount)
+                .set_features_per_node(numFeaturesPerNode)
+                .set_min_observations_in_leaf_node(minObservationsLeafNode)
+                .set_max_tree_depth(maxTreeDepth)
+                .set_max_bins(maxbins)
+                .set_error_metric_mode(
+                    df::error_metric_mode::out_of_bag_error |
+                    df::error_metric_mode::out_of_bag_error_per_observation)
+                .set_variable_importance_mode(
+                    df::variable_importance_mode::mdi);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train =
+            preview::train(comm, df_desc, hFeaturetable, hLabeltable);
+        t2 = std::chrono::high_resolution_clock::now();
+        duration = (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                       t2 - t1)
+                       .count();
+        std::cout << "DF Regressor (native): training step took "
+                  << duration / 1000 << " secs." << std::endl;
+        result_infer = preview::infer(comm, df_desc, result_train.get_model(),
+                                      hFeaturetable);
+        break;
+    }
+    case data_type::float64: {
+        const auto df_desc =
+            df::descriptor<double, df::method::hist, df::task::regression>{}
+                .set_tree_count(treeCount)
+                .set_features_per_node(numFeaturesPerNode)
+                .set_min_observations_in_leaf_node(minObservationsLeafNode)
+                .set_max_tree_depth(maxTreeDepth)
+                .set_max_bins(maxbins)
+                .set_error_metric_mode(
+                    df::error_metric_mode::out_of_bag_error |
+                    df::error_metric_mode::out_of_bag_error_per_observation)
+                .set_variable_importance_mode(
+                    df::variable_importance_mode::mdi);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train =
+            preview::train(comm, df_desc, hFeaturetable, hLabeltable);
+        t2 = std::chrono::high_resolution_clock::now();
+        duration = (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                       t2 - t1)
+                       .count();
+        std::cout << "DF Regressor (native): training step took "
+                  << duration / 1000 << " secs." << std::endl;
+        result_infer = preview::infer(comm, df_desc, result_train.get_model(),
+                                      hFeaturetable);
+        break;
+    }
+    default: {
+        std::cout << "unsupported data type: " << static_cast<int>(dtype)
+                  << std::endl;
+        exit(-1);
+    }
+    }
     jobject trees = nullptr;
     if (isRoot) {
         std::cout << "Variable importance results:\n"
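The t1/t2/duration boilerplate around every train and compute call in this commit could be wrapped in a small utility. A self-contained sketch (Stopwatch is hypothetical, not part of this commit; the label is illustrative):

#include <chrono>
#include <iostream>
#include <string>
#include <utility>

// Hypothetical utility (not in this commit): wraps the repeated
// high_resolution_clock / duration_cast timing code above.
class Stopwatch {
  public:
    explicit Stopwatch(std::string label)
        : label_(std::move(label)),
          start_(std::chrono::high_resolution_clock::now()) {}

    // Prints the elapsed time in seconds, matching the log format above.
    void report() const {
        const auto end = std::chrono::high_resolution_clock::now();
        const auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                            end - start_)
                            .count();
        std::cout << label_ << " took " << ms / 1000.0 << " secs."
                  << std::endl;
    }

  private:
    std::string label_;
    std::chrono::high_resolution_clock::time_point start_;
};

int main() {
    Stopwatch sw("DF Regressor (native): training step");
    // ... train/infer would run here ...
    sw.report();
}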
53 changes: 42 additions & 11 deletions mllib-dal/src/main/native/KMeansImpl.cpp
@@ -251,23 +251,54 @@ static jlong doKMeansOneAPICompute(
         *reinterpret_cast<const homogen_table *>(pNumTabData);
     homogen_table centroids =
         *reinterpret_cast<const homogen_table *>(pNumTabCenters);
-    const auto kmeans_desc = kmeans_gpu::descriptor<double>()
-                                 .set_cluster_count(clusterNum)
-                                 .set_max_iteration_count(iterationNum)
-                                 .set_accuracy_threshold(tolerance);
-    kmeans_gpu::train_input local_input{htable, centroids};
+    const auto &dtype = htable.get_metadata().get_data_type(0);
+    kmeans_gpu::train_result result_train;
     auto t1 = std::chrono::high_resolution_clock::now();
-    kmeans_gpu::train_result result_train =
-        preview::train(comm, kmeans_desc, local_input);
+    auto t2 = std::chrono::high_resolution_clock::now();
+    auto duration =
+        (float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
+            .count();
+    kmeans_gpu::train_input local_input{htable, centroids};
+
+    switch (dtype) {
+    case data_type::float32: {
+        const auto kmeans_desc = kmeans_gpu::descriptor<float>()
+                                     .set_cluster_count(clusterNum)
+                                     .set_max_iteration_count(iterationNum)
+                                     .set_accuracy_threshold(tolerance);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train = preview::train(comm, kmeans_desc, local_input);
+        break;
+    }
+    case data_type::float64: {
+        const auto kmeans_desc = kmeans_gpu::descriptor<double>()
+                                     .set_cluster_count(clusterNum)
+                                     .set_max_iteration_count(iterationNum)
+                                     .set_accuracy_threshold(tolerance);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train = preview::train(comm, kmeans_desc, local_input);
+        break;
+    }
+    default: {
+        std::cout << "unsupported data type: " << static_cast<int>(dtype)
+                  << std::endl;
+        exit(-1);
+    }
+    }
+    t2 = std::chrono::high_resolution_clock::now();
+    duration =
+        (float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
+            .count();
     std::cout << "KMeans (native): training step took " << duration / 1000
               << " secs." << std::endl;
     if (isRoot) {
         std::cout << "Iteration count: " << result_train.get_iteration_count()
                   << std::endl;
         std::cout << "Centroids:\n"
                   << result_train.get_model().get_centroids() << std::endl;
-        auto t2 = std::chrono::high_resolution_clock::now();
-        auto duration =
-            std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
-                .count();
+        t2 = std::chrono::high_resolution_clock::now();
+        duration =
+            (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                t2 - t1)
+                .count();
+        std::cout << "KMeans (native): training step took " << duration / 1000
+                  << " secs." << std::endl;
         // Get the class of the input object
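The default branches in these hunks originally streamed &dtype, which prints an address rather than the offending type (fixed above to print the integral value). A hypothetical mapping helper, assuming oneDAL's oneapi::dal::data_type enumerators and header location:

#include "oneapi/dal/common.hpp" // assumed header for oneapi::dal::data_type

// Hypothetical helper (not in this commit): readable name for the subset of
// oneDAL data_type values this code cares about.
static const char *dtype_name(oneapi::dal::data_type dtype) {
    using oneapi::dal::data_type;
    switch (dtype) {
    case data_type::float32: return "float32";
    case data_type::float64: return "float64";
    case data_type::int32:   return "int32";
    case data_type::int64:   return "int64";
    default:                 return "unknown";
    }
}

// e.g.: std::cout << "unsupported data type: " << dtype_name(dtype) << std::endl;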
35 changes: 30 additions & 5 deletions mllib-dal/src/main/native/LinearRegressionImpl.cpp
@@ -235,12 +235,37 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId,
     homogen_table ytrain = *reinterpret_cast<const homogen_table *>(pLabel);

     linear_regression_gpu::train_input local_input{xtrain, ytrain};
-    const auto linear_regression_desc =
-        linear_regression_gpu::descriptor<>(fitIntercept);
-
-    linear_regression_gpu::train_result result_train =
-        preview::train(comm, linear_regression_desc, xtrain, ytrain);
+    const auto &dtype = xtrain.get_metadata().get_data_type(0);
+    linear_regression_gpu::train_result result_train;
+    auto t1 = std::chrono::high_resolution_clock::now();
+    switch (dtype) {
+    case data_type::float32: {
+        const auto linear_regression_desc =
+            linear_regression_gpu::descriptor<float>(fitIntercept);
+        result_train =
+            preview::train(comm, linear_regression_desc, xtrain, ytrain);
+        break;
+    }
+    case data_type::float64: {
+        const auto linear_regression_desc =
+            linear_regression_gpu::descriptor<double>(fitIntercept);
+        result_train =
+            preview::train(comm, linear_regression_desc, xtrain, ytrain);
+        break;
+    }
+    default: {
+        std::cout << "unsupported data type: " << static_cast<int>(dtype)
+                  << std::endl;
+        exit(-1);
+    }
+    }
     if (isRoot) {
+        auto t2 = std::chrono::high_resolution_clock::now();
+        auto duration =
+            (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                t2 - t1)
+                .count();
+        std::cout << "LinearRegression (native): training step took "
+                  << duration / 1000 << " secs." << std::endl;
         HomogenTablePtr result_matrix = std::make_shared<homogen_table>(
             result_train.get_model().get_betas());
         saveHomogenTablePtrToVector(result_matrix);
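For callers, the branch taken in these switches is determined entirely by the element type the input homogen_table was built from. A hedged usage sketch assuming oneDAL's homogen_table::wrap (the data values here are illustrative):

#include <iostream>
#include "oneapi/dal/table/homogen.hpp"

using namespace oneapi::dal;

int main() {
    // A 2x2 table wrapped over float data reports data_type::float32,
    // steering the dispatch above into the descriptor<float> arm ...
    const float f32[] = {1.0f, 2.0f, 3.0f, 4.0f};
    const auto t32 = homogen_table::wrap(f32, 2, 2);

    // ... while the same values as double report data_type::float64.
    const double f64[] = {1.0, 2.0, 3.0, 4.0};
    const auto t64 = homogen_table::wrap(f64, 2, 2);

    std::cout << (t32.get_metadata().get_data_type(0) == data_type::float32)
              << std::endl; // 1
    std::cout << (t64.get_metadata().get_data_type(0) == data_type::float64)
              << std::endl; // 1
}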
(3 of the 8 changed files are not shown.)
