oneDAL code: support float and double data types
Signed-off-by: minmingzhu <[email protected]>
minmingzhu committed Aug 9, 2023
1 parent c199710 commit a404670
Showing 8 changed files with 372 additions and 99 deletions.
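
Every file in this commit applies the same pattern: read the column data type from the table metadata at run time, then instantiate the float or double specialization of the algorithm descriptor inside a switch. A minimal self-contained sketch of that tag-dispatch idea (toy enum and descriptor, not the oneDAL API):

#include <iostream>
#include <stdexcept>

// Toy stand-ins for oneDAL's data_type tag and a templated descriptor.
enum class data_type { float32, float64 };

template <typename Float>
struct descriptor {
    void compute() const {
        std::cout << "computing with " << sizeof(Float) * 8
                  << "-bit floats\n";
    }
};

// Dispatch: pick the template instantiation matching the runtime tag,
// mirroring the switch (dtype) blocks added in this commit.
void compute_for(data_type dtype) {
    switch (dtype) {
    case data_type::float32:
        descriptor<float>{}.compute();
        break;
    case data_type::float64:
        descriptor<double>{}.compute();
        break;
    default:
        throw std::invalid_argument("unsupported data type");
    }
}

int main() {
    compute_for(data_type::float32); // computing with 32-bit floats
    compute_for(data_type::float64); // computing with 64-bit floats
}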
45 changes: 36 additions & 9 deletions mllib-dal/src/main/native/CorrelationImpl.cpp
@@ -153,20 +153,47 @@ static void doCorrelationOneAPICompute(
     const bool isRoot = (comm.get_rank() == ccl_root);
     homogen_table htable =
         *reinterpret_cast<const homogen_table *>(pNumTabData);
-
-    const auto cor_desc = covariance_gpu::descriptor{}.set_result_options(
-        covariance_gpu::result_options::cor_matrix |
-        covariance_gpu::result_options::means);
+    const auto &dtype = htable.get_metadata().get_data_type(0);
+    covariance_gpu::compute_result result_train;
     auto t1 = std::chrono::high_resolution_clock::now();
-    const auto result_train = preview::compute(comm, cor_desc, htable);
+    switch (dtype) {
+    case data_type::float32: {
+        const auto cor_desc =
+            covariance_gpu::descriptor<float>{}.set_result_options(
+                covariance_gpu::result_options::cor_matrix |
+                covariance_gpu::result_options::means);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train = preview::compute(comm, cor_desc, htable);
+        break;
+    }
+    case data_type::float64: {
+        const auto cor_desc =
+            covariance_gpu::descriptor<double>{}.set_result_options(
+                covariance_gpu::result_options::cor_matrix |
+                covariance_gpu::result_options::means);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train = preview::compute(comm, cor_desc, htable);
+        break;
+    }
+    default: {
+        std::cout << "unsupported data type: " << static_cast<int>(dtype)
+                  << std::endl;
+        exit(-1);
+    }
+    }
+    auto t2 = std::chrono::high_resolution_clock::now();
+    auto duration =
+        (float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
+            .count();
+    std::cout << "Correlation batch(native): computing step took "
+              << duration / 1000 << " secs." << std::endl;
     if (isRoot) {
         std::cout << "Mean:\n" << result_train.get_means() << std::endl;
         std::cout << "Correlation:\n"
                   << result_train.get_cor_matrix() << std::endl;
-        auto t2 = std::chrono::high_resolution_clock::now();
-        auto duration =
-            std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
-                .count();
+        t2 = std::chrono::high_resolution_clock::now();
+        duration =
+            (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                t2 - t1)
+                .count();
         std::cout << "Correlation batch(native): computing step took "
                   << duration / 1000 << " secs." << std::endl;
         // Return all covariance & mean
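The two arms of the switch above differ only in the descriptor's template parameter. A possible refactor, sketched under the assumption that this file's covariance_gpu and preview aliases and the SPMD communicator type are in scope (compute_correlation is a hypothetical helper, not part of this commit):

// Hypothetical helper (not in this commit): one templated function instead
// of two duplicated switch arms.
template <typename Float, typename Comm>
static auto compute_correlation(Comm &comm, const homogen_table &htable) {
    const auto cor_desc =
        covariance_gpu::descriptor<Float>{}.set_result_options(
            covariance_gpu::result_options::cor_matrix |
            covariance_gpu::result_options::means);
    return preview::compute(comm, cor_desc, htable);
}

// Each switch arm then shrinks to, e.g.:
// case data_type::float32:
//     result_train = compute_correlation<float>(comm, htable);
//     break;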
105 changes: 82 additions & 23 deletions mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp
@@ -225,28 +225,82 @@ static jobject doRFClassifierOneAPICompute(
               << hFeaturetable.get_column_count() << std::endl;
     std::cout << "doRFClassifierOneAPICompute classCount = " << classCount
               << std::endl;
-    const auto df_desc =
-        df::descriptor<float, df::method::hist, df::task::classification>{}
-            .set_class_count(classCount)
-            .set_tree_count(treeCount)
-            .set_features_per_node(numFeaturesPerNode)
-            .set_min_observations_in_leaf_node(minObservationsLeafNode)
-            .set_min_observations_in_split_node(minObservationsSplitNode)
-            .set_min_weight_fraction_in_leaf_node(minWeightFractionLeafNode)
-            .set_min_impurity_decrease_in_split_node(
-                minImpurityDecreaseSplitNode)
-            .set_error_metric_mode(df::error_metric_mode::out_of_bag_error)
-            .set_variable_importance_mode(df::variable_importance_mode::mdi)
-            .set_infer_mode(df::infer_mode::class_responses |
-                            df::infer_mode::class_probabilities)
-            .set_voting_mode(df::voting_mode::weighted)
-            .set_max_tree_depth(maxTreeDepth)
-            .set_max_bins(maxBins);
-
-    const auto result_train =
-        preview::train(comm, df_desc, hFeaturetable, hLabeltable);
-    const auto result_infer =
-        preview::infer(comm, df_desc, result_train.get_model(), hFeaturetable);
+    const auto &dtype = hFeaturetable.get_metadata().get_data_type(0);
+    df::train_result result_train;
+    df::infer_result result_infer;
+    auto t1 = std::chrono::high_resolution_clock::now();
+    auto t2 = std::chrono::high_resolution_clock::now();
+    auto duration =
+        (float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
+            .count();
+    switch (dtype) {
+    case data_type::float32: {
+        const auto df_desc =
+            df::descriptor<float, df::method::hist, df::task::classification>{}
+                .set_class_count(classCount)
+                .set_tree_count(treeCount)
+                .set_features_per_node(numFeaturesPerNode)
+                .set_min_observations_in_leaf_node(minObservationsLeafNode)
+                .set_min_observations_in_split_node(minObservationsSplitNode)
+                .set_min_weight_fraction_in_leaf_node(minWeightFractionLeafNode)
+                .set_min_impurity_decrease_in_split_node(
+                    minImpurityDecreaseSplitNode)
+                .set_error_metric_mode(df::error_metric_mode::out_of_bag_error)
+                .set_variable_importance_mode(df::variable_importance_mode::mdi)
+                .set_infer_mode(df::infer_mode::class_responses |
+                                df::infer_mode::class_probabilities)
+                .set_voting_mode(df::voting_mode::weighted)
+                .set_max_tree_depth(maxTreeDepth)
+                .set_max_bins(maxBins);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train =
+            preview::train(comm, df_desc, hFeaturetable, hLabeltable);
+        t2 = std::chrono::high_resolution_clock::now();
+        duration = (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                       t2 - t1)
+                       .count();
+        std::cout << "DF Classifier (native): training step took "
+                  << duration / 1000 << " secs." << std::endl;
+        result_infer = preview::infer(comm, df_desc, result_train.get_model(),
+                                      hFeaturetable);
+        break;
+    }
+    case data_type::float64: {
+        const auto df_desc =
+            df::descriptor<double, df::method::hist, df::task::classification>{}
+                .set_class_count(classCount)
+                .set_tree_count(treeCount)
+                .set_features_per_node(numFeaturesPerNode)
+                .set_min_observations_in_leaf_node(minObservationsLeafNode)
+                .set_min_observations_in_split_node(minObservationsSplitNode)
+                .set_min_weight_fraction_in_leaf_node(minWeightFractionLeafNode)
+                .set_min_impurity_decrease_in_split_node(
+                    minImpurityDecreaseSplitNode)
+                .set_error_metric_mode(df::error_metric_mode::out_of_bag_error)
+                .set_variable_importance_mode(df::variable_importance_mode::mdi)
+                .set_infer_mode(df::infer_mode::class_responses |
+                                df::infer_mode::class_probabilities)
+                .set_voting_mode(df::voting_mode::weighted)
+                .set_max_tree_depth(maxTreeDepth)
+                .set_max_bins(maxBins);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train =
+            preview::train(comm, df_desc, hFeaturetable, hLabeltable);
+        t2 = std::chrono::high_resolution_clock::now();
+        duration = (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                       t2 - t1)
+                       .count();
+        std::cout << "DF Classifier (native): training step took "
+                  << duration / 1000 << " secs." << std::endl;
+        result_infer = preview::infer(comm, df_desc, result_train.get_model(),
+                                      hFeaturetable);
+        break;
+    }
+    default: {
+        std::cout << "unsupported data type: " << static_cast<int>(dtype)
+                  << std::endl;
+        exit(-1);
+    }
+    }
     jobject trees = nullptr;
     if (isRoot) {
         std::cout << "Variable importance results:\n"
@@ -256,7 +310,12 @@ static jobject doRFClassifierOneAPICompute(
                   << result_infer.get_responses() << std::endl;
         std::cout << "Probabilities results:\n"
                   << result_infer.get_probabilities() << std::endl;
-
+        t2 = std::chrono::high_resolution_clock::now();
+        duration = (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                       t2 - t1)
+                       .count();
+        std::cout << "DF Classifier (native): training step took "
+                  << duration / 1000 << " secs." << std::endl;
         // convert to java hashmap
         trees = collect_model(env, result_train.get_model(), classCount);

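Both switch arms in this file configure an identical hyper-parameter chain and differ only in the descriptor's first template argument. The single result_train variable works across both arms because df::train_result is parameterized on the task, not the float type. One way to avoid the duplication, sketched assuming this file's df and preview aliases are in scope (dispatch_on_dtype is a hypothetical helper, not part of this commit):

// Hypothetical helper (not in this commit): run `body` with a float or
// double tag chosen from the runtime data_type. Both instantiations must
// return the same type, which holds for oneDAL train/infer results.
template <typename Body>
static auto dispatch_on_dtype(data_type dtype, Body &&body) {
    switch (dtype) {
    case data_type::float32:
        return body(float{});
    case data_type::float64:
        return body(double{});
    default:
        std::cout << "unsupported data type" << std::endl;
        exit(-1); // std::exit is [[noreturn]], so no fall-through
    }
}

// Each algorithm body would then be written once, e.g.:
// result_train = dispatch_on_dtype(dtype, [&](auto tag) {
//     using Float = decltype(tag);
//     const auto df_desc =
//         df::descriptor<Float, df::method::hist,
//                        df::task::classification>{} /* ...setters... */;
//     return preview::train(comm, df_desc, hFeaturetable, hLabeltable);
// });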
82 changes: 66 additions & 16 deletions mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp
@@ -220,22 +220,72 @@ static jobject doRFRegressorOneAPICompute(
         *reinterpret_cast<const homogen_table *>(pNumTabLabel);
     std::cout << "doRFRegressorOneAPICompute get_column_count = "
               << hFeaturetable.get_column_count() << std::endl;
-    const auto df_desc =
-        df::descriptor<float, df::method::hist, df::task::regression>{}
-            .set_tree_count(treeCount)
-            .set_features_per_node(numFeaturesPerNode)
-            .set_min_observations_in_leaf_node(minObservationsLeafNode)
-            .set_max_tree_depth(maxTreeDepth)
-            .set_max_bins(maxbins)
-            .set_error_metric_mode(
-                df::error_metric_mode::out_of_bag_error |
-                df::error_metric_mode::out_of_bag_error_per_observation)
-            .set_variable_importance_mode(df::variable_importance_mode::mdi);
-
-    const auto result_train =
-        preview::train(comm, df_desc, hFeaturetable, hLabeltable);
-    const auto result_infer =
-        preview::infer(comm, df_desc, result_train.get_model(), hFeaturetable);
+    const auto &dtype = hFeaturetable.get_metadata().get_data_type(0);
+    df::train_result result_train;
+    df::infer_result result_infer;
+    auto t1 = std::chrono::high_resolution_clock::now();
+    auto t2 = std::chrono::high_resolution_clock::now();
+    auto duration =
+        (float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
+            .count();
+    switch (dtype) {
+    case data_type::float32: {
+        const auto df_desc =
+            df::descriptor<float, df::method::hist, df::task::regression>{}
+                .set_tree_count(treeCount)
+                .set_features_per_node(numFeaturesPerNode)
+                .set_min_observations_in_leaf_node(minObservationsLeafNode)
+                .set_max_tree_depth(maxTreeDepth)
+                .set_max_bins(maxbins)
+                .set_error_metric_mode(
+                    df::error_metric_mode::out_of_bag_error |
+                    df::error_metric_mode::out_of_bag_error_per_observation)
+                .set_variable_importance_mode(
+                    df::variable_importance_mode::mdi);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train =
+            preview::train(comm, df_desc, hFeaturetable, hLabeltable);
+        t2 = std::chrono::high_resolution_clock::now();
+        duration = (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                       t2 - t1)
+                       .count();
+        std::cout << "DF Regressor (native): training step took "
+                  << duration / 1000 << " secs." << std::endl;
+        result_infer = preview::infer(comm, df_desc, result_train.get_model(),
+                                      hFeaturetable);
+        break;
+    }
+    case data_type::float64: {
+        const auto df_desc =
+            df::descriptor<double, df::method::hist, df::task::regression>{}
+                .set_tree_count(treeCount)
+                .set_features_per_node(numFeaturesPerNode)
+                .set_min_observations_in_leaf_node(minObservationsLeafNode)
+                .set_max_tree_depth(maxTreeDepth)
+                .set_max_bins(maxbins)
+                .set_error_metric_mode(
+                    df::error_metric_mode::out_of_bag_error |
+                    df::error_metric_mode::out_of_bag_error_per_observation)
+                .set_variable_importance_mode(
+                    df::variable_importance_mode::mdi);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train =
+            preview::train(comm, df_desc, hFeaturetable, hLabeltable);
+        t2 = std::chrono::high_resolution_clock::now();
+        duration = (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                       t2 - t1)
+                       .count();
+        std::cout << "DF Regressor (native): training step took "
+                  << duration / 1000 << " secs." << std::endl;
+        result_infer = preview::infer(comm, df_desc, result_train.get_model(),
+                                      hFeaturetable);
+        break;
+    }
+    default: {
+        std::cout << "unsupported data type: " << static_cast<int>(dtype)
+                  << std::endl;
+        exit(-1);
+    }
+    }
     jobject trees = nullptr;
     if (isRoot) {
         std::cout << "Variable importance results:\n"
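The t1/t2/duration boilerplate around every train and compute call in this commit could be wrapped in a small utility. A self-contained sketch (Stopwatch is hypothetical, not part of this commit; the label is illustrative):

#include <chrono>
#include <iostream>
#include <string>
#include <utility>

// Hypothetical utility (not in this commit): wraps the repeated
// high_resolution_clock / duration_cast timing code above.
class Stopwatch {
  public:
    explicit Stopwatch(std::string label)
        : label_(std::move(label)),
          start_(std::chrono::high_resolution_clock::now()) {}

    // Prints the elapsed time in seconds, matching the log format above.
    void report() const {
        const auto end = std::chrono::high_resolution_clock::now();
        const auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                            end - start_)
                            .count();
        std::cout << label_ << " took " << ms / 1000.0 << " secs."
                  << std::endl;
    }

  private:
    std::string label_;
    std::chrono::high_resolution_clock::time_point start_;
};

int main() {
    Stopwatch sw("DF Regressor (native): training step");
    // ... train/infer would run here ...
    sw.report();
}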
53 changes: 42 additions & 11 deletions mllib-dal/src/main/native/KMeansImpl.cpp
@@ -251,23 +251,54 @@ static jlong doKMeansOneAPICompute(
         *reinterpret_cast<const homogen_table *>(pNumTabData);
     homogen_table centroids =
         *reinterpret_cast<const homogen_table *>(pNumTabCenters);
-    const auto kmeans_desc = kmeans_gpu::descriptor<double>()
-                                 .set_cluster_count(clusterNum)
-                                 .set_max_iteration_count(iterationNum)
-                                 .set_accuracy_threshold(tolerance);
-    kmeans_gpu::train_input local_input{htable, centroids};
+    const auto &dtype = htable.get_metadata().get_data_type(0);
+    kmeans_gpu::train_result result_train;
     auto t1 = std::chrono::high_resolution_clock::now();
-    kmeans_gpu::train_result result_train =
-        preview::train(comm, kmeans_desc, local_input);
+    auto t2 = std::chrono::high_resolution_clock::now();
+    auto duration =
+        (float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
+            .count();
+    kmeans_gpu::train_input local_input{htable, centroids};
+
+    switch (dtype) {
+    case data_type::float32: {
+        const auto kmeans_desc = kmeans_gpu::descriptor<float>()
+                                     .set_cluster_count(clusterNum)
+                                     .set_max_iteration_count(iterationNum)
+                                     .set_accuracy_threshold(tolerance);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train = preview::train(comm, kmeans_desc, local_input);
+        break;
+    }
+    case data_type::float64: {
+        const auto kmeans_desc = kmeans_gpu::descriptor<double>()
+                                     .set_cluster_count(clusterNum)
+                                     .set_max_iteration_count(iterationNum)
+                                     .set_accuracy_threshold(tolerance);
+        t1 = std::chrono::high_resolution_clock::now();
+        result_train = preview::train(comm, kmeans_desc, local_input);
+        break;
+    }
+    default: {
+        std::cout << "unsupported data type: " << static_cast<int>(dtype)
+                  << std::endl;
+        exit(-1);
+    }
+    }
+    t2 = std::chrono::high_resolution_clock::now();
+    duration =
+        (float)std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
+            .count();
     std::cout << "KMeans (native): training step took " << duration / 1000
               << " secs." << std::endl;
     if (isRoot) {
         std::cout << "Iteration count: " << result_train.get_iteration_count()
                   << std::endl;
         std::cout << "Centroids:\n"
                   << result_train.get_model().get_centroids() << std::endl;
-        auto t2 = std::chrono::high_resolution_clock::now();
-        auto duration =
-            std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
-                .count();
+        t2 = std::chrono::high_resolution_clock::now();
+        duration =
+            (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                t2 - t1)
+                .count();
+        std::cout << "KMeans (native): training step took " << duration / 1000
+                  << " secs." << std::endl;
         // Get the class of the input object
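The default branches in these hunks originally streamed &dtype, which prints an address rather than the offending type (fixed above to print the integral value). A hypothetical mapping helper, assuming oneDAL's oneapi::dal::data_type enumerators and header location:

#include "oneapi/dal/common.hpp" // assumed header for oneapi::dal::data_type

// Hypothetical helper (not in this commit): readable name for the subset of
// oneDAL data_type values this code cares about.
static const char *dtype_name(oneapi::dal::data_type dtype) {
    using oneapi::dal::data_type;
    switch (dtype) {
    case data_type::float32: return "float32";
    case data_type::float64: return "float64";
    case data_type::int32:   return "int32";
    case data_type::int64:   return "int64";
    default:                 return "unknown";
    }
}

// e.g.: std::cout << "unsupported data type: " << dtype_name(dtype) << std::endl;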
35 changes: 30 additions & 5 deletions mllib-dal/src/main/native/LinearRegressionImpl.cpp
@@ -235,12 +235,37 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId,
     homogen_table ytrain = *reinterpret_cast<const homogen_table *>(pLabel);

     linear_regression_gpu::train_input local_input{xtrain, ytrain};
-    const auto linear_regression_desc =
-        linear_regression_gpu::descriptor<>(fitIntercept);
-
-    linear_regression_gpu::train_result result_train =
-        preview::train(comm, linear_regression_desc, xtrain, ytrain);
+    const auto &dtype = xtrain.get_metadata().get_data_type(0);
+    linear_regression_gpu::train_result result_train;
+    auto t1 = std::chrono::high_resolution_clock::now();
+    switch (dtype) {
+    case data_type::float32: {
+        const auto linear_regression_desc =
+            linear_regression_gpu::descriptor<float>(fitIntercept);
+        result_train =
+            preview::train(comm, linear_regression_desc, xtrain, ytrain);
+        break;
+    }
+    case data_type::float64: {
+        const auto linear_regression_desc =
+            linear_regression_gpu::descriptor<double>(fitIntercept);
+        result_train =
+            preview::train(comm, linear_regression_desc, xtrain, ytrain);
+        break;
+    }
+    default: {
+        std::cout << "unsupported data type: " << static_cast<int>(dtype)
+                  << std::endl;
+        exit(-1);
+    }
+    }
     if (isRoot) {
+        auto t2 = std::chrono::high_resolution_clock::now();
+        auto duration =
+            (float)std::chrono::duration_cast<std::chrono::milliseconds>(
+                t2 - t1)
+                .count();
+        std::cout << "LinearRegression (native): training step took "
+                  << duration / 1000 << " secs." << std::endl;
         HomogenTablePtr result_matrix = std::make_shared<homogen_table>(
             result_train.get_model().get_betas());
         saveHomogenTablePtrToVector(result_matrix);
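For callers, the branch taken in these switches is determined entirely by the element type the input homogen_table was built from. A hedged usage sketch assuming oneDAL's homogen_table::wrap (the data values here are illustrative):

#include <iostream>
#include "oneapi/dal/table/homogen.hpp"

using namespace oneapi::dal;

int main() {
    // A 2x2 table wrapped over float data reports data_type::float32,
    // steering the dispatch above into the descriptor<float> arm ...
    const float f32[] = {1.0f, 2.0f, 3.0f, 4.0f};
    const auto t32 = homogen_table::wrap(f32, 2, 2);

    // ... while the same values as double report data_type::float64.
    const double f64[] = {1.0, 2.0, 3.0, 4.0};
    const auto t64 = homogen_table::wrap(f64, 2, 2);

    std::cout << (t32.get_metadata().get_data_type(0) == data_type::float32)
              << std::endl; // 1
    std::cout << (t64.get_metadata().get_data_type(0) == data_type::float64)
              << std::endl; // 1
}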
(3 of the 8 changed files are not shown.)
