diff --git a/cpp/daal/src/algorithms/low_order_moments/low_order_moments_dense_default_online_oneapi_fpt.cpp b/cpp/daal/src/algorithms/low_order_moments/low_order_moments_dense_default_online_oneapi_fpt.cpp
index 33fabcbd2d9..8087ef17d49 100644
--- a/cpp/daal/src/algorithms/low_order_moments/low_order_moments_dense_default_online_oneapi_fpt.cpp
+++ b/cpp/daal/src/algorithms/low_order_moments/low_order_moments_dense_default_online_oneapi_fpt.cpp
@@ -36,7 +36,7 @@ namespace oneapi
 {
 namespace internal
 {
-template class LowOrderMomentsOnlineKernelOneAPI<DAAL_FPTYPE, defaultDense>;
+template class DAAL_EXPORT LowOrderMomentsOnlineKernelOneAPI<DAAL_FPTYPE, defaultDense>;
 template class LowOrderMomentsOnlineKernelOneAPI<DAAL_FPTYPE, singlePassDense>;
 template class LowOrderMomentsOnlineKernelOneAPI<DAAL_FPTYPE, sumDense>;
 template class LowOrderMomentsOnlineKernelOneAPI<DAAL_FPTYPE, fastCSR>;
diff --git a/cpp/oneapi/dal/algo/basic_statistics.hpp b/cpp/oneapi/dal/algo/basic_statistics.hpp
index 9668bef9082..d69b6b03f1e 100644
--- a/cpp/oneapi/dal/algo/basic_statistics.hpp
+++ b/cpp/oneapi/dal/algo/basic_statistics.hpp
@@ -17,3 +17,5 @@
 #pragma once
 
 #include "oneapi/dal/algo/basic_statistics/compute.hpp"
+#include "oneapi/dal/algo/basic_statistics/partial_compute.hpp"
+#include "oneapi/dal/algo/basic_statistics/finalize_compute.hpp"
diff --git a/cpp/oneapi/dal/algo/basic_statistics/BUILD b/cpp/oneapi/dal/algo/basic_statistics/BUILD
index 0080c3f387c..9e1179e7a2e 100644
--- a/cpp/oneapi/dal/algo/basic_statistics/BUILD
+++ b/cpp/oneapi/dal/algo/basic_statistics/BUILD
@@ -10,6 +10,7 @@ dal_module(
     dal_deps = [
         "@onedal//cpp/oneapi/dal:core",
         "@onedal//cpp/oneapi/dal/backend/primitives:common",
+        "@onedal//cpp/oneapi/dal/backend/primitives:reduction",
     ],
     extra_deps = [
         "@onedal//cpp/daal/src/algorithms/low_order_moments:kernel",
diff --git a/cpp/oneapi/dal/algo/basic_statistics/backend/basic_statistics_interop.hpp b/cpp/oneapi/dal/algo/basic_statistics/backend/basic_statistics_interop.hpp
index a8546137dc7..9518dd8ae20 100644
--- a/cpp/oneapi/dal/algo/basic_statistics/backend/basic_statistics_interop.hpp
+++ b/cpp/oneapi/dal/algo/basic_statistics/backend/basic_statistics_interop.hpp
@@ -26,6 +26,7 @@ namespace oneapi::dal::basic_statistics::backend {
 
 namespace daal_lom = daal::algorithms::low_order_moments;
 namespace interop = dal::backend::interop;
+namespace bk = dal::backend;
 
 using task_t = task::compute;
 using descriptor_t = detail::descriptor_base<task_t>;
@@ -100,4 +101,32 @@ inline auto get_result(const descriptor_t& desc, const daal_lom::Result& daal_re
     return res;
 }
 
+template <typename Float>
+inline array<Float> copy_immutable(const array<Float>&& inp) {
+    // An array that already reports mutable data is returned unchanged.
+    if (inp.has_mutable_data()) {
+        return inp;
+    }
+    // Otherwise duplicate the elements into a newly created array and return that.
+    const auto elem_count = inp.get_count();
+    auto writable = array<Float>::empty(elem_count);
+    bk::copy(writable.get_mutable_data(), inp.get_data(), elem_count);
+    return writable;
+}
+
+template <typename Float, typename Result, typename Input, typename Parameter>
+inline void alloc_result(Result& result, const Input* input, const Parameter* params, int method) {
+    // Forward to Result::allocate<Float>; status_to_exception handles the returned status.
+    interop::status_to_exception(result.template allocate<Float>(input, params, method));
+}
+
+template <typename Float, typename Result, typename Input, typename Parameter>
+inline void initialize_result(Result& result,
+                              const Input* input,
+                              const Parameter* params,
+                              int method) {
+    // Forward to Result::initialize<Float>; status_to_exception handles the returned status.
+    interop::status_to_exception(result.template initialize<Float>(input, params, method));
+}
+
 } // namespace oneapi::dal::basic_statistics::backend
diff --git a/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/compute_kernel_dense.cpp b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/compute_kernel_dense.cpp
index f0ffcb64239..e0d5bb6dda0 100644
--- a/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/compute_kernel_dense.cpp
+++ b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/compute_kernel_dense.cpp
@@ -64,31 +64,6 @@ std::int64_t propose_block_size(std::int64_t row_count, std::int64_t col_count)
     return std::max<idx_t>(std::min<idx_t>(row_count, idx_t(1024l)), block_of_rows_size);
 }
 
-template <typename Float>
-array<Float> copy_immutable(const array<Float>&& inp) {
-    if (inp.has_mutable_data()) {
-        return inp;
-    }
-    else {
-        const auto count = inp.get_count();
-        auto res = array<Float>::empty(count);
-        bk::copy(res.get_mutable_data(), inp.get_data(), count);
-        return res;
-    }
-}
-
-template <typename Float, typename Result, typename Input, typename Parameter>
-void alloc_result(Result& result, const Input* input, const Parameter* params, int method) {
-    const auto status = result.template allocate<Float>(input, params, method);
-    interop::status_to_exception(status);
-}
-
-template <typename Float, typename Result, typename Input, typename Parameter>
-void initialize_result(Result& result, const Input* input, const Parameter* params, int method) {
-    const auto status = result.template initialize<Float>(input, params, method);
-    interop::status_to_exception(status);
-}
-
 template <typename Float>
 result_t call_daal_kernel_with_weights(const context_cpu& ctx,
                                        const descriptor_t& desc,
diff --git a/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/finalize_compute_kernel.hpp b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/finalize_compute_kernel.hpp
new file mode 100644
index 00000000000..a6daa1116db
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/finalize_compute_kernel.hpp
@@ -0,0 +1,31 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#pragma once
+
+#include "oneapi/dal/algo/basic_statistics/compute_types.hpp"
+#include "oneapi/dal/backend/dispatcher.hpp"
+
+namespace oneapi::dal::basic_statistics::backend {
+// CPU kernel functor for the finalize step; operator() is only declared in this header.
+template <typename Float, typename Method, typename Task>
+struct finalize_compute_kernel_cpu {
+    compute_result<Task> operator()(const dal::backend::context_cpu& ctx,
+                                    const detail::descriptor_base<Task>& params,
+                                    const partial_compute_result<Task>& input) const;
+};
+
+} // namespace oneapi::dal::basic_statistics::backend
diff --git a/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/finalize_compute_kernel_dense.cpp b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/finalize_compute_kernel_dense.cpp
new file mode 100644
index 00000000000..5807ec85573
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/finalize_compute_kernel_dense.cpp
@@ -0,0 +1,149 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <algorithm>
+
+#include "oneapi/dal/algo/basic_statistics/backend/cpu/finalize_compute_kernel.hpp"
+#include "oneapi/dal/algo/basic_statistics/backend/basic_statistics_interop.hpp"
+
+#include "oneapi/dal/backend/interop/common.hpp"
+#include "oneapi/dal/backend/interop/error_converter.hpp"
+#include "oneapi/dal/backend/interop/table_conversion.hpp"
+
+#include "oneapi/dal/table/row_accessor.hpp"
+
+#include <daal/src/algorithms/low_order_moments/moments_online.h>
+#include <daal/src/algorithms/low_order_moments/low_order_moments_kernel.h>
+
+namespace oneapi::dal::basic_statistics::backend {
+
+using dal::backend::context_cpu;
+using method_t = method::dense;
+using task_t = task::compute;
+using input_t = compute_input<task_t>;
+using result_t = compute_result<task_t>;
+using descriptor_t = detail::descriptor_base<task_t>;
+
+namespace daal_lom = daal::algorithms::low_order_moments;
+namespace interop = dal::backend::interop;
+namespace bk = dal::backend;
+// Shorthand for DAAL's LowOrderMomentsOnlineKernel with the defaultDense method for a given Cpu.
+template <typename Float, daal::CpuType Cpu>
+using daal_lom_online_kernel_t =
+    daal_lom::internal::LowOrderMomentsOnlineKernel<Float, daal_lom::defaultDense, Cpu>;
+
+template <typename Float, typename Task>
+static compute_result<Task> call_daal_kernel_finalize_compute(
+    const context_cpu& ctx,
+    const descriptor_t& desc,
+    const partial_compute_result<Task>& input) {
+    // DAAL is asked for every estimate; desc only selects what is attached to the result.
+    const auto daal_parameter = daal_lom::Parameter(daal_lom::estimatesAll);
+    const auto feature_count = input.get_partial_min().get_column_count();
+
+    // Partial statistics from the input, copied into DAAL homogeneous tables.
+    auto nobs_tbl = interop::copy_to_daal_homogen_table<Float>(input.get_partial_n_rows());
+    auto min_tbl = interop::copy_to_daal_homogen_table<Float>(input.get_partial_min());
+    auto max_tbl = interop::copy_to_daal_homogen_table<Float>(input.get_partial_max());
+    auto sum_tbl = interop::copy_to_daal_homogen_table<Float>(input.get_partial_sum());
+    auto sum_sq_tbl = interop::copy_to_daal_homogen_table<Float>(input.get_partial_sum_squares());
+    auto sum_sq_cent_tbl =
+        interop::copy_to_daal_homogen_table<Float>(input.get_partial_sum_squares_centered());
+
+    // 1 x feature_count tables handed to the DAAL finalize kernel alongside the partial tables.
+    auto mean_tbl = interop::allocate_daal_homogen_table<Float>(1, feature_count);
+    auto raw_moment_tbl = interop::allocate_daal_homogen_table<Float>(1, feature_count);
+    auto variance_tbl = interop::allocate_daal_homogen_table<Float>(1, feature_count);
+    auto stdev_tbl = interop::allocate_daal_homogen_table<Float>(1, feature_count);
+    auto variation_tbl = interop::allocate_daal_homogen_table<Float>(1, feature_count);
+
+    const auto daal_status =
+        interop::call_daal_kernel_finalize_compute<Float, daal_lom_online_kernel_t>(
+            ctx,
+            nobs_tbl.get(),
+            sum_tbl.get(),
+            sum_sq_tbl.get(),
+            sum_sq_cent_tbl.get(),
+            mean_tbl.get(),
+            raw_moment_tbl.get(),
+            variance_tbl.get(),
+            stdev_tbl.get(),
+            variation_tbl.get(),
+            &daal_parameter);
+    interop::status_to_exception(daal_status);
+    // Shared conversion step used for every table that is attached to the result.
+    const auto to_table = [](auto& daal_tbl) {
+        return interop::convert_from_daal_homogen_table<Float>(daal_tbl);
+    };
+    const auto requested = desc.get_result_options();
+    compute_result<Task> res;
+    res.set_result_options(desc.get_result_options());
+    // min, max, sum, sum_squares and sum_squares_centered come straight from the partial tables.
+    if (requested.test(result_options::min)) {
+        res.set_min(to_table(min_tbl));
+    }
+    if (requested.test(result_options::max)) {
+        res.set_max(to_table(max_tbl));
+    }
+    if (requested.test(result_options::sum)) {
+        res.set_sum(to_table(sum_tbl));
+    }
+    if (requested.test(result_options::sum_squares)) {
+        res.set_sum_squares(to_table(sum_sq_tbl));
+    }
+    if (requested.test(result_options::sum_squares_centered)) {
+        res.set_sum_squares_centered(to_table(sum_sq_cent_tbl));
+    }
+    // The remaining statistics come from the tables handed to the finalize kernel.
+    if (requested.test(result_options::mean)) {
+        res.set_mean(to_table(mean_tbl));
+    }
+    if (requested.test(result_options::second_order_raw_moment)) {
+        res.set_second_order_raw_moment(to_table(raw_moment_tbl));
+    }
+    if (requested.test(result_options::variance)) {
+        res.set_variance(to_table(variance_tbl));
+    }
+    if (requested.test(result_options::standard_deviation)) {
+        res.set_standard_deviation(to_table(stdev_tbl));
+    }
+    if (requested.test(result_options::variation)) {
+        res.set_variation(to_table(variation_tbl));
+    }
+    return res;
+}
+// Thin wrapper: forwards its arguments to call_daal_kernel_finalize_compute<Float, Task>.
+template <typename Float, typename Task>
+static compute_result<Task> finalize_compute(const context_cpu& ctx,
+                                             const descriptor_t& desc,
+                                             const partial_compute_result<Task>& input) {
+    return call_daal_kernel_finalize_compute<Float, Task>(ctx, desc, input);
+}
+// Specialization for method::dense / task::compute; operator() delegates to finalize_compute.
+template <typename Float>
+struct finalize_compute_kernel_cpu<Float, method_t, task_t> {
+    compute_result<task::compute> operator()(
+        const context_cpu& ctx,
+        const descriptor_t& desc,
+        const partial_compute_result<task::compute>& input) const {
+        return finalize_compute<Float, task::compute>(ctx, desc, input);
+    }
+};
+// Explicit instantiations of the dense finalize kernel for float and double.
+template struct finalize_compute_kernel_cpu<float, method_t, task_t>;
+template struct finalize_compute_kernel_cpu<double, method_t, task_t>;
+
+} // namespace oneapi::dal::basic_statistics::backend
diff --git a/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/partial_compute_kernel.hpp b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/partial_compute_kernel.hpp
new file mode 100644
index 00000000000..f8870b4553c
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/partial_compute_kernel.hpp
@@ -0,0 +1,31 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#pragma once
+
+#include "oneapi/dal/algo/basic_statistics/compute_types.hpp"
+#include "oneapi/dal/backend/dispatcher.hpp"
+
+namespace oneapi::dal::basic_statistics::backend {
+// CPU kernel functor for the partial step; operator() is only declared in this header.
+template <typename Float, typename Method, typename Task>
+struct partial_compute_kernel_cpu {
+    partial_compute_result<Task> operator()(const dal::backend::context_cpu& ctx,
+                                            const detail::descriptor_base<Task>& params,
+                                            const partial_compute_input<Task>& input) const;
+};
+
+} // namespace oneapi::dal::basic_statistics::backend
diff --git a/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/partial_compute_kernel_dense.cpp b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/partial_compute_kernel_dense.cpp
new file mode 100644
index 00000000000..0b25ebf4bcd
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/backend/cpu/partial_compute_kernel_dense.cpp
@@ -0,0 +1,254 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <algorithm>
+
+#include "oneapi/dal/algo/basic_statistics/backend/cpu/apply_weights.hpp"
+#include "oneapi/dal/algo/basic_statistics/backend/cpu/partial_compute_kernel.hpp"
+#include "oneapi/dal/algo/basic_statistics/backend/basic_statistics_interop.hpp"
+
+#include "oneapi/dal/backend/interop/common.hpp"
+#include "oneapi/dal/backend/interop/error_converter.hpp"
+#include "oneapi/dal/backend/interop/table_conversion.hpp"
+
+#include "oneapi/dal/table/row_accessor.hpp"
+
+#include <daal/src/algorithms/low_order_moments/moments_online.h>
+#include <daal/src/algorithms/low_order_moments/low_order_moments_kernel.h>
+
+namespace oneapi::dal::basic_statistics::backend {
+
+using dal::backend::context_cpu;
+using method_t = method::dense;
+using task_t = task::compute;
+using input_t = partial_compute_input<task_t>;
+using result_t = partial_compute_result<task_t>;
+using descriptor_t = detail::descriptor_base<task_t>;
+
+namespace daal_lom = daal::algorithms::low_order_moments;
+namespace interop = dal::backend::interop;
+// Shorthand for DAAL's LowOrderMomentsOnlineKernel with the defaultDense method for a given Cpu.
+template <typename Float, daal::CpuType Cpu>
+using daal_lom_online_kernel_t =
+    daal_lom::internal::LowOrderMomentsOnlineKernel<Float, daal_lom::defaultDense, Cpu>;
+
+template <typename Float, typename Task>
+inline auto get_partial_result(daal_lom::PartialResult daal_partial_result) {
+    // Looks up one DAAL partial table by id and runs it through convert_from_daal_homogen_table.
+    const auto fetch = [&](auto id) {
+        return interop::convert_from_daal_homogen_table<Float>(daal_partial_result.get(id));
+    };
+
+    // Transfer each of the six partial statistics into the returned result object.
+    auto partial = partial_compute_result();
+    partial.set_partial_n_rows(fetch(daal_lom::PartialResultId::nObservations));
+    partial.set_partial_min(fetch(daal_lom::PartialResultId::partialMinimum));
+    partial.set_partial_max(fetch(daal_lom::PartialResultId::partialMaximum));
+    partial.set_partial_sum(fetch(daal_lom::PartialResultId::partialSum));
+    partial.set_partial_sum_squares(fetch(daal_lom::PartialResultId::partialSumSquares));
+    partial.set_partial_sum_squares_centered(
+        fetch(daal_lom::PartialResultId::partialSumSquaresCentered));
+
+    return partial;
+}
+
+/// Performs one online update of the low order moments for a weighted data block.
+///
+/// The rows pulled from input.get_data() are made mutable (copy_immutable), passed through
+/// apply_weights together with the pulled weights, and handed to the DAAL online kernel.
+/// If input.get_prev() already holds partial results, they are copied into the DAAL partial
+/// result before the kernel is called.
+///
+/// @param ctx   CPU context forwarded to apply_weights and to interop::call_daal_kernel
+/// @param desc  Descriptor; not read by this function
+/// @param input Data block, weights and the previous partial result
+/// @return Partial result built by get_partial_result from the DAAL partial result
+template <typename Float, typename Task>
+result_t call_daal_kernel_with_weights(const context_cpu& ctx,
+                                       const descriptor_t& desc,
+                                       const partial_compute_input<Task>& input) {
+    auto data = input.get_data();
+    auto weights = input.get_weights();
+    ONEDAL_ASSERT(data.has_data());
+    ONEDAL_ASSERT(weights.has_data());
+    ONEDAL_ASSERT(weights.get_row_count() == data.get_row_count());
+    ONEDAL_ASSERT(weights.get_column_count() == std::int64_t(1));
+
+    constexpr bool is_online = true;
+    const auto result_ids = daal_lom::estimatesAll;
+    const auto daal_parameter = daal_lom::Parameter(result_ids);
+
+    auto daal_input = daal_lom::Input();
+    auto daal_partial = daal_lom::PartialResult();
+
+    row_accessor<const Float> data_accessor(data);
+    row_accessor<const Float> weights_accessor(weights);
+    auto weights_arr = weights_accessor.pull();
+    auto gen_data_block = data_accessor.pull();
+    // The data is handed to apply_weights through wrap_mutable, so a mutable array is needed.
+    auto data_arr = copy_immutable(std::move(gen_data_block));
+    {
+        auto data_ndarr =
+            pr::ndarray<Float, 2>::wrap_mutable(data_arr,
+                                                { data.get_row_count(), data.get_column_count() });
+        auto weights_ndarr = pr::ndarray<Float, 1>::wrap(weights_arr, data.get_row_count());
+
+        apply_weights(ctx, weights_ndarr, data_ndarr);
+    }
+
+    const auto daal_data = interop::convert_to_daal_homogen_table<Float>(data_arr,
+                                                                         data.get_row_count(),
+                                                                         data.get_column_count());
+    daal_input.set(daal_lom::InputId::data, daal_data);
+
+    alloc_result<Float>(daal_partial, &daal_input, &daal_parameter, result_ids);
+    initialize_result<Float>(daal_partial, &daal_input, &daal_parameter, result_ids);
+
+    // Single place where the DAAL online kernel is invoked and its output converted; both the
+    // "first block" and the "continue from previous partial result" paths end here.
+    const auto run_kernel = [&]() {
+        interop::status_to_exception(
+            interop::call_daal_kernel<Float, daal_lom_online_kernel_t>(ctx,
+                                                                       daal_data.get(),
+                                                                       &daal_partial,
+                                                                       &daal_parameter,
+                                                                       is_online));
+        return get_partial_result<Float, task_t>(daal_partial);
+    };
+
+    const auto prev = input.get_prev();
+    if (!prev.get_partial_n_rows().has_data()) {
+        // No previous partial result: run on the freshly initialized DAAL partial result.
+        return run_kernel();
+    }
+
+    // Copy the previous partial statistics into the DAAL partial result before the kernel runs.
+    auto daal_nobs = interop::copy_to_daal_homogen_table<Float>(prev.get_partial_n_rows());
+    auto daal_partial_min = interop::copy_to_daal_homogen_table<Float>(prev.get_partial_min());
+    auto daal_partial_max = interop::copy_to_daal_homogen_table<Float>(prev.get_partial_max());
+    auto daal_partial_sums = interop::copy_to_daal_homogen_table<Float>(prev.get_partial_sum());
+    auto daal_partial_sum_squares =
+        interop::copy_to_daal_homogen_table<Float>(prev.get_partial_sum_squares());
+    auto daal_partial_sum_squares_centered =
+        interop::copy_to_daal_homogen_table<Float>(prev.get_partial_sum_squares_centered());
+
+    daal_partial.set(daal_lom::PartialResultId::nObservations, daal_nobs);
+    daal_partial.set(daal_lom::PartialResultId::partialMinimum, daal_partial_min);
+    daal_partial.set(daal_lom::PartialResultId::partialMaximum, daal_partial_max);
+    daal_partial.set(daal_lom::PartialResultId::partialSum, daal_partial_sums);
+    daal_partial.set(daal_lom::PartialResultId::partialSumSquares, daal_partial_sum_squares);
+    daal_partial.set(daal_lom::PartialResultId::partialSumSquaresCentered,
+                     daal_partial_sum_squares_centered);
+
+    return run_kernel();
+}
+
+/// Performs one online update of the low order moments for an unweighted data block.
+///
+/// input.get_data() is converted to a DAAL table and handed to the DAAL online kernel. If
+/// input.get_prev() already holds partial results, they are copied into the DAAL partial
+/// result before the kernel is called.
+///
+/// @param ctx   CPU context forwarded to interop::call_daal_kernel
+/// @param desc  Descriptor; not read by this function
+/// @param input Data block and the previous partial result
+/// @return Partial result built by get_partial_result from the DAAL partial result
+template <typename Float, typename Task>
+result_t call_daal_kernel_without_weights(const context_cpu& ctx,
+                                          const descriptor_t& desc,
+                                          const partial_compute_input<Task>& input) {
+    auto data = input.get_data();
+    ONEDAL_ASSERT(data.has_data());
+    constexpr bool is_online = true;
+
+    const auto result_ids = daal_lom::estimatesAll;
+    const auto daal_parameter = daal_lom::Parameter(result_ids);
+
+    auto daal_input = daal_lom::Input();
+    auto daal_partial = daal_lom::PartialResult();
+
+    // Unweighted path: the input table is converted for DAAL as is.
+    const auto daal_data = interop::convert_to_daal_table<Float>(data);
+    daal_input.set(daal_lom::InputId::data, daal_data);
+
+    alloc_result<Float>(daal_partial, &daal_input, &daal_parameter, result_ids);
+    initialize_result<Float>(daal_partial, &daal_input, &daal_parameter, result_ids);
+
+    // Single place where the DAAL online kernel is invoked and its output converted; both the
+    // "first block" and the "continue from previous partial result" paths end here.
+    const auto run_kernel = [&]() {
+        interop::status_to_exception(
+            interop::call_daal_kernel<Float, daal_lom_online_kernel_t>(ctx,
+                                                                       daal_data.get(),
+                                                                       &daal_partial,
+                                                                       &daal_parameter,
+                                                                       is_online));
+        return get_partial_result<Float, task_t>(daal_partial);
+    };
+
+    const auto prev = input.get_prev();
+    if (!prev.get_partial_n_rows().has_data()) {
+        // No previous partial result: run on the freshly initialized DAAL partial result.
+        return run_kernel();
+    }
+
+    // Copy the previous partial statistics into the DAAL partial result before the kernel runs.
+    auto daal_nobs = interop::copy_to_daal_homogen_table<Float>(prev.get_partial_n_rows());
+    auto daal_partial_min = interop::copy_to_daal_homogen_table<Float>(prev.get_partial_min());
+    auto daal_partial_max = interop::copy_to_daal_homogen_table<Float>(prev.get_partial_max());
+    auto daal_partial_sums = interop::copy_to_daal_homogen_table<Float>(prev.get_partial_sum());
+    auto daal_partial_sum_squares =
+        interop::copy_to_daal_homogen_table<Float>(prev.get_partial_sum_squares());
+    auto daal_partial_sum_squares_centered =
+        interop::copy_to_daal_homogen_table<Float>(prev.get_partial_sum_squares_centered());
+
+    daal_partial.set(daal_lom::PartialResultId::nObservations, daal_nobs);
+    daal_partial.set(daal_lom::PartialResultId::partialMinimum, daal_partial_min);
+    daal_partial.set(daal_lom::PartialResultId::partialMaximum, daal_partial_max);
+    daal_partial.set(daal_lom::PartialResultId::partialSum, daal_partial_sums);
+    daal_partial.set(daal_lom::PartialResultId::partialSumSquares, daal_partial_sum_squares);
+    daal_partial.set(daal_lom::PartialResultId::partialSumSquaresCentered,
+                     daal_partial_sum_squares_centered);
+
+    return run_kernel();
+}
+
+template <typename Float, typename Task>
+static partial_compute_result<Task> partial_compute(const context_cpu& ctx,
+                                                    const descriptor_t& desc,
+                                                    const partial_compute_input<Task>& input) {
+    // Pick the weighted or unweighted DAAL path depending on whether weights were supplied.
+    const bool weighted = input.get_weights().has_data();
+    if (!weighted) {
+        return call_daal_kernel_without_weights<Float, Task>(ctx, desc, input);
+    }
+    return call_daal_kernel_with_weights<Float>(ctx, desc, input);
+}
+// Specialization for method::dense / task::compute; operator() delegates to partial_compute.
+template <typename Float>
+struct partial_compute_kernel_cpu<Float, method_t, task_t> {
+    partial_compute_result<task::compute> operator()(
+        const context_cpu& ctx,
+        const descriptor_t& desc,
+        const partial_compute_input<task::compute>& input) const {
+        return partial_compute<Float, task::compute>(ctx, desc, input);
+    }
+};
+// Explicit instantiations of the dense partial kernel for float and double.
+template struct partial_compute_kernel_cpu<float, method_t, task_t>;
+template struct partial_compute_kernel_cpu<double, method_t, task_t>;
+
+} // namespace oneapi::dal::basic_statistics::backend
diff --git a/cpp/oneapi/dal/algo/basic_statistics/backend/gpu/finalize_compute_kernel.hpp b/cpp/oneapi/dal/algo/basic_statistics/backend/gpu/finalize_compute_kernel.hpp
new file mode 100644
index 00000000000..478917638d9
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/backend/gpu/finalize_compute_kernel.hpp
@@ -0,0 +1,31 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#pragma once
+
+#include "oneapi/dal/algo/basic_statistics/compute_types.hpp"
+#include "oneapi/dal/backend/dispatcher.hpp"
+
+namespace oneapi::dal::basic_statistics::backend {
+// GPU kernel functor for the finalize step; operator() is only declared in this header.
+template <typename Float, typename Method, typename Task>
+struct finalize_compute_kernel_gpu {
+    compute_result<Task> operator()(const dal::backend::context_gpu& ctx,
+                                    const detail::descriptor_base<Task>& params,
+                                    const partial_compute_result<Task>& input) const;
+};
+
+} // namespace oneapi::dal::basic_statistics::backend
diff --git a/cpp/oneapi/dal/algo/basic_statistics/backend/gpu/finalize_compute_kernel_dense_dpc.cpp b/cpp/oneapi/dal/algo/basic_statistics/backend/gpu/finalize_compute_kernel_dense_dpc.cpp
new file mode 100644
index 00000000000..86b8a0f540a
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/backend/gpu/finalize_compute_kernel_dense_dpc.cpp
@@ -0,0 +1,179 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "oneapi/dal/algo/basic_statistics/backend/gpu/finalize_compute_kernel.hpp"
+
+#include "oneapi/dal/backend/primitives/reduction.hpp"
+#include "oneapi/dal/backend/primitives/utils.hpp"
+#include "oneapi/dal/util/common.hpp"
+#include "oneapi/dal/detail/policy.hpp"
+#include "oneapi/dal/table/row_accessor.hpp"
+
+namespace oneapi::dal::basic_statistics::backend {
+
+namespace bk = dal::backend;
+namespace pr = oneapi::dal::backend::primitives;
+using alloc = sycl::usm::alloc;
+
+using bk::context_gpu;
+using task_t = task::compute;
+using input_t = partial_compute_result<task_t>;
+using result_t = compute_result<task_t>;
+using descriptor_t = detail::descriptor_base<task::compute>;
+
+/// Derives the final per-column statistics from the accumulated partial results.
+/// @param q                      Queue the kernel is submitted to.
+/// @param sums, sums2, sums2cent Per-column sums, sums of squares, centered sums of squares.
+/// @param nobs                   View whose element 0 holds the accumulated row count.
+/// @param column_count           Number of columns, i.e. the length of each per-column view.
+/// @param deps                   Events the kernel waits for before it starts.
+/// @return Tuple (means, variance, raw moment, variation, stddev, event that fills the arrays).
+template <typename Float>
+auto compute_all_metrics(sycl::queue& q,
+                         const pr::ndview<Float, 1>& sums,
+                         const pr::ndview<Float, 1>& sums2,
+                         const pr::ndview<Float, 1>& sums2cent,
+                         const pr::ndview<Float, 1>& nobs,
+                         std::size_t column_count,
+                         const dal::backend::event_vector& deps = {}) {
+    ONEDAL_PROFILER_TASK(compute_all_metrics, q);
+    auto means = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+    auto variance = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+    auto raw_moment = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+    auto variation = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+    auto stddev = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+
+    auto means_ptr = means.get_mutable_data();
+    auto variance_ptr = variance.get_mutable_data();
+    auto raw_moment_ptr = raw_moment.get_mutable_data();
+    auto variation_ptr = variation.get_mutable_data();
+    auto stddev_ptr = stddev.get_mutable_data();
+    const auto nobs_ptr = nobs.get_data();
+    const auto sums_ptr = sums.get_data();
+    const auto sums2_ptr = sums2.get_data();
+    const auto sums2cent_ptr = sums2cent.get_data();
+
+    auto update_event = q.submit([&](sycl::handler& cgh) {
+        cgh.depends_on(deps);
+        cgh.parallel_for(sycl::range<1>(column_count), [=](sycl::item<1> id) {
+            // The row count is read here, after `deps`, not on the host at submission time.
+            const Float n = nobs_ptr[0];
+            const Float mean_j = sums_ptr[id] / n;
+            const Float variance_j = sums2cent_ptr[id] / (n - 1);
+            const Float stddev_j = sycl::sqrt(variance_j);
+            means_ptr[id] = mean_j;
+            variance_ptr[id] = variance_j;
+            raw_moment_ptr[id] = sums2_ptr[id] * (Float(1) / n);
+            stddev_ptr[id] = stddev_j;
+            variation_ptr[id] = stddev_j / mean_j;
+        });
+    });
+
+    return std::make_tuple(means, variance, raw_moment, variation, stddev, update_event);
+}
+
+/// Builds the final compute_result from the accumulated partial results.
+/// min, max, sum, sum_squares and sum_squares_centered are forwarded from `input` as stored;
+/// mean, variance, second-order raw moment, standard deviation and variation are computed by
+/// compute_all_metrics(). Only statistics enabled in the result options of `desc` are set.
+/// @param ctx   GPU context that provides the queue.
+/// @param desc  Descriptor whose result options select the statistics to return.
+/// @param input Accumulated partial results.
+/// @return Result with the selected statistics; computed ones are 1 x column_count tables.
+template <typename Float, typename Task>
+static compute_result<Task> finalize_compute(const context_gpu& ctx,
+                                             const descriptor_t& desc,
+                                             const partial_compute_result<Task>& input) {
+    auto& q_ = ctx.get_queue();
+    compute_result<Task> res;
+    const auto column_count = input.get_partial_sum_squares().get_column_count();
+    ONEDAL_ASSERT(column_count > 0);
+    const auto res_op = desc.get_result_options();
+    res.set_result_options(res_op);
+
+    const auto sums_nd = pr::table2ndarray_1d<Float>(q_, input.get_partial_sum(), alloc::device);
+    const auto nobs_nd = pr::table2ndarray_1d<Float>(q_, input.get_partial_n_rows());
+    const auto sums2_nd =
+        pr::table2ndarray_1d<Float>(q_, input.get_partial_sum_squares(), alloc::device);
+    const auto sums2cent_nd =
+        pr::table2ndarray_1d<Float>(q_, input.get_partial_sum_squares_centered(), alloc::device);
+    // Statistics that need no post-processing are handed over exactly as accumulated.
+    if (res_op.test(result_options::min)) {
+        ONEDAL_ASSERT(input.get_partial_min().get_column_count() == column_count);
+        res.set_min(input.get_partial_min());
+    }
+    if (res_op.test(result_options::max)) {
+        ONEDAL_ASSERT(input.get_partial_max().get_column_count() == column_count);
+        res.set_max(input.get_partial_max());
+    }
+    if (res_op.test(result_options::sum)) {
+        ONEDAL_ASSERT(input.get_partial_sum().get_column_count() == column_count);
+        res.set_sum(input.get_partial_sum());
+    }
+    if (res_op.test(result_options::sum_squares)) {
+        ONEDAL_ASSERT(input.get_partial_sum_squares().get_column_count() == column_count);
+        res.set_sum_squares(input.get_partial_sum_squares());
+    }
+    if (res_op.test(result_options::sum_squares_centered)) {
+        ONEDAL_ASSERT(input.get_partial_sum_squares_centered().get_column_count() == column_count);
+        res.set_sum_squares_centered(input.get_partial_sum_squares_centered());
+    }
+
+    auto [means, variance, raw_moment, variation, stddev, update_event] =
+        compute_all_metrics<Float>(q_, sums_nd, sums2_nd, sums2cent_nd, nobs_nd, column_count, {});
+    // Wait here: if no derived statistic is requested, nothing below consumes update_event,
+    // and the kernel must not outlive the local arrays it reads from and writes to.
+    update_event.wait_and_throw();
+    if (res_op.test(result_options::mean)) {
+        ONEDAL_ASSERT(means.get_dimension(0) == column_count);
+        res.set_mean(homogen_table::wrap(means.flatten(q_, { update_event }), 1, column_count));
+    }
+    if (res_op.test(result_options::second_order_raw_moment)) {
+        ONEDAL_ASSERT(raw_moment.get_dimension(0) == column_count);
+        res.set_second_order_raw_moment(
+            homogen_table::wrap(raw_moment.flatten(q_, { update_event }), 1, column_count));
+    }
+    if (res_op.test(result_options::variance)) {
+        ONEDAL_ASSERT(variance.get_dimension(0) == column_count);
+        res.set_variance(
+            homogen_table::wrap(variance.flatten(q_, { update_event }), 1, column_count));
+    }
+    if (res_op.test(result_options::standard_deviation)) {
+        ONEDAL_ASSERT(stddev.get_dimension(0) == column_count);
+        res.set_standard_deviation(
+            homogen_table::wrap(stddev.flatten(q_, { update_event }), 1, column_count));
+    }
+    if (res_op.test(result_options::variation)) {
+        ONEDAL_ASSERT(variation.get_dimension(0) == column_count);
+        res.set_variation(
+            homogen_table::wrap(variation.flatten(q_, { update_event }), 1, column_count));
+    }
+    return res;
+}
+
+template <typename Float>
+struct finalize_compute_kernel_gpu<Float, method::dense, task::compute> { // dense-method kernel
+    result_t operator()(const context_gpu& ctx,
+                        const descriptor_t& desc,
+                        const input_t& input) const {
+        return finalize_compute<Float, task::compute>(ctx, desc, input); // forwards unchanged
+    }
+};
+
+template struct finalize_compute_kernel_gpu<float, method::dense, task::compute>;
+template struct finalize_compute_kernel_gpu<double, method::dense, task::compute>;
+
+} // namespace oneapi::dal::basic_statistics::backend
diff --git a/cpp/oneapi/dal/algo/basic_statistics/backend/gpu/partial_compute_kernel.hpp b/cpp/oneapi/dal/algo/basic_statistics/backend/gpu/partial_compute_kernel.hpp
new file mode 100644
index 00000000000..8c6f5c3e55e
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/backend/gpu/partial_compute_kernel.hpp
@@ -0,0 +1,31 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#pragma once
+
+#include "oneapi/dal/algo/basic_statistics/compute_types.hpp"
+#include "oneapi/dal/backend/dispatcher.hpp"
+
+namespace oneapi::dal::basic_statistics::backend {
+
+template <typename Float, typename Method, typename Task>
+struct partial_compute_kernel_gpu { // GPU functor: partial_compute_input -> partial_compute_result
+    partial_compute_result<Task> operator()(const dal::backend::context_gpu& ctx,
+                                            const detail::descriptor_base<Task>& params,
+                                            const partial_compute_input<Task>& input) const;
+}; // primary template only declares operator(); no definition appears in this header
+
+} // namespace oneapi::dal::basic_statistics::backend
diff --git a/cpp/oneapi/dal/algo/basic_statistics/backend/gpu/partial_compute_kernel_dense_dpc.cpp b/cpp/oneapi/dal/algo/basic_statistics/backend/gpu/partial_compute_kernel_dense_dpc.cpp
new file mode 100644
index 00000000000..4712e5531df
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/backend/gpu/partial_compute_kernel_dense_dpc.cpp
@@ -0,0 +1,356 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "oneapi/dal/algo/basic_statistics/backend/gpu/partial_compute_kernel.hpp"
+
+#include "oneapi/dal/backend/common.hpp"
+#include "oneapi/dal/detail/common.hpp"
+#include "oneapi/dal/detail/policy.hpp"
+#include "oneapi/dal/detail/profiler.hpp"
+#include "oneapi/dal/backend/memory.hpp"
+#include "oneapi/dal/backend/primitives/utils.hpp"
+#include "oneapi/dal/util/common.hpp"
+#include "oneapi/dal/backend/primitives/reduction.hpp"
+
+namespace oneapi::dal::basic_statistics::backend {
+
+namespace bk = dal::backend;
+namespace pr = oneapi::dal::backend::primitives;
+
+using alloc = sycl::usm::alloc;
+
+using bk::context_gpu;
+using task_t = task::compute;
+using input_t = partial_compute_input<task_t>;
+using result_t = partial_compute_result<task_t>;
+using descriptor_t = detail::descriptor_base<task_t>;
+
+/// Merges the statistics of the current block with previously accumulated partial results.
+/// @param q                      Queue used for allocations and the kernel.
+/// @param min, max, sums, sums2  Previously accumulated per-column statistics.
+/// @param current_min, current_max, current_sums, current_sums2
+///                               Per-column statistics of the current block.
+/// @param sums2cent, current_sums2cent, row_count
+///                               Not read: the centered sum of squares is rebuilt from the
+///                               merged sums, merged sums of squares and nobs.
+/// @param column_count           Number of columns, i.e. the length of each per-column view.
+/// @param nobs                   nobs[0] is the divisor used when centering the sum of squares.
+/// @param deps                   Events the kernel waits for before it starts.
+/// @return Tuple (min, max, sums, sums2, sums2cent, event that fills the arrays).
+template <typename Float>
+auto update_partial_results(sycl::queue& q,
+                            const pr::ndview<Float, 1>& min,
+                            const pr::ndview<Float, 1>& current_min,
+                            const pr::ndview<Float, 1>& max,
+                            const pr::ndview<Float, 1>& current_max,
+                            const pr::ndview<Float, 1>& sums,
+                            const pr::ndview<Float, 1>& current_sums,
+                            const pr::ndview<Float, 1>& sums2,
+                            const pr::ndview<Float, 1>& current_sums2,
+                            const pr::ndview<Float, 1>& sums2cent,
+                            const pr::ndview<Float, 1>& current_sums2cent,
+                            const std::int64_t column_count,
+                            const std::int64_t row_count,
+                            const pr::ndview<Float, 1>& nobs,
+                            const dal::backend::event_vector& deps = {}) {
+    ONEDAL_PROFILER_TASK(update_partial_results, q);
+    auto min_out = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+    auto max_out = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+    auto sums_out = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+    auto sums2_out = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+    auto sums2cent_out = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+    auto min_out_ptr = min_out.get_mutable_data();
+    auto max_out_ptr = max_out.get_mutable_data();
+    auto sums_out_ptr = sums_out.get_mutable_data();
+    auto sums2_out_ptr = sums2_out.get_mutable_data();
+    auto sums2cent_out_ptr = sums2cent_out.get_mutable_data();
+    // The inputs are only read, so read-only pointers are requested for all of them.
+    const auto cur_min = current_min.get_data();
+    const auto cur_max = current_max.get_data();
+    const auto cur_sums = current_sums.get_data();
+    const auto cur_sums2 = current_sums2.get_data();
+    const auto prev_min = min.get_data();
+    const auto prev_max = max.get_data();
+    const auto prev_sums = sums.get_data();
+    const auto prev_sums2 = sums2.get_data();
+    const auto nobs_ptr = nobs.get_data();
+
+    auto update_event = q.submit([&](sycl::handler& cgh) {
+        cgh.depends_on(deps);
+        cgh.parallel_for(sycl::range<1>(column_count), [=](sycl::item<1> id) {
+            const Float sum = cur_sums[id] + prev_sums[id];
+            const Float sum2 = cur_sums2[id] + prev_sums2[id];
+            min_out_ptr[id] = sycl::fmin(cur_min[id], prev_min[id]);
+            max_out_ptr[id] = sycl::fmax(cur_max[id], prev_max[id]);
+            sums_out_ptr[id] = sum;
+            sums2_out_ptr[id] = sum2;
+            sums2cent_out_ptr[id] = sum2 - sum * sum / nobs_ptr[0];
+        });
+    });
+    return std::make_tuple(min_out, max_out, sums_out, sums2_out, sums2cent_out, update_event);
+}
+
+/// Produces a copy of `data` in which every row is multiplied by its weight.
+/// @param q            Queue used for the allocation and the kernel.
+/// @param data         Input values, addressed as row * column_count + column.
+/// @param row_count    Number of rows of `data`.
+/// @param column_count Number of columns of `data`.
+/// @param weights      weights[i] multiplies every element of row i.
+/// @param deps         Events the kernel waits for before it starts.
+/// @return Tuple (weighted copy, event that signals the copy has been written).
+template <typename Float>
+auto apply_weights(sycl::queue& q,
+                   const pr::ndview<Float, 2>& data,
+                   std::int64_t row_count,
+                   std::int64_t column_count,
+                   const pr::ndview<Float, 1>& weights,
+                   const dal::backend::event_vector& deps = {}) {
+    ONEDAL_PROFILER_TASK(apply_weights, q);
+    auto weighted = pr::ndarray<Float, 2>::empty(q, { row_count, column_count }, alloc::device);
+    auto dst = weighted.get_mutable_data();
+    auto src = data.get_data();
+    auto w = weights.get_data();
+    auto fill_event = q.submit([&](sycl::handler& cgh) {
+        cgh.depends_on(deps);
+        cgh.parallel_for(sycl::range<2>(row_count, column_count), [=](sycl::item<2> id) {
+            const auto flat = id[0] * column_count + id[1];
+            dst[flat] = src[flat] * w[id[0]];
+        });
+    });
+    return std::make_tuple(weighted, fill_event);
+}
+
+/// Computes the per-column statistics of one data block and the updated row counter.
+///
+/// @param q            Queue used for allocations, kernels and reductions.
+/// @param data         Data block that is reduced column by column.
+/// @param nobs         nobs[0] is the row count accumulated before this block.
+/// @param column_count Number of columns of `data`.
+/// @param row_count    Number of rows of `data`.
+/// @param deps         Events the row-counter kernel waits for.
+/// @return Tuple (min, max, sums, sums2, sums2cent, nobs, event). The first five arrays
+///         describe this block only; nobs[0] is the incoming count plus row_count.
+/// @note The host is blocked until every reduction has finished.
+template <typename Float>
+auto init_computation(sycl::queue& q,
+                      const pr::ndview<Float, 2>& data,
+                      const pr::ndview<Float, 1>& nobs,
+                      std::int64_t column_count,
+                      std::int64_t row_count,
+                      const dal::backend::event_vector& deps = {}) {
+    ONEDAL_PROFILER_TASK(init_partial_results, q);
+
+    auto total_nobs = pr::ndarray<Float, 1>::empty(q, 1);
+    auto col_max = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+    auto col_min = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+    auto col_sums = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+    auto col_sums2 = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+    auto col_sums2cent = pr::ndarray<Float, 1>::empty(q, column_count, alloc::device);
+
+    const auto prev_nobs_ptr = nobs.get_data();
+    auto total_nobs_ptr = total_nobs.get_mutable_data();
+    auto nobs_event = q.submit([&](sycl::handler& cgh) {
+        cgh.depends_on(deps);
+        cgh.single_task([=]() {
+            total_nobs_ptr[0] = prev_nobs_ptr[0] + row_count;
+        });
+    });
+
+    // Each reduction is awaited on the host before the next one is submitted, so nobs_event
+    // (which itself waits for `deps`) is the only device-side dependency they need.
+    const auto reduce_and_wait = [&](auto& output, auto binary_op, auto unary_op) {
+        pr::reduce_by_columns(q, data, output, binary_op, unary_op, { nobs_event })
+            .wait_and_throw();
+    };
+    reduce_and_wait(col_min, pr::min<Float>{}, pr::identity<Float>{});
+    reduce_and_wait(col_max, pr::max<Float>{}, pr::identity<Float>{});
+    reduce_and_wait(col_sums, pr::sum<Float>{}, pr::identity<Float>{});
+    reduce_and_wait(col_sums2, pr::sum<Float>{}, pr::square<Float>{});
+
+    // Derive the centered sum of squares of this block from its sums and sums of squares so
+    // that no uninitialized memory is returned. The reductions above have already finished,
+    // therefore this kernel needs no explicit dependencies.
+    const Float block_rows = static_cast<Float>(row_count);
+    const auto sums_ptr = col_sums.get_data();
+    const auto sums2_ptr = col_sums2.get_data();
+    auto sums2cent_ptr = col_sums2cent.get_mutable_data();
+    auto centered_event = q.submit([&](sycl::handler& cgh) {
+        cgh.parallel_for(sycl::range<1>(column_count), [=](sycl::item<1> id) {
+            sums2cent_ptr[id] = sums2_ptr[id] - sums_ptr[id] * sums_ptr[id] / block_rows;
+        });
+    });
+
+    return std::make_tuple(col_min,
+                           col_max,
+                           col_sums,
+                           col_sums2,
+                           col_sums2cent,
+                           total_nobs,
+                           centered_event);
+}
+
+/// Processes one block of data and folds it into the running partial results.
+///
+/// Steps:
+///   1. The data table is converted to a device array and, when weights are present, every
+///      row is multiplied by its weight (apply_weights()).
+///   2. init_computation() reduces the block column by column and updates the row counter.
+///   3. When `input` carries previous partial results (its row-count table has data), they
+///      are merged with the block statistics by update_partial_results(); otherwise the
+///      block statistics become the partial results.
+///
+/// @tparam Float Floating-point type used for all device arrays.
+/// @tparam Task  Task tag of the input and result types.
+/// @param ctx   GPU context that provides the queue.
+/// @param desc  Descriptor; not read by this function.
+/// @param input Data block, optional weights and optional previous partial results.
+/// @return Partial results: min, max, sum, sum of squares and centered sum of squares as
+///         1 x column_count tables, and the row count as a 1 x 1 table.
+template <typename Float, typename Task>
+static partial_compute_result<Task> partial_compute(const context_gpu& ctx,
+                                                    const descriptor_t& desc,
+                                                    const partial_compute_input<Task>& input) {
+    auto& q = ctx.get_queue();
+    const auto data = input.get_data();
+    const auto weights = input.get_weights();
+    const auto prev = input.get_prev();
+    const std::int64_t row_count = data.get_row_count();
+    const std::int64_t column_count = data.get_column_count();
+
+    // Buffers allocated here hold at most row_count x column_count elements, so this is the
+    // only product that needs an overflow check.
+    dal::detail::check_mul_overflow(row_count, column_count);
+
+    const auto data_nd = pr::table2ndarray<Float>(q, data, alloc::device);
+    auto data_to_compute = data_nd;
+    sycl::event apply_weights_event;
+    if (weights.has_data()) {
+        auto weights_nd = pr::table2ndarray_1d<Float>(q, weights, alloc::device);
+        std::tie(data_to_compute, apply_weights_event) =
+            apply_weights(q, data_nd, row_count, column_count, weights_nd);
+        // weights_nd is destroyed at the end of this scope, so the kernel that reads it has
+        // to finish first.
+        apply_weights_event.wait_and_throw();
+    }
+
+    partial_compute_result<Task> result;
+
+    if (prev.get_partial_n_rows().has_data()) {
+        // Continue from the statistics stored in the previous partial result.
+        const auto prev_nobs = pr::table2ndarray_1d<Float>(q, prev.get_partial_n_rows());
+        const auto prev_min =
+            pr::table2ndarray_1d<Float>(q, prev.get_partial_min(), alloc::device);
+        const auto prev_max =
+            pr::table2ndarray_1d<Float>(q, prev.get_partial_max(), alloc::device);
+        const auto prev_sums =
+            pr::table2ndarray_1d<Float>(q, prev.get_partial_sum(), alloc::device);
+        const auto prev_sums2 =
+            pr::table2ndarray_1d<Float>(q, prev.get_partial_sum_squares(), alloc::device);
+        const auto prev_sums2cent =
+            pr::table2ndarray_1d<Float>(q, prev.get_partial_sum_squares_centered(), alloc::device);
+
+        // The merge kernel indexes every array with the column index of the current block.
+        ONEDAL_ASSERT(prev_min.get_dimension(0) == column_count);
+        ONEDAL_ASSERT(prev_max.get_dimension(0) == column_count);
+        ONEDAL_ASSERT(prev_sums.get_dimension(0) == column_count);
+        ONEDAL_ASSERT(prev_sums2.get_dimension(0) == column_count);
+
+        // Statistics of the current block; total_nobs is the previous count plus row_count.
+        auto [cur_min, cur_max, cur_sums, cur_sums2, cur_sums2cent, total_nobs, cur_event] =
+            init_computation(q,
+                             data_to_compute,
+                             prev_nobs,
+                             column_count,
+                             row_count,
+                             { apply_weights_event });
+
+        // Combine the block statistics with the previous ones.
+        auto [new_min, new_max, new_sums, new_sums2, new_sums2cent, merge_event] =
+            update_partial_results(q,
+                                   prev_min,
+                                   cur_min,
+                                   prev_max,
+                                   cur_max,
+                                   prev_sums,
+                                   cur_sums,
+                                   prev_sums2,
+                                   cur_sums2,
+                                   prev_sums2cent,
+                                   cur_sums2cent,
+                                   column_count,
+                                   row_count,
+                                   total_nobs,
+                                   { cur_event });
+
+        // Publish the merged statistics; flatten() receives the event they depend on.
+        result.set_partial_min(
+            homogen_table::wrap(new_min.flatten(q, { merge_event }), 1, column_count));
+        result.set_partial_max(
+            homogen_table::wrap(new_max.flatten(q, { merge_event }), 1, column_count));
+        result.set_partial_sum(
+            homogen_table::wrap(new_sums.flatten(q, { merge_event }), 1, column_count));
+        result.set_partial_sum_squares(
+            homogen_table::wrap(new_sums2.flatten(q, { merge_event }), 1, column_count));
+        result.set_partial_sum_squares_centered(
+            homogen_table::wrap(new_sums2cent.flatten(q, { merge_event }), 1, column_count));
+        result.set_partial_n_rows(
+            homogen_table::wrap(total_nobs.flatten(q, { merge_event }), 1, 1));
+    }
+    else {
+        // First block: the row counter has to start from zero rather than from the
+        // uninitialized contents of a freshly allocated array.
+        auto zero_nobs = pr::ndarray<Float, 1>::empty(q, 1);
+        auto zero_event = q.fill(zero_nobs.get_mutable_data(), Float(0), 1);
+
+        auto [cur_min, cur_max, cur_sums, cur_sums2, cur_sums2cent, total_nobs, cur_event] =
+            init_computation(q,
+                             data_to_compute,
+                             zero_nobs,
+                             column_count,
+                             row_count,
+                             { apply_weights_event, zero_event });
+
+        // Publish the block statistics; flatten() receives the event they depend on.
+        result.set_partial_min(
+            homogen_table::wrap(cur_min.flatten(q, { cur_event }), 1, column_count));
+        result.set_partial_max(
+            homogen_table::wrap(cur_max.flatten(q, { cur_event }), 1, column_count));
+        result.set_partial_sum(
+            homogen_table::wrap(cur_sums.flatten(q, { cur_event }), 1, column_count));
+        result.set_partial_sum_squares(
+            homogen_table::wrap(cur_sums2.flatten(q, { cur_event }), 1, column_count));
+        result.set_partial_sum_squares_centered(
+            homogen_table::wrap(cur_sums2cent.flatten(q, { cur_event }), 1, column_count));
+        result.set_partial_n_rows(
+            homogen_table::wrap(total_nobs.flatten(q, { cur_event }), 1, 1));
+    }
+
+    return result;
+}
+
+template <typename Float>
+struct partial_compute_kernel_gpu<Float, method::dense, task::compute> { // dense-method kernel
+    result_t operator()(const context_gpu& ctx,
+                        const descriptor_t& desc,
+                        const input_t& input) const {
+        return partial_compute<Float, task::compute>(ctx, desc, input); // forwards unchanged
+    }
+};
+
+template struct partial_compute_kernel_gpu<float, method::dense, task::compute>;
+template struct partial_compute_kernel_gpu<double, method::dense, task::compute>;
+
+} // namespace oneapi::dal::basic_statistics::backend
diff --git a/cpp/oneapi/dal/algo/basic_statistics/common.hpp b/cpp/oneapi/dal/algo/basic_statistics/common.hpp
index e2146672486..9542c278ef3 100644
--- a/cpp/oneapi/dal/algo/basic_statistics/common.hpp
+++ b/cpp/oneapi/dal/algo/basic_statistics/common.hpp
@@ -19,6 +19,7 @@
 #include "oneapi/dal/detail/common.hpp"
 #include "oneapi/dal/table/common.hpp"
 #include "oneapi/dal/util/result_option_id.hpp"
+#include "oneapi/dal/common.hpp"
 
 namespace oneapi::dal::basic_statistics {
 
diff --git a/cpp/oneapi/dal/algo/basic_statistics/compute_types.cpp b/cpp/oneapi/dal/algo/basic_statistics/compute_types.cpp
index 834895ba8b2..a8d89d55031 100644
--- a/cpp/oneapi/dal/algo/basic_statistics/compute_types.cpp
+++ b/cpp/oneapi/dal/algo/basic_statistics/compute_types.cpp
@@ -22,6 +22,7 @@ namespace oneapi::dal::basic_statistics {
 template <typename Task>
 class detail::v1::compute_input_impl : public base {
 public:
+    compute_input_impl() = default; // data and weights are default-constructed tables
     compute_input_impl(const table& data) : data(data) {}
     compute_input_impl(const table& data, const table& weights) : data(data), weights(weights) {}
     table data, weights;
@@ -44,11 +45,26 @@ class detail::v1::compute_result_impl : public base {
     result_option_id options;
 };
 
+template <typename Task>
+class detail::v1::partial_compute_result_impl : public base { // state of partial_compute_result
+public:
+    table nobs; // read by get_partial_n_rows(), written by set_partial_n_rows_impl()
+    table partial_min; // read by get_partial_min(), written by set_partial_min_impl()
+    table partial_max; // read by get_partial_max(), written by set_partial_max_impl()
+    table partial_sum; // read by get_partial_sum(), written by set_partial_sum_impl()
+    table partial_sum_squares; // get_partial_sum_squares() / set_partial_sum_squares_impl()
+    table partial_sum_squares_centered; // get_/set_partial_sum_squares_centered(_impl)()
+};
+
 using detail::v1::compute_input_impl;
 using detail::v1::compute_result_impl;
+using detail::v1::partial_compute_result_impl;
 
 namespace v1 {
 
+template <typename Task> // default constructor: the impl is built with its own default ctor
+compute_input<Task>::compute_input() : impl_(new compute_input_impl<Task>{}) {}
+
 template <typename Task>
 compute_input<Task>::compute_input(const table& data) : impl_(new compute_input_impl<Task>(data)) {}
 
@@ -251,8 +267,95 @@ void compute_result<Task>::set_result_options_impl(const result_option_id& value
     impl_->options = value;
 }
 
+/// Creates an input for `data`; `prev_` is left default-constructed.
+template <typename Task>
+partial_compute_input<Task>::partial_compute_input(const table& data)
+        : compute_input<Task>(data) {}
+
+/// Creates an input without data; `prev_` is left default-constructed.
+template <typename Task>
+partial_compute_input<Task>::partial_compute_input() : compute_input<Task>() {}
+
+/// Creates an input for `data` that carries `prev` as the previous partial result.
+template <typename Task>
+partial_compute_input<Task>::partial_compute_input(const partial_compute_result<Task>& prev,
+                                                   const table& data)
+        : compute_input<Task>(data),
+          prev_(prev) {}
+
+/// Creates an input for `data` and `weights` that carries `prev` as the previous partial result.
+template <typename Task>
+partial_compute_input<Task>::partial_compute_input(const partial_compute_result<Task>& prev,
+                                                   const table& data,
+                                                   const table& weights)
+        : compute_input<Task>(data, weights),
+          prev_(prev) {}
+
+template <typename Task>
+const table& partial_compute_result<Task>::get_partial_n_rows() const {
+    return impl_->nobs; // table stored by set_partial_n_rows_impl()
+}
+
+template <typename Task>
+partial_compute_result<Task>::partial_compute_result()
+        : impl_(new partial_compute_result_impl<Task>()) {} // all tables default-constructed
+
+template <typename Task>
+void partial_compute_result<Task>::set_partial_n_rows_impl(const table& value) {
+    impl_->nobs = value; // returned by get_partial_n_rows()
+}
+
+template <typename Task>
+const table& partial_compute_result<Task>::get_partial_min() const {
+    return impl_->partial_min; // table stored by set_partial_min_impl()
+}
+
+template <typename Task>
+void partial_compute_result<Task>::set_partial_min_impl(const table& value) {
+    impl_->partial_min = value; // returned by get_partial_min()
+}
+template <typename Task>
+const table& partial_compute_result<Task>::get_partial_max() const {
+    return impl_->partial_max; // table stored by set_partial_max_impl()
+}
+
+template <typename Task>
+void partial_compute_result<Task>::set_partial_max_impl(const table& value) {
+    impl_->partial_max = value; // returned by get_partial_max()
+}
+
+template <typename Task>
+const table& partial_compute_result<Task>::get_partial_sum() const {
+    return impl_->partial_sum; // table stored by set_partial_sum_impl()
+}
+
+template <typename Task>
+void partial_compute_result<Task>::set_partial_sum_impl(const table& value) {
+    impl_->partial_sum = value; // returned by get_partial_sum()
+}
+
+template <typename Task>
+void partial_compute_result<Task>::set_partial_sum_squares_impl(const table& value) {
+    impl_->partial_sum_squares = value; // returned by get_partial_sum_squares()
+}
+template <typename Task>
+const table& partial_compute_result<Task>::get_partial_sum_squares() const {
+    return impl_->partial_sum_squares; // table stored by set_partial_sum_squares_impl()
+}
+
+template <typename Task>
+void partial_compute_result<Task>::set_partial_sum_squares_centered_impl(const table& value) {
+    impl_->partial_sum_squares_centered = value; // returned by the matching getter below
+}
+
+template <typename Task>
+const table& partial_compute_result<Task>::get_partial_sum_squares_centered() const {
+    return impl_->partial_sum_squares_centered; // stored by the matching setter above
+}
 template class ONEDAL_EXPORT compute_input<task::compute>;
 template class ONEDAL_EXPORT compute_result<task::compute>;
+template class ONEDAL_EXPORT partial_compute_input<task::compute>;
+template class ONEDAL_EXPORT partial_compute_result<task::compute>;
 
 } // namespace v1
 } // namespace oneapi::dal::basic_statistics
diff --git a/cpp/oneapi/dal/algo/basic_statistics/compute_types.hpp b/cpp/oneapi/dal/algo/basic_statistics/compute_types.hpp
index e8d8095e4a3..a07e3e6c482 100644
--- a/cpp/oneapi/dal/algo/basic_statistics/compute_types.hpp
+++ b/cpp/oneapi/dal/algo/basic_statistics/compute_types.hpp
@@ -27,10 +27,14 @@ class compute_input_impl;
 
 template <typename Task>
 class compute_result_impl;
+
+template <typename Task>
+class partial_compute_result_impl;
 } // namespace v1
 
 using v1::compute_input_impl;
 using v1::compute_result_impl;
+using v1::partial_compute_result_impl;
 
 } // namespace detail
 
@@ -44,7 +48,7 @@ class compute_input : public base {
 
 public:
     using task_t = Task;
-
+    compute_input();
     /// Creates a new instance of the class with the given :literal:`data`
     /// property value
     compute_input(const table& data);
@@ -213,9 +217,132 @@ class compute_result : public base {
     dal::detail::pimpl<detail::compute_result_impl<Task>> impl_;
 };
 
+template <typename Task = task::by_default> // state carried between partial_compute steps
+class partial_compute_result : public base {
+    static_assert(detail::is_valid_task_v<Task>);
+
+public:
+    using task_t = Task;
+
+    partial_compute_result();
+
+    /// A $1 \\times 1$ table with the nobs value (presumably the number of rows seen so far).
+    /// @remark default = table{}
+    const table& get_partial_n_rows() const;
+
+    auto& set_partial_n_rows(const table& value) {
+        set_partial_n_rows_impl(value);
+        return *this; // returning *this allows setter calls to be chained
+    }
+
+    /// A $1 \\times p$ table, where element $j$ is the current minimum result for feature $j$.
+    /// @remark default = table{}
+    const table& get_partial_min() const;
+
+    auto& set_partial_min(const table& value) {
+        set_partial_min_impl(value);
+        return *this;
+    }
+
+    /// A $1 \\times p$ table, where element $j$ is the current maximum result for feature $j$.
+    /// @remark default = table{}
+    const table& get_partial_max() const;
+
+    auto& set_partial_max(const table& value) {
+        set_partial_max_impl(value);
+        return *this;
+    }
+
+    /// A $1 \\times p$ table, where element $j$ is the sum over the current blocks for feature $j$.
+    /// @remark default = table{}
+    const table& get_partial_sum() const;
+
+    auto& set_partial_sum(const table& value) {
+        set_partial_sum_impl(value);
+        return *this;
+    }
+
+    /// A $1 \\times p$ table; element $j$ is the current sum of squares for feature $j$.
+    /// @remark default = table{}
+    const table& get_partial_sum_squares() const;
+
+    auto& set_partial_sum_squares(const table& value) {
+        set_partial_sum_squares_impl(value);
+        return *this;
+    }
+
+    /// A $1 \\times p$ table; element $j$ is the current centered sum of squares for feature $j$.
+    /// @remark default = table{}
+    const table& get_partial_sum_squares_centered() const;
+
+    auto& set_partial_sum_squares_centered(const table& value) {
+        set_partial_sum_squares_centered_impl(value);
+        return *this;
+    }
+
+protected: // non-inline setters that the fluent set_*() wrappers above delegate to
+    void set_partial_n_rows_impl(const table&);
+    void set_partial_min_impl(const table&);
+    void set_partial_max_impl(const table&);
+    void set_partial_sum_impl(const table&);
+    void set_partial_sum_squares_impl(const table&);
+    void set_partial_sum_squares_centered_impl(const table&);
+
+private:
+    dal::detail::pimpl<detail::partial_compute_result_impl<Task>> impl_; // holds all tables
+};
+
+template <typename Task = task::by_default> // input of a single partial_compute step
+class partial_compute_input : protected compute_input<Task> {
+public: // the base is protected, so data/weights accessors are re-exposed explicitly below
+    using task_t = Task;
+
+    partial_compute_input();
+
+    partial_compute_input(const table& data);
+
+    partial_compute_input(const partial_compute_result<Task>& prev, const table& data);
+
+    partial_compute_input(const partial_compute_result<Task>& prev,
+                          const table& data,
+                          const table& weights);
+
+    const table& get_data() const {
+        return compute_input<Task>::get_data(); // forwarded to the protected base
+    }
+
+    auto& set_data(const table& value) {
+        compute_input<Task>::set_data(value);
+        return *this; // returning *this allows setter calls to be chained
+    }
+
+    const table& get_weights() const {
+        return compute_input<Task>::get_weights(); // forwarded to the protected base
+    }
+
+    auto& set_weights(const table& value) {
+        compute_input<Task>::set_weights(value);
+        return *this;
+    }
+
+    const partial_compute_result<Task>& get_prev() const {
+        return prev_; // NOTE(review): presumably the result of the preceding step - confirm
+    }
+
+    auto& set_prev(const partial_compute_result<Task>& value) {
+        prev_ = value;
+        return *this;
+    }
+
+private:
+    partial_compute_result<Task> prev_; // kept as a member, unlike data/weights in the base
+};
+
 } // namespace v1
 
 using v1::compute_input;
 using v1::compute_result;
+using v1::partial_compute_input;
+using v1::partial_compute_result;
 
 } // namespace oneapi::dal::basic_statistics
diff --git a/cpp/oneapi/dal/algo/basic_statistics/detail/finalize_compute_ops.cpp b/cpp/oneapi/dal/algo/basic_statistics/detail/finalize_compute_ops.cpp
new file mode 100644
index 00000000000..b5e62340cb4
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/detail/finalize_compute_ops.cpp
@@ -0,0 +1,43 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "oneapi/dal/algo/basic_statistics/detail/finalize_compute_ops.hpp"
+#include "oneapi/dal/algo/basic_statistics/backend/cpu/finalize_compute_kernel.hpp"
+#include "oneapi/dal/backend/dispatcher.hpp"
+
+namespace oneapi::dal::basic_statistics::detail {
+namespace v1 {
+
+template <typename Policy, typename Float, typename Method, typename Task>
+struct finalize_compute_ops_dispatcher<Policy, Float, Method, Task> { // no extra Options
+    compute_result<Task> operator()(const Policy& policy,
+                                    const descriptor_base<Task>& desc,
+                                    const partial_compute_result<Task>& input) const {
+        using kernel_dispatcher_t = dal::backend::kernel_dispatcher< // candidates: CPU only
+            KERNEL_SINGLE_NODE_CPU(backend::finalize_compute_kernel_cpu<Float, Method, Task>)>;
+        return kernel_dispatcher_t()(policy, desc, input); // temporary dispatcher, same args
+    }
+};
+
+#define INSTANTIATE(F, M, T)      \
+    template struct ONEDAL_EXPORT \
+        finalize_compute_ops_dispatcher<dal::detail::host_policy, F, M, T>;
+
+INSTANTIATE(float, method::dense, task::compute)
+INSTANTIATE(double, method::dense, task::compute)
+
+} // namespace v1
+} // namespace oneapi::dal::basic_statistics::detail
diff --git a/cpp/oneapi/dal/algo/basic_statistics/detail/finalize_compute_ops.hpp b/cpp/oneapi/dal/algo/basic_statistics/detail/finalize_compute_ops.hpp
new file mode 100644
index 00000000000..563f8afe848
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/detail/finalize_compute_ops.hpp
@@ -0,0 +1,72 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#pragma once
+
+#include "oneapi/dal/algo/basic_statistics/compute_types.hpp"
+#include "oneapi/dal/detail/error_messages.hpp"
+
+namespace oneapi::dal::basic_statistics::detail {
+namespace v1 {
+
+template <typename Context, typename Float, typename Method, typename Task, typename... Options>
+struct finalize_compute_ops_dispatcher { // primary template: operator() is only declared here
+    compute_result<Task> operator()(const Context&,
+                                    const descriptor_base<Task>&,
+                                    const partial_compute_result<Task>&) const;
+};
+
+template <typename Descriptor> // finalize step: check input, dispatch, check result
+struct finalize_compute_ops {
+    using float_t = typename Descriptor::float_t;
+    using method_t = typename Descriptor::method_t;
+    using task_t = typename Descriptor::task_t;
+    using input_t = partial_compute_result<task_t>;
+    using result_t = compute_result<task_t>;
+    using descriptor_base_t = descriptor_base<task_t>;
+
+    void check_preconditions(const Descriptor& params, const input_t& input) const {
+        ONEDAL_ASSERT(input.get_partial_n_rows().has_data()); // assertion-only validation
+        ONEDAL_ASSERT(input.get_partial_n_rows().get_column_count() == 1); // n_rows is 1 x 1
+        ONEDAL_ASSERT(input.get_partial_n_rows().get_row_count() == 1);
+        ONEDAL_ASSERT(input.get_partial_max().has_data());
+        ONEDAL_ASSERT(input.get_partial_min().has_data());
+        ONEDAL_ASSERT(input.get_partial_sum().has_data());
+        ONEDAL_ASSERT(input.get_partial_sum_squares().has_data());
+        ONEDAL_ASSERT(input.get_partial_sum_squares_centered().has_data());
+    }
+
+    void check_postconditions(const Descriptor& params,
+                              const input_t& input,
+                              const result_t& result) const {} // nothing is verified here
+
+    template <typename Context>
+    auto operator()(const Context& ctx,
+                    const Descriptor& desc,
+                    const partial_compute_result<task_t>& input) const {
+        using dispatcher_t = finalize_compute_ops_dispatcher<Context, float_t, method_t, task_t>;
+        check_preconditions(desc, input);
+        const auto result = dispatcher_t()(ctx, desc, input);
+        check_postconditions(desc, input, result);
+        return result;
+    }
+};
+
+} // namespace v1
+
+using v1::finalize_compute_ops;
+
+} // namespace oneapi::dal::basic_statistics::detail
diff --git a/cpp/oneapi/dal/algo/basic_statistics/detail/finalize_compute_ops_dpc.cpp b/cpp/oneapi/dal/algo/basic_statistics/detail/finalize_compute_ops_dpc.cpp
new file mode 100644
index 00000000000..a16d1123295
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/detail/finalize_compute_ops_dpc.cpp
@@ -0,0 +1,45 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "oneapi/dal/algo/basic_statistics/detail/finalize_compute_ops.hpp"
+#include "oneapi/dal/algo/basic_statistics/backend/cpu/finalize_compute_kernel.hpp"
+#include "oneapi/dal/algo/basic_statistics/backend/gpu/finalize_compute_kernel.hpp"
+#include "oneapi/dal/backend/dispatcher.hpp"
+
+namespace oneapi::dal::basic_statistics::detail {
+namespace v1 {
+
+template <typename Policy, typename Float, typename Method, typename Task>
+struct finalize_compute_ops_dispatcher<Policy, Float, Method, Task> { // no extra Options
+    compute_result<Task> operator()(const Policy& policy,
+                                    const descriptor_base<Task>& desc,
+                                    const partial_compute_result<Task>& input) const {
+        using kernel_dispatcher_t = dal::backend::kernel_dispatcher< // candidates: CPU and GPU
+            KERNEL_SINGLE_NODE_CPU(backend::finalize_compute_kernel_cpu<Float, Method, Task>),
+            KERNEL_SINGLE_NODE_GPU(backend::finalize_compute_kernel_gpu<Float, Method, Task>)>;
+        return kernel_dispatcher_t()(policy, desc, input); // temporary dispatcher, same args
+    }
+};
+
+#define INSTANTIATE(F, M, T)      \
+    template struct ONEDAL_EXPORT \
+        finalize_compute_ops_dispatcher<dal::detail::data_parallel_policy, F, M, T>;
+
+INSTANTIATE(float, method::dense, task::compute)
+INSTANTIATE(double, method::dense, task::compute)
+
+} // namespace v1
+} // namespace oneapi::dal::basic_statistics::detail
diff --git a/cpp/oneapi/dal/algo/basic_statistics/detail/partial_compute_ops.cpp b/cpp/oneapi/dal/algo/basic_statistics/detail/partial_compute_ops.cpp
new file mode 100644
index 00000000000..f8b3ab06dfc
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/detail/partial_compute_ops.cpp
@@ -0,0 +1,42 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "oneapi/dal/algo/basic_statistics/detail/partial_compute_ops.hpp"
+#include "oneapi/dal/algo/basic_statistics/backend/cpu/partial_compute_kernel.hpp"
+#include "oneapi/dal/backend/dispatcher.hpp"
+
+namespace oneapi::dal::basic_statistics::detail {
+namespace v1 {
+
+template <typename Policy, typename Float, typename Method, typename Task>
+struct partial_compute_ops_dispatcher<Policy, Float, Method, Task> { // no extra Options
+    partial_compute_result<Task> operator()(const Policy& policy,
+                                            const descriptor_base<Task>& desc,
+                                            const partial_compute_input<Task>& input) const {
+        using kernel_dispatcher_t = dal::backend::kernel_dispatcher< // candidates: CPU only
+            KERNEL_SINGLE_NODE_CPU(backend::partial_compute_kernel_cpu<Float, Method, Task>)>;
+        return kernel_dispatcher_t()(policy, desc, input); // temporary dispatcher, same args
+    }
+};
+
+#define INSTANTIATE(F, M, T) \
+    template struct ONEDAL_EXPORT partial_compute_ops_dispatcher<dal::detail::host_policy, F, M, T>;
+
+INSTANTIATE(float, method::dense, task::compute)
+INSTANTIATE(double, method::dense, task::compute)
+
+} // namespace v1
+} // namespace oneapi::dal::basic_statistics::detail
diff --git a/cpp/oneapi/dal/algo/basic_statistics/detail/partial_compute_ops.hpp b/cpp/oneapi/dal/algo/basic_statistics/detail/partial_compute_ops.hpp
new file mode 100644
index 00000000000..5aa236cc3b6
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/detail/partial_compute_ops.hpp
@@ -0,0 +1,79 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#pragma once
+
+#include "oneapi/dal/algo/basic_statistics/compute_types.hpp"
+#include "oneapi/dal/detail/error_messages.hpp"
+
+namespace oneapi::dal::basic_statistics::detail {
+namespace v1 {
+
+template <typename Context, typename Float, typename Method, typename Task, typename... Options>
+struct partial_compute_ops_dispatcher { // primary template: operator() is only declared here
+    partial_compute_result<Task> operator()(const Context&,
+                                            const descriptor_base<Task>&,
+                                            const partial_compute_input<Task>&) const;
+};
+
+template <typename Descriptor>
+struct partial_compute_ops {
+    using float_t = typename Descriptor::float_t;
+    using method_t = typename Descriptor::method_t;
+    using task_t = typename Descriptor::task_t;
+    using input_t = partial_compute_input<task_t>;
+    using result_t = partial_compute_result<task_t>;
+    using descriptor_base_t = descriptor_base<task_t>;
+
+    /// Validates one input block: data must be non-empty and, when weights are
+    /// supplied, they must form a single column with one row per data row.
+    void check_preconditions(const Descriptor& params, const input_t& input) const {
+        using msg = dal::detail::error_messages;
+        const auto& data = input.get_data();
+        if (!data.has_data()) {
+            throw domain_error(msg::input_data_is_empty());
+        }
+        const auto& weights = input.get_weights();
+        if (!weights.has_data()) {
+            return; // weights are optional; nothing else to validate without them
+        }
+        if (weights.get_row_count() != data.get_row_count())
+            throw domain_error(msg::weight_dimension_doesnt_match_data_dimension());
+        if (weights.get_column_count() != std::int64_t(1))
+            throw domain_error(msg::weights_column_count_ne_1());
+    }
+
+    void check_postconditions(const Descriptor& params,
+                              const input_t& input,
+                              const result_t& result) const {}
+
+    template <typename Context>
+    auto operator()(const Context& ctx,
+                    const Descriptor& desc,
+                    const partial_compute_input<task_t>& input) const {
+        using dispatcher_t = partial_compute_ops_dispatcher<Context, float_t, method_t, task_t>;
+        check_preconditions(desc, input);
+        const auto result = dispatcher_t()(ctx, desc, input);
+        check_postconditions(desc, input, result);
+        return result;
+    }
+};
+
+} // namespace v1
+
+using v1::partial_compute_ops;
+
+} // namespace oneapi::dal::basic_statistics::detail
diff --git a/cpp/oneapi/dal/algo/basic_statistics/detail/partial_compute_ops_dpc.cpp b/cpp/oneapi/dal/algo/basic_statistics/detail/partial_compute_ops_dpc.cpp
new file mode 100644
index 00000000000..958c2a8eec4
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/detail/partial_compute_ops_dpc.cpp
@@ -0,0 +1,45 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "oneapi/dal/algo/basic_statistics/detail/partial_compute_ops.hpp"
+#include "oneapi/dal/algo/basic_statistics/backend/cpu/partial_compute_kernel.hpp"
+#include "oneapi/dal/algo/basic_statistics/backend/gpu/partial_compute_kernel.hpp"
+#include "oneapi/dal/backend/dispatcher.hpp"
+
+namespace oneapi::dal::basic_statistics::detail {
+namespace v1 {
+
+template <typename Policy, typename Float, typename Method, typename Task>
+struct partial_compute_ops_dispatcher<Policy, Float, Method, Task> { // no extra Options
+    partial_compute_result<Task> operator()(const Policy& policy,
+                                            const descriptor_base<Task>& desc,
+                                            const partial_compute_input<Task>& input) const {
+        using kernel_dispatcher_t = dal::backend::kernel_dispatcher< // candidates: CPU and GPU
+            KERNEL_SINGLE_NODE_CPU(backend::partial_compute_kernel_cpu<Float, Method, Task>),
+            KERNEL_SINGLE_NODE_GPU(backend::partial_compute_kernel_gpu<Float, Method, Task>)>;
+        return kernel_dispatcher_t()(policy, desc, input); // temporary dispatcher, same args
+    }
+};
+
+#define INSTANTIATE(F, M, T)      \
+    template struct ONEDAL_EXPORT \
+        partial_compute_ops_dispatcher<dal::detail::data_parallel_policy, F, M, T>;
+
+INSTANTIATE(float, method::dense, task::compute)
+INSTANTIATE(double, method::dense, task::compute)
+
+} // namespace v1
+} // namespace oneapi::dal::basic_statistics::detail
diff --git a/cpp/oneapi/dal/algo/basic_statistics/finalize_compute.hpp b/cpp/oneapi/dal/algo/basic_statistics/finalize_compute.hpp
new file mode 100644
index 00000000000..64a051a8863
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/finalize_compute.hpp
@@ -0,0 +1,31 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#pragma once
+
+#include "oneapi/dal/algo/basic_statistics/compute_types.hpp"
+#include "oneapi/dal/algo/basic_statistics/detail/finalize_compute_ops.hpp"
+#include "oneapi/dal/finalize_compute.hpp"
+
+namespace oneapi::dal::detail {
+namespace v1 {
+
+template <typename Descriptor> // chosen for descriptors carrying basic_statistics' tag
+struct finalize_compute_ops<Descriptor, dal::basic_statistics::detail::descriptor_tag>
+        : dal::basic_statistics::detail::finalize_compute_ops<Descriptor> {};
+
+} // namespace v1
+} // namespace oneapi::dal::detail
diff --git a/cpp/oneapi/dal/algo/basic_statistics/partial_compute.hpp b/cpp/oneapi/dal/algo/basic_statistics/partial_compute.hpp
new file mode 100644
index 00000000000..540bbef43cf
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/partial_compute.hpp
@@ -0,0 +1,31 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#pragma once
+
+#include "oneapi/dal/algo/basic_statistics/compute_types.hpp"
+#include "oneapi/dal/algo/basic_statistics/detail/partial_compute_ops.hpp"
+#include "oneapi/dal/partial_compute.hpp"
+
+namespace oneapi::dal::detail {
+namespace v1 {
+
+template <typename Descriptor> // chosen for descriptors carrying basic_statistics' tag
+struct partial_compute_ops<Descriptor, dal::basic_statistics::detail::descriptor_tag>
+        : dal::basic_statistics::detail::partial_compute_ops<Descriptor> {};
+
+} // namespace v1
+} // namespace oneapi::dal::detail
diff --git a/cpp/oneapi/dal/algo/basic_statistics/test/fixture.hpp b/cpp/oneapi/dal/algo/basic_statistics/test/fixture.hpp
index 1b38ba2e696..6f911a9aff9 100644
--- a/cpp/oneapi/dal/algo/basic_statistics/test/fixture.hpp
+++ b/cpp/oneapi/dal/algo/basic_statistics/test/fixture.hpp
@@ -19,7 +19,8 @@
 #include <limits>
 
 #include "oneapi/dal/algo/basic_statistics/compute.hpp"
-
+#include "oneapi/dal/algo/basic_statistics/partial_compute.hpp"
+#include "oneapi/dal/algo/basic_statistics/finalize_compute.hpp"
 #include "oneapi/dal/test/engine/common.hpp"
 #include "oneapi/dal/test/engine/fixtures.hpp"
 #include "oneapi/dal/test/engine/dataframe.hpp"
@@ -49,6 +50,31 @@ class basic_statistics_test : public te::crtp_algo_fixture<TestType, Derived> {
     te::table_id get_homogen_table_id() const {
         return te::table_id::homogen<float_t>();
     }
+    /// Cuts `t` into `split_count` consecutive row blocks; the last one also gets the remainder.
+    template <typename Float>
+    std::vector<dal::table> split_table_by_rows(const dal::table& t, std::int64_t split_count) {
+        ONEDAL_ASSERT(0l < split_count);
+        ONEDAL_ASSERT(split_count <= t.get_row_count());
+        const std::int64_t total_rows = t.get_row_count();
+        const std::int64_t feature_count = t.get_column_count();
+        const std::int64_t base_rows = total_rows / split_count;
+        const std::int64_t extra_rows = total_rows % split_count;
+
+        std::vector<dal::table> blocks(split_count);
+        std::int64_t first_row = 0;
+        for (std::int64_t idx = 0; idx < split_count; idx++) {
+            // The remainder of the integer division is appended to the final block only.
+            const bool is_last = (idx + 1 == split_count);
+            const std::int64_t rows_here = is_last ? base_rows + extra_rows : base_rows;
+            const std::int64_t end_row = first_row + rows_here;
+            const auto rows = dal::range{ first_row, end_row };
+            const auto chunk = dal::row_accessor<const Float>{ t }.pull(rows);
+            blocks[idx] = dal::homogen_table::wrap(chunk, rows_here, feature_count);
+            first_row = end_row;
+        }
+
+        return blocks;
+    }
 
     void general_checks(const te::dataframe& data_fr,
                         std::shared_ptr<te::dataframe> weights_fr,
@@ -74,6 +100,42 @@ class basic_statistics_test : public te::crtp_algo_fixture<TestType, Derived> {
         check_for_exception_for_non_requested_results(compute_mode, compute_result);
     }
 
+    /// Online flow check: feeds data (and optional weights) in nBlocks row blocks through
+    /// partial_compute, finalizes, and validates the result against the unsplit tables.
+    void online_general_checks(const te::dataframe& data_fr,
+                               std::shared_ptr<te::dataframe> weights_fr,
+                               bs::result_option_id compute_mode) {
+        const auto use_weights = bool(weights_fr);
+        CAPTURE(use_weights, compute_mode);
+        const std::int64_t nBlocks = 10;
+        const auto bs_desc = get_descriptor(compute_mode);
+        const auto data_table_id = this->get_homogen_table_id();
+
+        const table data = data_fr.get_table(this->get_policy(), data_table_id);
+        table weights;
+        dal::basic_statistics::partial_compute_result<> partial_result;
+        auto data_blocks = split_table_by_rows<double>(data, nBlocks);
+        if (use_weights) {
+            weights = weights_fr->get_table(this->get_policy(), data_table_id);
+            auto weight_blocks = split_table_by_rows<double>(weights, nBlocks);
+            for (std::int64_t b = 0; b < nBlocks; ++b) {
+                partial_result = this->partial_compute(bs_desc,
+                                                       partial_result,
+                                                       data_blocks[b],
+                                                       weight_blocks[b]);
+            }
+        }
+        else {
+            for (std::int64_t b = 0; b < nBlocks; ++b) {
+                partial_result = this->partial_compute(bs_desc, partial_result, data_blocks[b]);
+            }
+        }
+        // Finalization and verification do not depend on whether weights were used.
+        auto compute_result = this->finalize_compute(bs_desc, partial_result);
+        check_compute_result(compute_mode, data, weights, compute_result);
+        check_for_exception_for_non_requested_results(compute_mode, compute_result);
+    }
+
     void check_compute_result(bs::result_option_id compute_mode,
                               const table& data,
                               const table& weights,
diff --git a/cpp/oneapi/dal/algo/basic_statistics/test/online.cpp b/cpp/oneapi/dal/algo/basic_statistics/test/online.cpp
new file mode 100644
index 00000000000..e974ac4fe26
--- /dev/null
+++ b/cpp/oneapi/dal/algo/basic_statistics/test/online.cpp
@@ -0,0 +1,65 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "oneapi/dal/algo/basic_statistics/test/fixture.hpp"
+#include "oneapi/dal/test/engine/tables.hpp"
+#include "oneapi/dal/test/engine/io.hpp"
+
+namespace oneapi::dal::basic_statistics::test {
+
+namespace te = dal::test::engine;
+namespace la = te::linalg;
+namespace bs = oneapi::dal::basic_statistics;
+
+template <typename TestType> // CRTP leaf fixture; adds no members of its own
+class basic_statistics_online_test
+        : public basic_statistics_test<TestType, basic_statistics_online_test<TestType>> {};
+
+TEMPLATE_LIST_TEST_M(basic_statistics_online_test,
+                     "basic_statistics online common flow",
+                     "[basic_statistics][integration][online]",
+                     basic_statistics_types) {
+    SKIP_IF(this->not_float64_friendly());
+    const te::dataframe data =
+        GENERATE_DATAFRAME(te::dataframe_builder{ 100, 10 }.fill_normal(-30, 30, 7777),
+                           te::dataframe_builder{ 200, 20 }.fill_normal(-30, 30, 7777),
+                           te::dataframe_builder{ 200, 530 }.fill_normal(-30, 30, 7777),
+                           te::dataframe_builder{ 500, 250 }.fill_normal(0, 1, 7777),
+                           te::dataframe_builder{ 6000, 20 }.fill_normal(-30, 30, 7777),
+                           te::dataframe_builder{ 6000, 530 }.fill_normal(-30, 30, 7777),
+                           te::dataframe_builder{ 10000, 200 }.fill_normal(-30, 30, 7777),
+                           te::dataframe_builder{ 1000000, 20 }.fill_normal(-0.5, 0.5, 7777));
+
+    std::shared_ptr<te::dataframe> weights; // stays null for the unweighted variant
+    const bool use_weights = GENERATE(0, 1); // each dataset runs without and with weights
+
+    if (use_weights) {
+        const auto row_count = data.get_row_count();
+        weights = std::make_shared<te::dataframe>( // one weight column, one row per data row
+            te::dataframe_builder{ row_count, 1 }.fill_normal(0, 1, 777).build());
+    }
+
+    const bs::result_option_id res_min_max = result_options::min | result_options::max;
+    const bs::result_option_id res_mean_varc = result_options::mean | result_options::variance;
+    const bs::result_option_id res_all =
+        bs::result_option_id(dal::result_option_id_base(mask_full));
+
+    const bs::result_option_id compute_mode = GENERATE_COPY(res_min_max, res_mean_varc, res_all);
+
+    this->online_general_checks(data, weights, compute_mode); // split, accumulate, finalize
+}
+
+} // namespace oneapi::dal::basic_statistics::test
diff --git a/cpp/oneapi/dal/algo/basic_statistics/test/spmd.cpp b/cpp/oneapi/dal/algo/basic_statistics/test/spmd.cpp
index b8b8bd9d43d..bb79a096d9a 100644
--- a/cpp/oneapi/dal/algo/basic_statistics/test/spmd.cpp
+++ b/cpp/oneapi/dal/algo/basic_statistics/test/spmd.cpp
@@ -83,8 +83,6 @@ class basic_statistics_spmd_test
     std::int64_t rank_count_;
 };
 
-using basic_statistics_types = COMBINE_TYPES((float, double), (basic_statistics::method::dense));
-
 TEMPLATE_LIST_TEST_M(basic_statistics_spmd_test,
                      "basic_statistics common flow",
                      "[basic_statistics][integration][spmd]",
diff --git a/cpp/oneapi/dal/algo/covariance/backend/gpu/partial_compute_kernel_dense_dpc.cpp b/cpp/oneapi/dal/algo/covariance/backend/gpu/partial_compute_kernel_dense_dpc.cpp
index 2a16084e89a..75ad9a05f81 100644
--- a/cpp/oneapi/dal/algo/covariance/backend/gpu/partial_compute_kernel_dense_dpc.cpp
+++ b/cpp/oneapi/dal/algo/covariance/backend/gpu/partial_compute_kernel_dense_dpc.cpp
@@ -78,7 +78,6 @@ auto compute_crossproduct(sycl::queue& q,
 
 template <typename Float>
 auto init(sycl::queue& q,
-
           const std::int64_t row_count,
           const dal::backend::event_vector& deps = {}) {
     ONEDAL_PROFILER_TASK(init_partial_results, q);
@@ -202,10 +201,7 @@ static partial_compute_result<Task> partial_compute(const context_gpu& ctx,
             homogen_table::wrap(result_nobs.flatten(q, { update_event }), 1, 1));
     }
     else {
-        auto [result_nobs, init_event] = init<Float>(q,
-
-                                                     row_count,
-                                                     { crossproduct_event });
+        auto [result_nobs, init_event] = init<Float>(q, row_count, { crossproduct_event });
 
         result.set_partial_sum(
             homogen_table::wrap(sums.flatten(q, { init_event }), 1, column_count));
diff --git a/cpp/oneapi/dal/finalize_compute.hpp b/cpp/oneapi/dal/finalize_compute.hpp
index fb761b42371..d7c0b1260d1 100644
--- a/cpp/oneapi/dal/finalize_compute.hpp
+++ b/cpp/oneapi/dal/finalize_compute.hpp
@@ -19,7 +19,7 @@
 #include "oneapi/dal/detail/finalize_compute_ops.hpp"
 #include "oneapi/dal/detail/spmd_policy.hpp"
 #include "oneapi/dal/spmd/communicator.hpp"
-//TODO: move partial compute into preview(detail::data_parallel_policy is not in preview)
+
 namespace oneapi::dal {
 namespace v1 {
 
diff --git a/cpp/oneapi/dal/partial_compute.hpp b/cpp/oneapi/dal/partial_compute.hpp
index 5f4e7ad62d3..820f74f9685 100644
--- a/cpp/oneapi/dal/partial_compute.hpp
+++ b/cpp/oneapi/dal/partial_compute.hpp
@@ -19,7 +19,7 @@
 #include "oneapi/dal/detail/partial_compute_ops.hpp"
 #include "oneapi/dal/detail/spmd_policy.hpp"
 #include "oneapi/dal/spmd/communicator.hpp"
-//TODO: move partial compute into preview(detail::data_parallel_policy is not in preview)
+
 namespace oneapi::dal {
 namespace v1 {
 
diff --git a/examples/oneapi/cpp/BUILD b/examples/oneapi/cpp/BUILD
index 3dec717aebe..e473cddd284 100644
--- a/examples/oneapi/cpp/BUILD
+++ b/examples/oneapi/cpp/BUILD
@@ -67,6 +67,7 @@ dal_example_suite(
 
 dal_algo_example_suite(
     algos = [
+        "basic_statistics",
         "connected_components",
         "covariance",
         "dbscan",
diff --git a/examples/oneapi/cpp/source/basic_statistics/basic_statistics_dense_online.cpp b/examples/oneapi/cpp/source/basic_statistics/basic_statistics_dense_online.cpp
new file mode 100644
index 00000000000..ed109127870
--- /dev/null
+++ b/examples/oneapi/cpp/source/basic_statistics/basic_statistics_dense_online.cpp
@@ -0,0 +1,52 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "oneapi/dal/algo/basic_statistics.hpp"
+#include "oneapi/dal/io/csv.hpp"
+
+#include "example_util/utils.hpp"
+
+namespace dal = oneapi::dal;
+
+int main(int argc, char const *argv[]) {
+    const std::int64_t block_count = 10;
+    const auto csv_path = get_data_path("covcormoments_dense.csv");
+    const auto full_table = dal::read<dal::table>(dal::csv::data_source{ csv_path });
+    const auto desc = dal::basic_statistics::descriptor{};
+    // Feed the blocks one at a time, passing the previous partial result into each call.
+    dal::basic_statistics::partial_compute_result<> accumulated;
+    auto row_blocks = split_table_by_rows<double>(full_table, block_count);
+    for (std::int64_t block = 0; block < block_count; ++block) {
+        accumulated = dal::partial_compute(desc, accumulated, row_blocks[block]);
+    }
+    auto result = dal::finalize_compute(desc, accumulated);
+
+    // Writes a caption followed by one statistic to std::cout, then flushes via std::endl.
+    const auto show = [](const char *caption, const auto &values) {
+        std::cout << caption << values << std::endl;
+    };
+    show("Minimum:\n", result.get_min());
+    show("Maximum:\n", result.get_max());
+    show("Sum:\n", result.get_sum());
+    show("Sum of squares:\n", result.get_sum_squares());
+    show("Sum of squared difference from the means:\n", result.get_sum_squares_centered());
+    show("Mean:\n", result.get_mean());
+    show("Second order raw moment:\n", result.get_second_order_raw_moment());
+    show("Variance:\n", result.get_variance());
+    show("Standard deviation:\n", result.get_standard_deviation());
+    show("Variation:\n", result.get_variation());
+    return 0;
+}
diff --git a/examples/oneapi/dpc/source/basic_statistics/basic_statistics_dense_online.cpp b/examples/oneapi/dpc/source/basic_statistics/basic_statistics_dense_online.cpp
new file mode 100644
index 00000000000..859f0967186
--- /dev/null
+++ b/examples/oneapi/dpc/source/basic_statistics/basic_statistics_dense_online.cpp
@@ -0,0 +1,67 @@
+/*******************************************************************************
+* Copyright 2023 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <sycl/sycl.hpp>
+
+#ifndef ONEDAL_DATA_PARALLEL
+#define ONEDAL_DATA_PARALLEL
+#endif
+
+#include "oneapi/dal/algo/basic_statistics.hpp"
+#include "oneapi/dal/io/csv.hpp"
+
+#include "example_util/utils.hpp"
+
+namespace dal = oneapi::dal;
+
+void run(sycl::queue &q) {
+    const std::int64_t block_count = 10;
+    const auto csv_path = get_data_path("covcormoments_dense.csv");
+    const auto full_table = dal::read<dal::table>(q, dal::csv::data_source{ csv_path });
+    const auto desc = dal::basic_statistics::descriptor{};
+    // Feed the blocks one at a time, passing the previous partial result into each call.
+    dal::basic_statistics::partial_compute_result<> accumulated;
+    auto row_blocks = split_table_by_rows<double>(full_table, block_count);
+    for (std::int64_t block = 0; block < block_count; ++block) {
+        accumulated = dal::partial_compute(q, desc, accumulated, row_blocks[block]);
+    }
+    auto result = dal::finalize_compute(q, desc, accumulated);
+    // Writes a caption followed by one statistic to std::cout, then flushes via std::endl.
+    const auto show = [](const char *caption, const auto &values) {
+        std::cout << caption << values << std::endl;
+    };
+    show("Minimum:\n", result.get_min());
+    show("Maximum:\n", result.get_max());
+    show("Sum:\n", result.get_sum());
+    show("Sum of squares:\n", result.get_sum_squares());
+    show("Sum of squared difference from the means:\n", result.get_sum_squares_centered());
+    show("Mean:\n", result.get_mean());
+    show("Second order raw moment:\n", result.get_second_order_raw_moment());
+    show("Variance:\n", result.get_variance());
+    show("Standard deviation:\n", result.get_standard_deviation());
+    show("Variation:\n", result.get_variation());
+}
+
+int main(int argc, char const *argv[]) {
+    // Run the example once on every device returned by list_devices().
+    for (const auto &device : list_devices()) {
+        std::cout << "Running on " << device.get_platform().get_info<sycl::info::platform::name>()
+                  << ", " << device.get_info<sycl::info::device::name>() << "\n" << std::endl;
+        sycl::queue queue{ device };
+        run(queue);
+    }
+    return 0;
+}