From 5ea755c7d984811693a3d61b10885513895e3086 Mon Sep 17 00:00:00 2001 From: Anuya Welling Date: Tue, 30 Apr 2024 14:08:53 -0500 Subject: [PATCH] [Dynamic Selection] Adding sycl profiling for auto tune policy (#1464) * Added static assert for keyArgs==Args * WIP for jit compilation * Fixing Autotune bug * Added sycl profiling without host task * Corrections in dynamic traits * Remove comments in dynamic_selection_traits * Adding back jit compolation restrictions * Adressing comments to add thread safety, better traits * Added backend trait to check if profiling is enabled * Backend traits, renaming and adding a lock to lazy report * Fixed memory leaks * No nullptr for selection handle in async waiter constructor * Added an erase and remove_if * Adressing comments for profiling report, Adding profiling to default sycl backend constructor * Changes to std::chrono::duration * Addressed comments for sycl backend * Addressed comments to sycl backend * Fix USM memory leaks and create unique task names (#1537) * test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp - fix USM shared memory leaks Signed-off-by: Sergey Kopienko * test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp - create unique task names for h.single_task([](){}); Signed-off-by: Sergey Kopienko --------- Signed-off-by: Sergey Kopienko * Fix the format of report method - using `report_duration` in second param (#1539) * include/oneapi/dpl/internal/dynamic_selection_traits.h - fix traits to specify report method second parameter type Signed-off-by: Sergey Kopienko * Change second parameter type of auto_tune_selection_type::report method - to report_duration (std::chrono::milliseconds) Signed-off-by: Sergey Kopienko --------- Signed-off-by: Sergey Kopienko * Fixing clang format --------- Signed-off-by: Sergey Kopienko Co-authored-by: Sergey Kopienko --- .../dynamic_selection_impl/auto_tune_policy.h | 23 ++- .../dynamic_selection_impl/backend_traits.h | 55 ++++++ .../dynamic_load_policy.h | 5 + .../dynamic_selection_impl/sycl_backend.h | 171 +++++++++++++++--- .../dpl/internal/dynamic_selection_traits.h | 21 ++- .../sycl/test_auto_tune_policy_sycl.pass.cpp | 163 +++++++++++------ test/support/inline_backend.h | 12 +- 7 files changed, 347 insertions(+), 103 deletions(-) create mode 100644 include/oneapi/dpl/internal/dynamic_selection_impl/backend_traits.h diff --git a/include/oneapi/dpl/internal/dynamic_selection_impl/auto_tune_policy.h b/include/oneapi/dpl/internal/dynamic_selection_impl/auto_tune_policy.h index bfeaf5cd08a..9fc901977ac 100644 --- a/include/oneapi/dpl/internal/dynamic_selection_impl/auto_tune_policy.h +++ b/include/oneapi/dpl/internal/dynamic_selection_impl/auto_tune_policy.h @@ -16,12 +16,14 @@ #include #include #include +#include #include #include #include #include #include #include "oneapi/dpl/internal/dynamic_selection_traits.h" +#include "oneapi/dpl/internal/dynamic_selection_impl/backend_traits.h" #if _DS_BACKEND_SYCL != 0 # include "oneapi/dpl/internal/dynamic_selection_impl/sycl_backend.h" #endif @@ -47,13 +49,16 @@ class auto_tune_policy using size_type = typename std::vector::size_type; using timing_t = uint64_t; + using report_clock_type = std::chrono::steady_clock; + using report_duration = std::chrono::milliseconds; + static constexpr timing_t never_resample = 0; static constexpr size_type use_best_resource = ~size_type(0); struct resource_with_index_t { wrapped_resource_t r_; - size_type index_; + size_type index_ = 0; }; struct time_data_t @@ -66,7 +71,7 @@ class auto_tune_policy { std::mutex m_; - std::chrono::steady_clock::time_point t0_; + report_clock_type::time_point t0_; timing_t best_timing_ = std::numeric_limits::max(); resource_with_index_t best_resource_; @@ -80,7 +85,7 @@ class auto_tune_policy timing_t resample_time_ = 0.0; tuner_t(resource_with_index_t br, size_type resources_size, timing_t rt) - : t0_(std::chrono::steady_clock::now()), best_resource_(br), max_resource_to_profile_(resources_size), + : t0_(report_clock_type::now()), best_resource_(br), max_resource_to_profile_(resources_size), resample_time_(rt) { } @@ -100,8 +105,8 @@ class auto_tune_policy } else { - auto now = std::chrono::steady_clock::now(); - auto ms = std::chrono::duration_cast(now - t0_).count(); + const auto now = report_clock_type::now(); + const auto ms = std::chrono::duration_cast(now - t0_).count(); if (ms < resample_time_) { return use_best_resource; @@ -169,9 +174,9 @@ class auto_tune_policy }; void - report(const execution_info::task_time_t&, const typename execution_info::task_time_t::value_type& v) const + report(const execution_info::task_time_t&, report_duration v) const { - tuner_->add_new_timing(resource_, v); + tuner_->add_new_timing(resource_, v.count()); } }; @@ -217,6 +222,10 @@ class auto_tune_policy select(Function&& f, Args&&... args) { static_assert(sizeof...(KeyArgs) == sizeof...(Args)); + if constexpr (backend_traits::lazy_report_v) + { + backend_->lazy_report(); + } if (state_) { std::lock_guard l(state_->m_); diff --git a/include/oneapi/dpl/internal/dynamic_selection_impl/backend_traits.h b/include/oneapi/dpl/internal/dynamic_selection_impl/backend_traits.h new file mode 100644 index 00000000000..26ce70171f7 --- /dev/null +++ b/include/oneapi/dpl/internal/dynamic_selection_impl/backend_traits.h @@ -0,0 +1,55 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _ONEDPL_INTERNAL_BACKEND_TRAITS_H +#define _ONEDPL_INTERNAL_BACKEND_TRAITS_H + +#include +#include + +namespace oneapi +{ +namespace dpl +{ +namespace experimental +{ +namespace internal +{ +template +auto +has_lazy_report_impl(...) -> std::false_type; + +template +auto +has_lazy_report_impl(int) -> decltype(std::declval().lazy_report(), std::true_type{}); + +template +struct has_lazy_report : decltype(has_lazy_report_impl(0)) +{ +}; + +} //namespace internal + +namespace backend_traits +{ +template +struct lazy_report_value +{ + static constexpr bool value = ::oneapi::dpl::experimental::internal::has_lazy_report::value; +}; +template +inline constexpr bool lazy_report_v = lazy_report_value::value; + +} //namespace backend_traits + +} // namespace experimental +} // namespace dpl +} // namespace oneapi + +#endif /*_ONEDPL_INTERNAL_BACKEND_TRAITS_H*/ diff --git a/include/oneapi/dpl/internal/dynamic_selection_impl/dynamic_load_policy.h b/include/oneapi/dpl/internal/dynamic_selection_impl/dynamic_load_policy.h index d865b9b7fba..c34b92e1956 100644 --- a/include/oneapi/dpl/internal/dynamic_selection_impl/dynamic_load_policy.h +++ b/include/oneapi/dpl/internal/dynamic_selection_impl/dynamic_load_policy.h @@ -19,6 +19,7 @@ #include #include #include "oneapi/dpl/internal/dynamic_selection_traits.h" +#include "oneapi/dpl/internal/dynamic_selection_impl/backend_traits.h" #if _DS_BACKEND_SYCL != 0 # include "oneapi/dpl/internal/dynamic_selection_impl/sycl_backend.h" #endif @@ -150,6 +151,10 @@ struct dynamic_load_policy selection_type select(Args&&...) { + if constexpr (backend_traits::lazy_report_v) + { + backend_->lazy_report(); + } if (state_) { std::lock_guard l(state_->m_); diff --git a/include/oneapi/dpl/internal/dynamic_selection_impl/sycl_backend.h b/include/oneapi/dpl/internal/dynamic_selection_impl/sycl_backend.h index 25921bdf819..52df9966946 100644 --- a/include/oneapi/dpl/internal/dynamic_selection_impl/sycl_backend.h +++ b/include/oneapi/dpl/internal/dynamic_selection_impl/sycl_backend.h @@ -16,9 +16,11 @@ #include "oneapi/dpl/internal/dynamic_selection_impl/scoring_policy_defs.h" #include +#include #include #include #include +#include namespace oneapi { @@ -36,24 +38,96 @@ class sycl_backend using resource_container_t = std::vector; private: - class async_waiter + static inline bool is_profiling_enabled = false; + using report_clock_type = std::chrono::steady_clock; + using report_duration = std::chrono::milliseconds; + + class async_waiter_base + { + public: + virtual void report() const = 0; + virtual bool is_complete() const = 0; + virtual ~async_waiter_base() = default; + }; + + template + class async_waiter : public async_waiter_base { sycl::event e_; + std::shared_ptr s; public: - async_waiter(const sycl::event& e) : e_(e) {} + async_waiter() = default; + async_waiter(sycl::event e, std::shared_ptr selection) : e_(e), s(selection) {} + sycl::event unwrap() { return e_; } + void wait() { e_.wait(); } + + void + report() const override + { + if constexpr (report_value_v) + { + if (s != nullptr) + { + const auto time_start = + e_.template get_profiling_info(); + const auto time_end = e_.template get_profiling_info(); + s->report(execution_info::task_time, std::chrono::duration_cast( + std::chrono::nanoseconds(time_end - time_start))); + } + } + } + + bool + is_complete() const override + { + return e_.get_info() == + sycl::info::event_command_status::complete; + } + }; + + struct async_waiter_list_t + { + + std::mutex m_; + std::vector> async_waiters; + + void + add_waiter(async_waiter_base* t) + { + std::lock_guard l(m_); + async_waiters.push_back(std::unique_ptr(t)); + } + + void + lazy_report() + { + std::lock_guard l(m_); + async_waiters.erase(std::remove_if(async_waiters.begin(), async_waiters.end(), + [](std::unique_ptr& async_waiter) { + if (async_waiter->is_complete()) + { + async_waiter->report(); + return true; + } + return false; + }), + async_waiters.end()); + } }; + async_waiter_list_t async_waiter_list; + class submission_group { resource_container_t resources_; @@ -85,11 +159,17 @@ class sycl_backend template sycl_backend(const NativeUniverseVector& v) { + bool profiling = true; global_rank_.reserve(v.size()); for (auto e : v) { global_rank_.push_back(e); + if (!e.template has_property()) + { + profiling = false; + } } + is_profiling_enabled = profiling; sgroup_ptr_ = std::make_unique(global_rank_); } @@ -97,37 +177,49 @@ class sycl_backend auto submit(SelectionHandle s, Function&& f, Args&&... args) { + constexpr bool report_task_completion = report_info_v; + constexpr bool report_task_submission = report_info_v; + constexpr bool report_task_time = report_value_v; + auto q = unwrap(s); - if constexpr (report_info_v) - { + + if constexpr (report_task_submission) report(s, execution_info::task_submission); - } - if constexpr (report_info_v || - report_value_v) + + if constexpr (report_task_completion || report_task_time) { - std::chrono::steady_clock::time_point t0; - if constexpr (report_value_v) + const auto t0 = report_clock_type::now(); + + auto e1 = f(q, std::forward(args)...); + async_waiter waiter{e1, std::make_shared(s)}; + + if constexpr (report_task_time) { - t0 = std::chrono::steady_clock::now(); + if (is_profiling_enabled) + async_waiter_list.add_waiter(new async_waiter(waiter)); } - auto e1 = f(q, std::forward(args)...); - auto e2 = q.submit([=](sycl::handler& h) { - h.depends_on(e1); - h.host_task([=]() { - if constexpr (report_value_v) - s.report(execution_info::task_time, (std::chrono::steady_clock::now() - t0).count()); - if constexpr (report_info_v) - { - s.report(execution_info::task_completion); - } + + if (report_task_time && !is_profiling_enabled || report_task_completion) + { + auto e2 = q.submit([=](sycl::handler& h) { + h.depends_on(e1); + h.host_task([=]() { + if constexpr (report_task_time) + { + if (!is_profiling_enabled) + s.report(execution_info::task_time, + std::chrono::duration_cast(report_clock_type::now() - t0)); + } + if constexpr (report_task_completion) + s.report(execution_info::task_completion); + }); }); - }); - return async_waiter{e2}; - } - else - { - return async_waiter{f(unwrap(s), std::forward(args)...)}; + waiter = async_waiter{e2, std::make_shared(s)}; + } + return waiter; } + + return async_waiter{f(q, std::forward(args)...), std::make_shared(s)}; } auto @@ -142,6 +234,15 @@ class sycl_backend return global_rank_; } + void + lazy_report() + { + if (is_profiling_enabled) + { + async_waiter_list.lazy_report(); + } + } + private: resource_container_t global_rank_; std::unique_ptr sgroup_ptr_; @@ -149,10 +250,24 @@ class sycl_backend void initialize_default_resources() { + bool profiling = true; + auto prop_list = sycl::property_list{}; auto devices = sycl::device::get_devices(); - for (auto x : devices) + for (auto& x : devices) + { + if (!x.has(sycl::aspect::queue_profiling)) + { + profiling = false; + } + } + is_profiling_enabled = profiling; + if (is_profiling_enabled) + { + prop_list = sycl::property_list{sycl::property::queue::enable_profiling()}; + } + for (auto& x : devices) { - global_rank_.push_back(sycl::queue{x}); + global_rank_.push_back(sycl::queue{x, prop_list}); } } }; diff --git a/include/oneapi/dpl/internal/dynamic_selection_traits.h b/include/oneapi/dpl/internal/dynamic_selection_traits.h index c1955a86c42..bd6f79ba898 100644 --- a/include/oneapi/dpl/internal/dynamic_selection_traits.h +++ b/include/oneapi/dpl/internal/dynamic_selection_traits.h @@ -108,16 +108,17 @@ struct has_report : decltype(has_report_impl(0)) { }; -template +template auto has_report_value_impl(...) -> std::false_type; -template +template auto -has_report_value_impl(int) -> decltype(std::declval().report(std::declval(), 0), std::true_type{}); +has_report_value_impl(int) + -> decltype(std::declval().report(std::declval(), std::declval()), std::true_type{}); -template -struct has_report_value : decltype(has_report_value_impl(0)) +template +struct has_report_value : decltype(has_report_value_impl(0)) { }; @@ -277,7 +278,7 @@ template void report(S&& s, const Info& i, const Value& v) { - if constexpr (internal::has_report_value::value) + if constexpr (internal::has_report_value::value) { std::forward(s).report(i, v); } @@ -291,13 +292,13 @@ struct report_info template inline constexpr bool report_info_v = report_info::value; -template +template struct report_value { - static constexpr bool value = internal::has_report_value::value; + static constexpr bool value = internal::has_report_value::value; }; -template -inline constexpr bool report_value_v = report_value::value; +template +inline constexpr bool report_value_v = report_value::value; } // namespace experimental } // namespace dpl diff --git a/test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp b/test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp index e24c4c00522..d6e21e7db80 100644 --- a/test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp +++ b/test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp @@ -14,6 +14,8 @@ #include #include "support/test_dynamic_selection_utils.h" #include "support/utils.h" +#include "support/sycl_alloc_utils.h" + #if TEST_DYNAMIC_SELECTION_AVAILABLE int @@ -53,8 +55,11 @@ test_auto_submit_wait_on_event(UniverseContainer u, int best_resource) using my_policy_t = Policy; // they are cpus so this is ok - double* v = sycl::malloc_shared(1000000, u[0]); - int* j = sycl::malloc_shared(1, u[0]); + TestUtils::usm_data_transfer dt_helper_v(u[0], 1000000); + TestUtils::usm_data_transfer dt_helper_j(u[0], 1); + + double* v = dt_helper_v.get_data(); + int* j = dt_helper_j.get_data(); my_policy_t p{u}; auto n_samples = u.size(); @@ -101,7 +106,9 @@ test_auto_submit_wait_on_event(UniverseContainer u, int best_resource) ecount += i; if (*j == 0) { - return sycl::event{}; + return q.submit([=](sycl::handler& h){ + h.single_task([](){}); + }); } else { @@ -145,7 +152,9 @@ test_auto_submit_wait_on_event(UniverseContainer u, int best_resource) ecount += i; if (*j == 0) { - return sycl::event{}; + return q.submit([=](sycl::handler& h){ + h.single_task([](){}); + }); } else { @@ -185,8 +194,11 @@ test_auto_submit_wait_on_group(UniverseContainer u, int best_resource) using my_policy_t = Policy; // they are cpus so this is ok - double* v = sycl::malloc_shared(1000000, u[0]); - int* j = sycl::malloc_shared(1, u[0]); + TestUtils::usm_data_transfer dt_helper_v(u[0], 1000000); + TestUtils::usm_data_transfer dt_helper_j(u[0], 1); + + double* v = dt_helper_v.get_data(); + int* j = dt_helper_j.get_data(); my_policy_t p{u}; auto n_samples = u.size(); @@ -233,7 +245,9 @@ test_auto_submit_wait_on_group(UniverseContainer u, int best_resource) ecount += i; if (*j == 0) { - return sycl::event{}; + return q.submit([=](sycl::handler& h){ + h.single_task([](){}); + }); } else { @@ -277,7 +291,9 @@ test_auto_submit_wait_on_group(UniverseContainer u, int best_resource) ecount += i; if (*j == 0) { - return sycl::event{}; + return q.submit([=](sycl::handler& h){ + h.single_task([](){}); + }); } else { @@ -310,6 +326,7 @@ test_auto_submit_wait_on_group(UniverseContainer u, int best_resource) return 0; } + template int test_auto_submit_and_wait(UniverseContainer u, int best_resource) @@ -317,8 +334,11 @@ test_auto_submit_and_wait(UniverseContainer u, int best_resource) using my_policy_t = Policy; // they are cpus so this is ok - double* v = sycl::malloc_shared(1000000, u[0]); - int* j = sycl::malloc_shared(1, u[0]); + TestUtils::usm_data_transfer dt_helper_v(u[0], 1000000); + TestUtils::usm_data_transfer dt_helper_j(u[0], 1); + + double* v = dt_helper_v.get_data(); + int* j = dt_helper_j.get_data(); my_policy_t p{u}; auto n_samples = u.size(); @@ -365,7 +385,9 @@ test_auto_submit_and_wait(UniverseContainer u, int best_resource) ecount += i; if (*j == 0) { - return sycl::event{}; + return q.submit([=](sycl::handler& h){ + h.single_task([](){}); + }); } else { @@ -408,7 +430,9 @@ test_auto_submit_and_wait(UniverseContainer u, int best_resource) ecount += i; if (*j == 0) { - return sycl::event{}; + return q.submit([=](sycl::handler& h){ + h.single_task([](){}); + }); } else { @@ -440,13 +464,20 @@ test_auto_submit_and_wait(UniverseContainer u, int best_resource) return 0; } + +template static inline void build_auto_tune_universe(std::vector& u) { + auto prop_list = sycl::property_list{}; + if(use_event_profiling){ + prop_list = sycl::property_list{sycl::property::queue::enable_profiling()}; + } + try { auto device_cpu1 = sycl::device(sycl::cpu_selector_v); - sycl::queue cpu1_queue(device_cpu1); + sycl::queue cpu1_queue{device_cpu1, prop_list}; u.push_back(cpu1_queue); } catch (const sycl::exception&) @@ -456,7 +487,7 @@ build_auto_tune_universe(std::vector& u) try { auto device_cpu2 = sycl::device(sycl::cpu_selector_v); - sycl::queue cpu2_queue(device_cpu2); + sycl::queue cpu2_queue{device_cpu2, prop_list}; u.push_back(cpu2_queue); } catch (const sycl::exception&) @@ -466,7 +497,7 @@ build_auto_tune_universe(std::vector& u) try { auto device_cpu3 = sycl::device(sycl::cpu_selector_v); - sycl::queue cpu3_queue(device_cpu3); + sycl::queue cpu3_queue{device_cpu3, prop_list}; u.push_back(cpu3_queue); } catch (const sycl::exception&) @@ -476,7 +507,7 @@ build_auto_tune_universe(std::vector& u) try { auto device_cpu4 = sycl::device(sycl::cpu_selector_v); - sycl::queue cpu4_queue(device_cpu4); + sycl::queue cpu4_queue{device_cpu4, prop_list}; u.push_back(cpu4_queue); } catch (const sycl::exception&) @@ -484,7 +515,8 @@ build_auto_tune_universe(std::vector& u) std::cout << "SKIPPED: Unable to run with cpu_selector\n"; } } -#endif + +#endif //TEST_DYNAMIC_SELECTION_AVAILABLE int main() @@ -494,52 +526,77 @@ main() #if TEST_DYNAMIC_SELECTION_AVAILABLE #if !ONEDPL_FPGA_DEVICE || !ONEDPL_FPGA_EMULATOR using policy_t = oneapi::dpl::experimental::auto_tune_policy; - std::vector u; - build_auto_tune_universe(u); + std::vector u1; + std::vector u2; + constexpr bool use_event_profiling = true; + build_auto_tune_universe(u1); + build_auto_tune_universe(u2); - //If building the universe is not a success, return - if (u.size() != 0) + if (u1.size() != 0 || u2.size() !=0 ) { - auto f = [u](int i) - { + auto f = [u1](int i) { if (i <= 8) - return u[(i - 1) % 4]; + return u1[(i - 1) % 4]; else - return u[0]; + return u1[0]; }; constexpr bool just_call_submit = false; constexpr bool call_select_before_submit = true; - auto actual = test_auto_initialization(u); - actual = test_select(u, f); - actual = test_auto_submit_wait_on_event(u, 0); - actual = test_auto_submit_wait_on_event(u, 1); - actual = test_auto_submit_wait_on_event(u, 2); - actual = test_auto_submit_wait_on_event(u, 3); - actual = test_auto_submit_wait_on_group(u, 0); - actual = test_auto_submit_wait_on_group(u, 1); - actual = test_auto_submit_wait_on_group(u, 2); - actual = test_auto_submit_wait_on_group(u, 3); - actual = test_auto_submit_and_wait(u, 0); - actual = test_auto_submit_and_wait(u, 1); - actual = test_auto_submit_and_wait(u, 2); - actual = test_auto_submit_and_wait(u, 3); - + auto actual = test_auto_initialization(u1); + actual = test_select(u1, f); + actual = test_auto_submit_wait_on_event(u1, 0); + actual = test_auto_submit_wait_on_event(u1, 1); + actual = test_auto_submit_wait_on_event(u1, 2); + actual = test_auto_submit_wait_on_event(u1, 3); + actual = test_auto_submit_wait_on_group(u1, 0); + actual = test_auto_submit_wait_on_group(u1, 1); + actual = test_auto_submit_wait_on_group(u1, 2); + actual = test_auto_submit_wait_on_group(u1, 3); + actual = test_auto_submit_and_wait(u1, 0); + actual = test_auto_submit_and_wait(u1, 1); + actual = test_auto_submit_and_wait(u1, 2); + actual = test_auto_submit_and_wait(u1, 3); + // now select then submits + actual = test_auto_submit_wait_on_event(u1, 0); + actual = test_auto_submit_wait_on_event(u1, 1); + actual = test_auto_submit_wait_on_event(u1, 2); + actual = test_auto_submit_wait_on_event(u1, 3); + actual = test_auto_submit_wait_on_group(u1, 0); + actual = test_auto_submit_wait_on_group(u1, 1); + actual = test_auto_submit_wait_on_group(u1, 2); + actual = test_auto_submit_wait_on_group(u1, 3); + actual = test_auto_submit_and_wait(u1, 0); + actual = test_auto_submit_and_wait(u1, 1); + actual = test_auto_submit_and_wait(u1, 2); + actual = test_auto_submit_and_wait(u1, 3); + // Use event profiling + actual = test_auto_submit_wait_on_event(u2, 0); + actual = test_auto_submit_wait_on_event(u2, 1); + actual = test_auto_submit_wait_on_event(u2, 2); + actual = test_auto_submit_wait_on_event(u2, 3); + actual = test_auto_submit_wait_on_group(u2, 0); + actual = test_auto_submit_wait_on_group(u2, 1); + actual = test_auto_submit_wait_on_group(u2, 2); + actual = test_auto_submit_wait_on_group(u2, 3); + actual = test_auto_submit_and_wait(u2, 0); + actual = test_auto_submit_and_wait(u2, 1); + actual = test_auto_submit_and_wait(u2, 2); + actual = test_auto_submit_and_wait(u2, 3); // now select then submits - actual = test_auto_submit_wait_on_event(u, 0); - actual = test_auto_submit_wait_on_event(u, 1); - actual = test_auto_submit_wait_on_event(u, 2); - actual = test_auto_submit_wait_on_event(u, 3); - actual = test_auto_submit_wait_on_group(u, 0); - actual = test_auto_submit_wait_on_group(u, 1); - actual = test_auto_submit_wait_on_group(u, 2); - actual = test_auto_submit_wait_on_group(u, 3); - - actual = test_auto_submit_and_wait(u, 0); - actual = test_auto_submit_and_wait(u, 1); - actual = test_auto_submit_and_wait(u, 2); - actual = test_auto_submit_and_wait(u, 3); + actual = test_auto_submit_wait_on_event(u2, 0); + actual = test_auto_submit_wait_on_event(u2, 1); + actual = test_auto_submit_wait_on_event(u2, 2); + actual = test_auto_submit_wait_on_event(u2, 3); + actual = test_auto_submit_wait_on_group(u2, 0); + actual = test_auto_submit_wait_on_group(u2, 1); + actual = test_auto_submit_wait_on_group(u2, 2); + actual = test_auto_submit_wait_on_group(u2, 3); + actual = test_auto_submit_and_wait(u2, 0); + actual = test_auto_submit_and_wait(u2, 1); + actual = test_auto_submit_and_wait(u2, 2); + actual = test_auto_submit_and_wait(u2, 3); bProcessed = true; } diff --git a/test/support/inline_backend.h b/test/support/inline_backend.h index 301dfdf1315..24c0e7cfeaa 100644 --- a/test/support/inline_backend.h +++ b/test/support/inline_backend.h @@ -14,6 +14,7 @@ #include #include +#include namespace TestUtils { @@ -50,6 +51,7 @@ class int_inline_backend_t using wait_type = int; using execution_resource_t = basic_execution_resource_t; using resource_container_t = std::vector; + using report_duration = std::chrono::milliseconds; private: using native_resource_container_t = std::vector; @@ -99,8 +101,8 @@ class int_inline_backend_t submit(SelectionHandle s, Function&& f, Args&&... args) { std::chrono::steady_clock::time_point t0; - if constexpr (oneapi::dpl::experimental::report_value_v) + if constexpr (oneapi::dpl::experimental::report_value_v< + SelectionHandle, oneapi::dpl::experimental::execution_info::task_time_t, report_duration>) { t0 = std::chrono::steady_clock::now(); } @@ -116,11 +118,11 @@ class int_inline_backend_t { oneapi::dpl::experimental::report(s, oneapi::dpl::experimental::execution_info::task_completion); } - if constexpr (oneapi::dpl::experimental::report_value_v) + if constexpr (oneapi::dpl::experimental::report_value_v< + SelectionHandle, oneapi::dpl::experimental::execution_info::task_time_t, report_duration>) { report(s, oneapi::dpl::experimental::execution_info::task_time, - (std::chrono::steady_clock::now() - t0).count()); + std::chrono::duration_cast(std::chrono::steady_clock::now() - t0)); } return async_waiter{w}; }