From 6869456ffeb001d98672f3ab20b85508ed168311 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Tue, 24 Sep 2024 11:58:13 +0200 Subject: [PATCH 1/5] include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h - fix corner case performance issue in the __subgroup_bubble_sorter::sort Signed-off-by: Sergey Kopienko --- .../hetero/dpcpp/parallel_backend_sycl_merge_sort.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h index 19a4f25b889..bf249b882dc 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h @@ -40,16 +40,20 @@ struct __subgroup_bubble_sorter void sort(const _StorageAcc& __storage_acc, _Compare __comp, std::uint32_t __start, std::uint32_t __end) const { - for (std::uint32_t i = __start; i < __end; ++i) + bool __changed = true; + + for (std::uint32_t i = __start; i < __end && __changed; ++i) { + __changed = false; + for (std::uint32_t j = __start + 1; j < __start + __end - i; ++j) { auto& __first_item = __storage_acc[j - 1]; auto& __second_item = __storage_acc[j]; if (__comp(__second_item, __first_item)) { - using std::swap; - swap(__first_item, __second_item); + std::swap(__first_item, __second_item); + __changed = true; } } } From 57f9caf578b5617cdaf2ec16006bc7b83ce8f36c Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Tue, 24 Sep 2024 13:51:42 +0200 Subject: [PATCH 2/5] include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h - avoid loops and condition checks for small data sizes in __subgroup_bubble_sorter::sort Signed-off-by: Sergey Kopienko --- .../dpcpp/parallel_backend_sycl_merge_sort.h | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h index bf249b882dc..5c80362ba67 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h @@ -40,22 +40,39 @@ struct __subgroup_bubble_sorter void sort(const _StorageAcc& __storage_acc, _Compare __comp, std::uint32_t __start, std::uint32_t __end) const { - bool __changed = true; + using _IndexType = std::make_signed_t; - for (std::uint32_t i = __start; i < __end && __changed; ++i) - { - __changed = false; + _IndexType __n = __end - __start; - for (std::uint32_t j = __start + 1; j < __start + __end - i; ++j) + switch (__n) + { + case 0: + case 1: + break; + case 2: { - auto& __first_item = __storage_acc[j - 1]; - auto& __second_item = __storage_acc[j]; + auto& __first_item = __storage_acc[__start]; + auto& __second_item = __storage_acc[__start + 1]; if (__comp(__second_item, __first_item)) - { std::swap(__first_item, __second_item); - __changed = true; - } } + break; + default: + do + { + _IndexType __new_n = 0; + for (_IndexType __i = 1; __i < __n; ++__i) + { + auto& __first_item = __storage_acc[__start + __i - 1]; + auto& __second_item = __storage_acc[__start + __i]; + if (__comp(__second_item, __first_item)) + { + std::swap(__first_item, __second_item); + __new_n = __i; + } + } + __n = __new_n; + } while (__n > 1); } } }; From ff81aab111f839958f66e4bc0d0ede09370ca22b Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Sep 2024 09:33:50 +0200 Subject: [PATCH 3/5] include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h - fix review comment: allow ADL as it was originally Signed-off-by: Sergey Kopienko --- .../pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h index 5c80362ba67..d713038451c 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h @@ -54,7 +54,10 @@ struct __subgroup_bubble_sorter auto& __first_item = __storage_acc[__start]; auto& __second_item = __storage_acc[__start + 1]; if (__comp(__second_item, __first_item)) - std::swap(__first_item, __second_item); + { + using std::swap; + swap(__first_item, __second_item); + } } break; default: @@ -67,7 +70,8 @@ struct __subgroup_bubble_sorter auto& __second_item = __storage_acc[__start + __i]; if (__comp(__second_item, __first_item)) { - std::swap(__first_item, __second_item); + using std::swap; + swap(__first_item, __second_item); __new_n = __i; } } From c3c2d8fc9d48262c1f90a7e0f25facb480daaea1 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Sep 2024 09:37:27 +0200 Subject: [PATCH 4/5] include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h - fix review comment: the overflow is handled in the caller side, so it does not make sense to make a signed type. Signed-off-by: Sergey Kopienko --- .../pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h index d713038451c..4cde46a7e7e 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h @@ -40,9 +40,7 @@ struct __subgroup_bubble_sorter void sort(const _StorageAcc& __storage_acc, _Compare __comp, std::uint32_t __start, std::uint32_t __end) const { - using _IndexType = std::make_signed_t; - - _IndexType __n = __end - __start; + std::uint32_t __n = __end - __start; switch (__n) { @@ -63,8 +61,8 @@ struct __subgroup_bubble_sorter default: do { - _IndexType __new_n = 0; - for (_IndexType __i = 1; __i < __n; ++__i) + std::uint32_t __new_n = 0; + for (std::uint32_t __i = 1; __i < __n; ++__i) { auto& __first_item = __storage_acc[__start + __i - 1]; auto& __second_item = __storage_acc[__start + __i]; From 658f8df6f746bad40a2a000f6b97391422ec3cf8 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 25 Sep 2024 09:41:44 +0200 Subject: [PATCH 5/5] include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h - fix review comment: additional comments Signed-off-by: Sergey Kopienko --- .../pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h index 4cde46a7e7e..8c9aa3eb85d 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h @@ -44,10 +44,10 @@ struct __subgroup_bubble_sorter switch (__n) { - case 0: - case 1: + case 0: // The case when __end == __start no source data means no sorting required + case 1: // The case when __end == __start + 1 one source data item means no sorting required break; - case 2: + case 2: // The case when __end == __start + 2 two source data items required only one comparison (and swap) without any loops and etc. { auto& __first_item = __storage_acc[__start]; auto& __second_item = __storage_acc[__start + 1]; @@ -58,7 +58,7 @@ struct __subgroup_bubble_sorter } } break; - default: + default: // The case when __end > __start + 2 three or more source data items require full bubble sort do { std::uint32_t __new_n = 0;