From f6d00b8a95ddc41e17ac2faeba13afacd48252d2 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Fri, 1 Nov 2024 11:08:39 +0000 Subject: [PATCH 01/37] Avoid potential ambiguity in UrReturnHelper If UrReturnHelper's operator() is explicitly called as operator()(...), there is a potential for ambiguity when the specified RetType and the inferred T are the same: this is ambiguous with the version of operator() where only T is a template parameter, and T is specified explicitly. We already have code that explicitly calls operator()(...), so prevent this from becoming a problem. --- source/ur/ur.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/ur/ur.hpp b/source/ur/ur.hpp index 7e53425000..5a23a88ba9 100644 --- a/source/ur/ur.hpp +++ b/source/ur/ur.hpp @@ -304,7 +304,8 @@ class UrReturnHelper { // Array return value where element type is differrent from T template - ur_result_t operator()(const T *t, size_t s) { + std::enable_if_t, ur_result_t> + operator()(const T *t, size_t s) { return ur::getInfoArray(s, param_value_size, param_value, param_value_size_ret, t); } From 61bc0bd01dd0c1148ed8be31482c54dcd6378abb Mon Sep 17 00:00:00 2001 From: Martin Morrison-Grant Date: Fri, 20 Sep 2024 10:36:46 +0100 Subject: [PATCH 02/37] Improvements to align CTS and Spec for Context: - Add test for urContextCreate returning UR_RESULT_ERROR_INVALID_ENUMERATION - Added testing in urContextGetInfo for the atomic memory enums, checking against the type mask or UR_RESULT_ERROR_UNSUPPORTED_ENUM (as some adapters don't support this currently) --- source/adapters/cuda/context.cpp | 4 ++ source/adapters/hip/context.cpp | 2 +- source/adapters/level_zero/context.cpp | 9 +++-- source/adapters/level_zero/v2/context.cpp | 8 +++- source/adapters/native_cpu/context.cpp | 2 +- source/adapters/opencl/context.cpp | 2 +- .../context/context_adapter_level_zero.match | 3 +- .../context_adapter_level_zero_v2.match | 2 +- .../context/context_adapter_native_cpu.match | 2 +- 
test/conformance/context/urContextCreate.cpp | 11 ++++++ test/conformance/context/urContextGetInfo.cpp | 38 +++++++++++++++---- .../context/urContextSetExtendedDeleter.cpp | 3 +- .../enqueue/urEnqueueUSMFill2D.cpp | 6 ++- .../enqueue/urEnqueueUSMMemcpy2D.cpp | 6 ++- .../testing/include/uur/fixtures.h | 3 +- 15 files changed, 78 insertions(+), 23 deletions(-) diff --git a/source/adapters/cuda/context.cpp b/source/adapters/cuda/context.cpp index 69796cf79d..ffd991d59f 100644 --- a/source/adapters/cuda/context.cpp +++ b/source/adapters/cuda/context.cpp @@ -98,6 +98,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetInfo( UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; return ReturnValue(Capabilities); } + case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: // 2D USM memcpy is supported. return ReturnValue(true); diff --git a/source/adapters/hip/context.cpp b/source/adapters/hip/context.cpp index 761eab954d..b36ed73560 100644 --- a/source/adapters/hip/context.cpp +++ b/source/adapters/hip/context.cpp @@ -75,7 +75,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { // These queries should be dealt with in context_impl.cpp by calling the // queries of each device separately and building the intersection set. - return UR_RESULT_ERROR_INVALID_ENUMERATION; + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } case UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT: // 2D USM memcpy is supported. 
diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 296e3e98d5..865b47de5c 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -126,13 +126,16 @@ ur_result_t urContextGetInfo( UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST; return ReturnValue(Capabilities); } + case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } default: // TODO: implement other parameters - die("urGetContextInfo: unsuppported ParamName."); + return UR_RESULT_ERROR_INVALID_ENUMERATION; } - - return UR_RESULT_SUCCESS; } ur_result_t urContextGetNativeHandle( diff --git a/source/adapters/level_zero/v2/context.cpp b/source/adapters/level_zero/v2/context.cpp index 71360dd057..f70a4ea253 100644 --- a/source/adapters/level_zero/v2/context.cpp +++ b/source/adapters/level_zero/v2/context.cpp @@ -173,8 +173,14 @@ ur_result_t urContextGetInfo(ur_context_handle_t hContext, case UR_CONTEXT_INFO_USM_FILL2D_SUPPORT: // 2D USM fill is not supported. 
return ReturnValue(uint8_t{false}); + case UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: + case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } default: - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + return UR_RESULT_ERROR_INVALID_ENUMERATION; } } } // namespace ur::level_zero diff --git a/source/adapters/native_cpu/context.cpp b/source/adapters/native_cpu/context.cpp index 8efc61a024..7c178f951a 100644 --- a/source/adapters/native_cpu/context.cpp +++ b/source/adapters/native_cpu/context.cpp @@ -64,7 +64,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: case UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } default: return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index c2c38aa753..34181eda3b 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -89,7 +89,7 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { /* These queries should be dealt with in context_impl.cpp by calling the * queries of each device separately and building the intersection set. 
*/ - return UR_RESULT_ERROR_INVALID_ARGUMENT; + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } case UR_CONTEXT_INFO_NUM_DEVICES: case UR_CONTEXT_INFO_DEVICES: diff --git a/test/conformance/context/context_adapter_level_zero.match b/test/conformance/context/context_adapter_level_zero.match index c36611b9a5..2234fd8e93 100644 --- a/test/conformance/context/context_adapter_level_zero.match +++ b/test/conformance/context/context_adapter_level_zero.match @@ -1,2 +1,3 @@ {{NONDETERMINISTIC}} -urContextSetExtendedDeleterTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urContextCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urContextSetExtendedDeleterTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ diff --git a/test/conformance/context/context_adapter_level_zero_v2.match b/test/conformance/context/context_adapter_level_zero_v2.match index 2e6ea80468..93333a9178 100644 --- a/test/conformance/context/context_adapter_level_zero_v2.match +++ b/test/conformance/context/context_adapter_level_zero_v2.match @@ -1 +1 @@ -urContextSetExtendedDeleterTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ +{{OPT}}urContextSetExtendedDeleterTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ diff --git a/test/conformance/context/context_adapter_native_cpu.match b/test/conformance/context/context_adapter_native_cpu.match index 3f80da7c36..b5665a8f1a 100644 --- a/test/conformance/context/context_adapter_native_cpu.match +++ b/test/conformance/context/context_adapter_native_cpu.match @@ -1 +1 @@ -urContextSetExtendedDeleterTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urContextSetExtendedDeleterTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} diff --git a/test/conformance/context/urContextCreate.cpp b/test/conformance/context/urContextCreate.cpp index 0f268a3992..0dc4341bd0 100644 --- 
a/test/conformance/context/urContextCreate.cpp +++ b/test/conformance/context/urContextCreate.cpp @@ -36,6 +36,17 @@ TEST_P(urContextCreateTest, InvalidNullPointerContext) { urContextCreate(1, &device, nullptr, nullptr)); } +TEST_P(urContextCreateTest, InvalidEnumeration) { + auto device = GetParam(); + + ur_context_properties_t properties{UR_STRUCTURE_TYPE_CONTEXT_PROPERTIES, + nullptr, UR_CONTEXT_FLAGS_MASK}; + uur::raii::Context context = nullptr; + + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_ENUMERATION, + urContextCreate(1, &device, &properties, context.ptr())); +} + using urContextCreateMultiDeviceTest = uur::urAllDevicesTest; TEST_F(urContextCreateMultiDeviceTest, Success) { if (devices.size() < 2) { diff --git a/test/conformance/context/urContextGetInfo.cpp b/test/conformance/context/urContextGetInfo.cpp index f9f699d511..46bc2cd179 100644 --- a/test/conformance/context/urContextGetInfo.cpp +++ b/test/conformance/context/urContextGetInfo.cpp @@ -17,6 +17,24 @@ struct urContextGetInfoTestWithInfoParam {UR_CONTEXT_INFO_REFERENCE_COUNT, sizeof(uint32_t)}, {UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, sizeof(bool)}, {UR_CONTEXT_INFO_USM_FILL2D_SUPPORT, sizeof(bool)}, + {UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, + sizeof(ur_memory_order_capability_flags_t)}, + {UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, + sizeof(ur_memory_order_capability_flags_t)}, + {UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, + sizeof(ur_memory_order_capability_flags_t)}, + {UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES, + sizeof(ur_memory_order_capability_flags_t)}}; + + ctx_info_mem_flags_map = { + {UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, + UR_MEMORY_ORDER_CAPABILITY_FLAGS_MASK}, + {UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, + UR_MEMORY_SCOPE_CAPABILITY_FLAGS_MASK}, + {UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, + UR_MEMORY_ORDER_CAPABILITY_FLAGS_MASK}, + {UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES, + UR_MEMORY_SCOPE_CAPABILITY_FLAGS_MASK}, }; } @@ 
-26,24 +44,30 @@ struct urContextGetInfoTestWithInfoParam } std::unordered_map ctx_info_size_map; + std::unordered_map + ctx_info_mem_flags_map; }; UUR_TEST_SUITE_P(urContextGetInfoTestWithInfoParam, ::testing::Values( - UR_CONTEXT_INFO_NUM_DEVICES, // - UR_CONTEXT_INFO_DEVICES, // - UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, // - UR_CONTEXT_INFO_USM_FILL2D_SUPPORT, // - UR_CONTEXT_INFO_REFERENCE_COUNT // - + UR_CONTEXT_INFO_NUM_DEVICES, // + UR_CONTEXT_INFO_DEVICES, // + UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, // + UR_CONTEXT_INFO_USM_FILL2D_SUPPORT, // + UR_CONTEXT_INFO_REFERENCE_COUNT, // + UR_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, // + UR_CONTEXT_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES, // + UR_CONTEXT_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES, // + UR_CONTEXT_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES // ), uur::deviceTestWithParamPrinter); TEST_P(urContextGetInfoTestWithInfoParam, Success) { ur_context_info_t info = getParam(); size_t info_size = 0; - ASSERT_SUCCESS(urContextGetInfo(context, info, 0, nullptr, &info_size)); + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urContextGetInfo(context, info, 0, nullptr, &info_size)); ASSERT_NE(info_size, 0); if (const auto expected_size = ctx_info_size_map.find(info); diff --git a/test/conformance/context/urContextSetExtendedDeleter.cpp b/test/conformance/context/urContextSetExtendedDeleter.cpp index 99e72ecaa0..ad09ac2179 100644 --- a/test/conformance/context/urContextSetExtendedDeleter.cpp +++ b/test/conformance/context/urContextSetExtendedDeleter.cpp @@ -21,7 +21,8 @@ TEST_P(urContextSetExtendedDeleterTest, Success) { *static_cast(userdata) = true; }; - ASSERT_SUCCESS(urContextSetExtendedDeleter(context, deleter, &called)); + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urContextSetExtendedDeleter(context, deleter, &called)); } ASSERT_TRUE(called); } diff --git a/test/conformance/enqueue/urEnqueueUSMFill2D.cpp b/test/conformance/enqueue/urEnqueueUSMFill2D.cpp index fcb244e94a..29123b57bd 100644 --- 
a/test/conformance/enqueue/urEnqueueUSMFill2D.cpp +++ b/test/conformance/enqueue/urEnqueueUSMFill2D.cpp @@ -34,9 +34,11 @@ struct urEnqueueUSMFill2DTestWithParam UUR_RETURN_ON_FATAL_FAILURE(urQueueTestWithParam::SetUp()); bool memfill2d_support = false; - ASSERT_SUCCESS(urContextGetInfo( + [[maybe_unused]] ur_result_t result = urContextGetInfo( context, UR_CONTEXT_INFO_USM_FILL2D_SUPPORT, - sizeof(memfill2d_support), &memfill2d_support, nullptr)); + sizeof(memfill2d_support), &memfill2d_support, nullptr); + ASSERT_TRUE(result == UR_RESULT_SUCCESS || + result == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); if (!memfill2d_support) { GTEST_SKIP() << "2D USM mem fill is not supported"; } diff --git a/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp b/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp index b1f7e23b66..e12e79a295 100644 --- a/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp +++ b/test/conformance/enqueue/urEnqueueUSMMemcpy2D.cpp @@ -28,9 +28,11 @@ struct urEnqueueUSMMemcpy2DTestWithParam } bool memcpy2d_support = false; - ASSERT_SUCCESS(urContextGetInfo( + [[maybe_unused]] ur_result_t result = urContextGetInfo( context, UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT, - sizeof(memcpy2d_support), &memcpy2d_support, nullptr)); + sizeof(memcpy2d_support), &memcpy2d_support, nullptr); + ASSERT_TRUE(result == UR_RESULT_SUCCESS || + result == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); if (!memcpy2d_support) { GTEST_SKIP() << "2D USM memcpy is not supported"; } diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index 00bee6ba14..549bef009d 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -24,7 +24,8 @@ #define UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(ret) \ auto status = ret; \ - if (status == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { \ + if (status == UR_RESULT_ERROR_UNSUPPORTED_FEATURE || \ + status == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION) { \ 
GTEST_SKIP(); \ } else { \ ASSERT_EQ(status, UR_RESULT_SUCCESS); \ From 2a081891b69ce881be19ab61a92394c8473786da Mon Sep 17 00:00:00 2001 From: Martin Morrison-Grant Date: Mon, 28 Oct 2024 17:04:44 +0000 Subject: [PATCH 03/37] Remove urContextCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle from L0 match file --- test/conformance/context/context_adapter_level_zero.match | 1 - 1 file changed, 1 deletion(-) diff --git a/test/conformance/context/context_adapter_level_zero.match b/test/conformance/context/context_adapter_level_zero.match index 2234fd8e93..a78979330d 100644 --- a/test/conformance/context/context_adapter_level_zero.match +++ b/test/conformance/context/context_adapter_level_zero.match @@ -1,3 +1,2 @@ {{NONDETERMINISTIC}} -urContextCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ {{OPT}}urContextSetExtendedDeleterTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ From 774c082b9cc54a65e314999655668db554921744 Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Fri, 1 Nov 2024 14:10:27 -0700 Subject: [PATCH 04/37] [L0] Add Level Zero Reference Document - Add initial Level Zero Reference document with environment variable definitions. Signed-off-by: Neil R. Spruit --- scripts/core/LEVEL_ZERO.rst | 141 +++++++++++++++++++++++++++++++ scripts/templates/index.rst.mako | 1 + 2 files changed, 142 insertions(+) create mode 100644 scripts/core/LEVEL_ZERO.rst diff --git a/scripts/core/LEVEL_ZERO.rst b/scripts/core/LEVEL_ZERO.rst new file mode 100644 index 0000000000..a2a2a75b6d --- /dev/null +++ b/scripts/core/LEVEL_ZERO.rst @@ -0,0 +1,141 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +================================ +Level Zero UR Reference Document +================================ + +This document gives general guidelines on differences in the UR L0 adapter for customer use cases.
+ +Environment Variables +===================== + ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| Environment Variable | Description | Possible Values | ++=============================================+==============================================================+==============================================================+ +| UR_L0_USE_COPY_ENGINE | Controls the use of copy engines. | "0": Copy engines will not be used. | +| | | "1": All available copy engines can be used. | +| | | "lower_index:upper_index": Specifies a range of copy engines | +| | | to be used. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_USE_IMMEDIATE_COMMANDLISTS | Determines the mode of immediate command lists. | "0": Immediate command lists are not used. | +| | | "1": Immediate command lists are used per queue. | +| | | "2": Immediate command lists are used per thread per queue. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_USM_ALLOCATOR_TRACE | Enables tracing for the USM allocator. | "0": Tracing is disabled. | +| | | "1": Tracing is enabled. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_USM_ALLOCATOR | Configures the USM allocator. | Specifies the configuration for the USM allocator. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_DEBUG_BASIC | Enables basic debugging for Level Zero. 
| "0": Debugging is disabled. | +| | | "1": Debugging is enabled. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_ENABLE_SYSMAN_ENV_DEFAULT | Controls the default SysMan environment initialization. | "1" or unset: Enables SysMan environment initialization. | +| | | "0": Disables SysMan environment initialization. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_ENABLE_ZESINIT_DEFAULT | Controls the default SysMan initialization with zesInit. | "1": Enables SysMan initialization with zesInit. | +| | | "0" or unset: Disables SysMan initialization with zesInit. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| SYCL_ENABLE_PCI | Deprecated and no longer needed. | Any value: Triggers a warning message. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_USE_COPY_ENGINE_FOR_FILL | Controls the use of copy engines for memory fill operations. | "0": Copy engines will not be used for fill operations. | +| | | "1": Copy engines will be used for fill operations. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_DEBUG | Controls the debug level for Level Zero. | "0": No debug information. | +| | | "1": Basic debug information. | +| | | "2": Validation debug information. | +| | | "-1": All debug information. 
| ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_LEAKS_DEBUG | Enables debugging for memory leaks. | "0": Memory leaks debugging is disabled. | +| | | "1": Memory leaks debugging is enabled. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_INIT_ALL_DRIVERS | Controls the initialization of all Level Zero drivers. | "0": Only currently used drivers are initialized. | +| | | "1": All drivers on the system are initialized. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_SERIALIZE | Controls serialization of Level Zero calls. | "0": No locking or blocking. | +| | | "1": Locking around each UR_CALL. | +| | | "2": Blocking UR calls where supported. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_QUEUE_SYNCHRONIZE_NON_BLOCKING | Controls non-blocking synchronization of queues. | "0": Non-blocking synchronization is disabled. | +| | | "1": Non-blocking synchronization is enabled. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_OOQ_INTEGRATED_SIGNAL_EVENT | Controls signal events for commands on integrated GPUs. | "0": Signal events are not created. | +| | | "1": Signal events are created. 
| ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_TRACK_INDIRECT_ACCESS_MEMORY | Enables tracking of indirect access memory. | "0": Tracking is disabled. | +| | | "1": Tracking is enabled. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING| Controls exposure of CSlice in affinity partitioning. | "0": CSlice is not exposed. | +| | | "1": CSlice is exposed. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_MAX_NUMBER_OF_EVENTS_PER_EVENT_POOL | Sets the maximum number of events per event pool. | Any positive integer: Specifies the maximum number of events | +| | | per event pool. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_COMMANDLISTS_CLEANUP_THRESHOLD | Sets the threshold for command lists cleanup. | Any positive integer: Specifies the threshold for cleanup. | +| | | Negative value: Disables the threshold. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_USE_NATIVE_USM_MEMCPY2D | Controls the use of native USM memcpy2D operations. | "0": Native USM memcpy2D operations are not used. | +| | | "1": Native USM memcpy2D operations are used. 
| ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_ENABLE_USM_HOSTPTR_IMPORT | Enables USM host pointer import. | "0": USM host pointer import is disabled. | +| | | "1": USM host pointer import is enabled. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_ENABLE_USM_HOSTPTR_RELEASE | Enables USM host pointer release. | "0": USM host pointer release is disabled. | +| | | "1": USM host pointer release is enabled. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_ENABLE_USM_HOST_UNIFIED_MEMORY | Enables USM host unified memory. | "0": USM host unified memory is disabled. | +| | | "1": USM host unified memory is enabled. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_USE_MULTIPLE_COMMANDLIST_BARRIERS | Controls the use of multiple command lists for barriers. | "0": Multiple command lists are not used. | +| | | "1": Multiple command lists are used. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_IN_ORDER_BARRIER_BY_SIGNAL | Controls if in-order barriers are implemented by signal. | "0": Barriers are implemented by true barrier command. | +| | | "1": Barriers are implemented by signal. 
| ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_DISABLE_EVENTS_CACHING | Controls the caching of events in the context. | "0" or unset: Event caching is enabled. | +| | | "1": Event caching is disabled. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_REUSE_DISCARDED_EVENTS | Controls the reuse of uncompleted events in in-order queues. | "0": Reuse of discarded events is disabled. | +| | | "1" or unset: Reuse of discarded events is enabled. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| SYCL_PI_LEVEL_ZERO_FILTER_EVENT_WAIT_LIST | Controls filtering of event wait lists. | "0" or unset: Filtering is disabled. | +| | | "1": Filtering is enabled. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_DEVICE_SCOPE_EVENTS | Controls the scope of device events. | "0": All events are host-visible. | +| | | "1": On-demand host-visible proxy events. | +| | | "2": Last command in batch host-visible. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_USE_COPY_ENGINE_FOR_D2D_COPY | Controls the use of copy engines for device-to-device copy | "0": Copy engines will not be used for D2D copy operations. | +| | operations. | "1": Copy engines will be used for D2D copy operations. 
| ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_BATCH_SIZE | Controls the batch size for command lists. | "0": Dynamic batch size adjustment. | +| | | Any positive integer: Specifies the fixed batch size. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_COPY_BATCH_SIZE | Controls the batch size for copy command lists. | "0": Dynamic batch size adjustment. | +| | | Any positive integer: Specifies the fixed batch size. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_IMMEDIATE_COMMANDLISTS_BATCH_MAX | Sets the maximum number of immediate command lists batches. | Any positive integer: Specifies the maximum number of batches| ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +|UR_L0_IMMEDIATE_COMMANDLISTS_EVENTS_PER_BATCH| Sets the number of events per batch for immediate command | Any positive integer: Specifies the number of events per | +| | lists. | batch. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_USE_COMPUTE_ENGINE | Controls the use of compute engines. | "0": Only the first compute engine is used. | +| | | Any positive integer: Specifies the index of the compute | +| | | engine to be used. | +| | | Negative value: All available compute engines may be used. 
| ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_USM_RESIDENT | Controls memory residency for USM allocations. | "0xHSD": Specifies residency for host, shared, and device | +| | | allocations. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ +| UR_L0_DISABLE_USM_ALLOCATOR | Controls the use of the USM allocator. | "0": USM allocator is enabled. | +| | | Any other value: USM allocator is disabled. | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ + +Contributors +------------ + +* Neil Spruit `neil.r.spruit@intel.com <neil.r.spruit@intel.com>`_ + diff --git a/scripts/templates/index.rst.mako b/scripts/templates/index.rst.mako index 1d5ba6a9b0..d1884bdf1b 100644 --- a/scripts/templates/index.rst.mako +++ b/scripts/templates/index.rst.mako @@ -16,5 +16,6 @@ core/CONTRIB.rst core/CUDA.rst core/HIP.rst + core/LEVEL_ZERO.rst exp-features.rst api.rst From e97b1c07ad8a59f605b38637efbc626253957f17 Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Fri, 1 Nov 2024 15:08:13 -0700 Subject: [PATCH 05/37] [L0] Add missing Device envs Signed-off-by: Neil R.
Spruit --- scripts/core/LEVEL_ZERO.rst | 249 ++++++++++++++++++------------------ 1 file changed, 127 insertions(+), 122 deletions(-) diff --git a/scripts/core/LEVEL_ZERO.rst b/scripts/core/LEVEL_ZERO.rst index a2a2a75b6d..e2ab18a0ea 100644 --- a/scripts/core/LEVEL_ZERO.rst +++ b/scripts/core/LEVEL_ZERO.rst @@ -13,129 +13,134 @@ This document gives general guidelines on differences in the UR L0 adapter for c Environment Variables ===================== -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| Environment Variable | Description | Possible Values | -+=============================================+==============================================================+==============================================================+ -| UR_L0_USE_COPY_ENGINE | Controls the use of copy engines. | "0": Copy engines will not be used. | -| | | "1": All available copy engines can be used. | -| | | "lower_index:upper_index": Specifies a range of copy engines | -| | | to be used. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_USE_IMMEDIATE_COMMANDLISTS | Determines the mode of immediate command lists. | "0": Immediate command lists are not used. | -| | | "1": Immediate command lists are used per queue. | -| | | "2": Immediate command lists are used per thread per queue. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_USM_ALLOCATOR_TRACE | Enables tracing for the USM allocator. | "0": Tracing is disabled. | -| | | "1": Tracing is enabled. 
| -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_USM_ALLOCATOR | Configures the USM allocator. | Specifies the configuration for the USM allocator. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_DEBUG_BASIC | Enables basic debugging for Level Zero. | "0": Debugging is disabled. | -| | | "1": Debugging is enabled. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_ENABLE_SYSMAN_ENV_DEFAULT | Controls the default SysMan environment initialization. | "1" or unset: Enables SysMan environment initialization. | -| | | "0": Disables SysMan environment initialization. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_ENABLE_ZESINIT_DEFAULT | Controls the default SysMan initialization with zesInit. | "1": Enables SysMan initialization with zesInit. | -| | | "0" or unset: Disables SysMan initialization with zesInit. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| SYCL_ENABLE_PCI | Deprecated and no longer needed. | Any value: Triggers a warning message. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_USE_COPY_ENGINE_FOR_FILL | Controls the use of copy engines for memory fill operations. | "0": Copy engines will not be used for fill operations. 
| -| | | "1": Copy engines will be used for fill operations. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_DEBUG | Controls the debug level for Level Zero. | "0": No debug information. | -| | | "1": Basic debug information. | -| | | "2": Validation debug information. | -| | | "-1": All debug information. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_LEAKS_DEBUG | Enables debugging for memory leaks. | "0": Memory leaks debugging is disabled. | -| | | "1": Memory leaks debugging is enabled. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_INIT_ALL_DRIVERS | Controls the initialization of all Level Zero drivers. | "0": Only currently used drivers are initialized. | -| | | "1": All drivers on the system are initialized. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_SERIALIZE | Controls serialization of Level Zero calls. | "0": No locking or blocking. | -| | | "1": Locking around each UR_CALL. | -| | | "2": Blocking UR calls where supported. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_QUEUE_SYNCHRONIZE_NON_BLOCKING | Controls non-blocking synchronization of queues. | "0": Non-blocking synchronization is disabled. | -| | | "1": Non-blocking synchronization is enabled. 
| -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_OOQ_INTEGRATED_SIGNAL_EVENT | Controls signal events for commands on integrated GPUs. | "0": Signal events are not created. | -| | | "1": Signal events are created. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_TRACK_INDIRECT_ACCESS_MEMORY | Enables tracking of indirect access memory. | "0": Tracking is disabled. | -| | | "1": Tracking is enabled. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING| Controls exposure of CSlice in affinity partitioning. | "0": CSlice is not exposed. | -| | | "1": CSlice is exposed. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_MAX_NUMBER_OF_EVENTS_PER_EVENT_POOL | Sets the maximum number of events per event pool. | Any positive integer: Specifies the maximum number of events | -| | | per event pool. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_COMMANDLISTS_CLEANUP_THRESHOLD | Sets the threshold for command lists cleanup. | Any positive integer: Specifies the threshold for cleanup. | -| | | Negative value: Disables the threshold. 
| -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_USE_NATIVE_USM_MEMCPY2D | Controls the use of native USM memcpy2D operations. | "0": Native USM memcpy2D operations are not used. | -| | | "1": Native USM memcpy2D operations are used. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_ENABLE_USM_HOSTPTR_IMPORT | Enables USM host pointer import. | "0": USM host pointer import is disabled. | -| | | "1": USM host pointer import is enabled. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_ENABLE_USM_HOSTPTR_RELEASE | Enables USM host pointer release. | "0": USM host pointer release is disabled. | -| | | "1": USM host pointer release is enabled. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_ENABLE_USM_HOST_UNIFIED_MEMORY | Enables USM host unified memory. | "0": USM host unified memory is disabled. | -| | | "1": USM host unified memory is enabled. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_USE_MULTIPLE_COMMANDLIST_BARRIERS | Controls the use of multiple command lists for barriers. | "0": Multiple command lists are not used. | -| | | "1": Multiple command lists are used. 
| -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_IN_ORDER_BARRIER_BY_SIGNAL | Controls if in-order barriers are implemented by signal. | "0": Barriers are implemented by true barrier command. | -| | | "1": Barriers are implemented by signal. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_DISABLE_EVENTS_CACHING | Controls the caching of events in the context. | "0" or unset: Event caching is enabled. | -| | | "1": Event caching is disabled. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_REUSE_DISCARDED_EVENTS | Controls the reuse of uncompleted events in in-order queues. | "0": Reuse of discarded events is disabled. | -| | | "1" or unset: Reuse of discarded events is enabled. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| SYCL_PI_LEVEL_ZERO_FILTER_EVENT_WAIT_LIST | Controls filtering of event wait lists. | "0" or unset: Filtering is disabled. | -| | | "1": Filtering is enabled. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_DEVICE_SCOPE_EVENTS | Controls the scope of device events. | "0": All events are host-visible. | -| | | "1": On-demand host-visible proxy events. | -| | | "2": Last command in batch host-visible. 
| -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_USE_COPY_ENGINE_FOR_D2D_COPY | Controls the use of copy engines for device-to-device copy | "0": Copy engines will not be used for D2D copy operations. | -| | operations. | "1": Copy engines will be used for D2D copy operations. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_BATCH_SIZE | Controls the batch size for command lists. | "0": Dynamic batch size adjustment. | -| | | Any positive integer: Specifies the fixed batch size. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_COPY_BATCH_SIZE | Controls the batch size for copy command lists. | "0": Dynamic batch size adjustment. | -| | | Any positive integer: Specifies the fixed batch size. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_IMMEDIATE_COMMANDLISTS_BATCH_MAX | Sets the maximum number of immediate command lists batches. | Any positive integer: Specifies the maximum number of batches| -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -|UR_L0_IMMEDIATE_COMMANDLISTS_EVENTS_PER_BATCH| Sets the number of events per batch for immediate command | Any positive integer: Specifies the number of events per | -| | lists. | batch. 
| -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_USE_COMPUTE_ENGINE | Controls the use of compute engines. | "0": Only the first compute engine is used. | -| | | Any positive integer: Specifies the index of the compute | -| | | engine to be used. | -| | | Negative value: All available compute engines may be used. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_USM_RESIDENT | Controls memory residency for USM allocations. | "0xHSD": Specifies residency for host, shared, and device | -| | | allocations. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ -| UR_L0_DISABLE_USM_ALLOCATOR | Controls the use of the USM allocator. | "0": USM allocator is enabled. | -| | | Any other value: USM allocator is disabled. | -+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+ ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| Environment Variable | Description | Possible Values | Default Value | ++=============================================+==============================================================+==============================================================+==================+ +| UR_L0_USE_COPY_ENGINE | Controls the use of copy engines. | "0": Copy engines will not be used. | "1" | +| | | "1": All available copy engines can be used. 
| | +| | | "lower_index:upper_index": Specifies a range of copy engines | | +| | | to be used. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_IMMEDIATE_COMMANDLISTS | Determines the mode of immediate command lists. | "0": Immediate command lists are not used. | "0" | +| | | "1": Immediate command lists are used per queue. | | +| | | "2": Immediate command lists are used per thread per queue. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_RELAXED_ALLOCATION_LIMITS | Controls the use of relaxed allocation limits. | "0": Relaxed allocation limits are not used. | "0" | +| | | "1": Relaxed allocation limits are used. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_DRIVER_IN_ORDER_LISTS | Controls the use of in-order lists from the driver. | "0": In-order lists from the driver are not used. | "0" | +| | | "1": In-order lists from the driver are used. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USM_ALLOCATOR_TRACE | Enables tracing for the USM allocator. | "0": Tracing is disabled. | "0" | +| | | "1": Tracing is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USM_ALLOCATOR | Configures the USM allocator. | Specifies the configuration for the USM allocator. 
| All Configs | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_DEBUG_BASIC | Enables basic debugging for Level Zero. | "0": Debugging is disabled. | "0" | +| | | "1": Debugging is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_ENABLE_SYSMAN_ENV_DEFAULT | Controls the default SysMan environment initialization. | "1" or unset: Enables SysMan environment initialization. | "1" | +| | | "0": Disables SysMan environment initialization. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_ENABLE_ZESINIT_DEFAULT | Controls the default SysMan initialization with zesInit. | "1": Enables SysMan initialization with zesInit. | "0" | +| | | "0" or unset: Disables SysMan initialization with zesInit. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| SYCL_ENABLE_PCI | Deprecated and no longer needed. | Any value: Triggers a warning message. | None | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_COPY_ENGINE_FOR_FILL | Controls the use of copy engines for memory fill operations. | "0": Copy engines will not be used for fill operations. | "0" | +| | | "1": Copy engines will be used for fill operations. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_DEBUG | Controls the debug level for Level Zero. | "0": No debug information. | "0" | +| | | "1": Basic debug information. | | +| | | "2": Validation debug information. | | +| | | "-1": All debug information. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_LEAKS_DEBUG | Enables debugging for memory leaks. | "0": Memory leaks debugging is disabled. | "0" | +| | | "1": Memory leaks debugging is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_INIT_ALL_DRIVERS | Controls the initialization of all Level Zero drivers. | "0": Only currently used drivers are initialized. | "0" | +| | | "1": All drivers on the system are initialized. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_SERIALIZE | Controls serialization of Level Zero calls. | "0": No locking or blocking. | "0" | +| | | "1": Locking around each UR_CALL. | | +| | | "2": Blocking UR calls where supported. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_QUEUE_SYNCHRONIZE_NON_BLOCKING | Controls non-blocking synchronization of queues. | "0": Non-blocking synchronization is disabled. | "0" | +| | | "1": Non-blocking synchronization is enabled. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_OOQ_INTEGRATED_SIGNAL_EVENT | Controls signal events for commands on integrated GPUs. | "0": Signal events are not created. | "0" | +| | | "1": Signal events are created. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_TRACK_INDIRECT_ACCESS_MEMORY | Enables tracking of indirect access memory. | "0": Tracking is disabled. | "0" | +| | | "1": Tracking is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING| Controls exposure of CSlice in affinity partitioning. | "0": CSlice is not exposed. | "0" | +| | | "1": CSlice is exposed. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_MAX_NUMBER_OF_EVENTS_PER_EVENT_POOL | Sets the maximum number of events per event pool. | Any positive integer: Specifies the maximum number of events | 256 | +| | | per event pool. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_COMMANDLISTS_CLEANUP_THRESHOLD | Sets the threshold for command lists cleanup. | Any positive integer: Specifies the threshold for cleanup. | 20 | +| | | Negative value: Disables the threshold. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_NATIVE_USM_MEMCPY2D | Controls the use of native USM memcpy2D operations. | "0": Native USM memcpy2D operations are not used. | "0" | +| | | "1": Native USM memcpy2D operations are used. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_ENABLE_USM_HOSTPTR_IMPORT | Enables USM host pointer import. | "0": USM host pointer import is disabled. | "0" | +| | | "1": USM host pointer import is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_ENABLE_USM_HOSTPTR_RELEASE | Enables USM host pointer release. | "0": USM host pointer release is disabled. | "0" | +| | | "1": USM host pointer release is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_ENABLE_USM_HOST_UNIFIED_MEMORY | Enables USM host unified memory. | "0": USM host unified memory is disabled. | "0" | +| | | "1": USM host unified memory is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_MULTIPLE_COMMANDLIST_BARRIERS | Controls the use of multiple command lists for barriers. | "0": Multiple command lists are not used. | "0" | +| | | "1": Multiple command lists are used. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_IN_ORDER_BARRIER_BY_SIGNAL | Controls if in-order barriers are implemented by signal. | "0": Barriers are implemented by true barrier command. | "0" | +| | | "1": Barriers are implemented by signal. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_DISABLE_EVENTS_CACHING | Controls the caching of events in the context. | "0" or unset: Event caching is enabled. | "0" | +| | | "1": Event caching is disabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_REUSE_DISCARDED_EVENTS | Controls the reuse of uncompleted events in in-order queues. | "0": Reuse of discarded events is disabled. | "1" | +| | | "1" or unset: Reuse of discarded events is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| SYCL_PI_LEVEL_ZERO_FILTER_EVENT_WAIT_LIST | Controls filtering of event wait lists. | "0" or unset: Filtering is disabled. | "0" | +| | | "1": Filtering is enabled. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_DEVICE_SCOPE_EVENTS | Controls the scope of device events. | "0": All events are host-visible. | "0" | +| | | "1": On-demand host-visible proxy events. | | +| | | "2": Last command in batch host-visible. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_COPY_ENGINE_FOR_D2D_COPY | Controls the use of copy engines for device-to-device copy | "0": Copy engines will not be used for D2D copy operations. | "0" | +| | operations. | "1": Copy engines will be used for D2D copy operations. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_BATCH_SIZE | Controls the batch size for command lists. | "0": Dynamic batch size adjustment. | "0" | +| | | Any positive integer: Specifies the fixed batch size. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_COPY_BATCH_SIZE | Controls the batch size for copy command lists. | "0": Dynamic batch size adjustment. | "0" | +| | | Any positive integer: Specifies the fixed batch size. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_IMMEDIATE_COMMANDLISTS_BATCH_MAX | Sets the maximum number of immediate command lists batches. | Any positive integer: Specifies the maximum number of batches| 10 | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +|UR_L0_IMMEDIATE_COMMANDLISTS_EVENTS_PER_BATCH| Sets the number of events per batch for immediate command | Any positive integer: Specifies the number of events per | 256 | +| | lists. | batch. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USE_COMPUTE_ENGINE | Controls the use of compute engines. | "0": Only the first compute engine is used. | "0" | +| | | Any positive integer: Specifies the index of the compute | | +| | | engine to be used. | | +| | | Negative value: All available compute engines may be used. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_USM_RESIDENT | Controls memory residency for USM allocations. | "0xHSD": Specifies residency for host, shared, and device | 0x2 | +| | | allocations. | | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ +| UR_L0_DISABLE_USM_ALLOCATOR | Controls the use of the USM allocator. | "0": USM allocator is enabled. | "0" | +| | | Any other value: USM allocator is disabled. 
| | ++---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+ Contributors ------------ -* Neil Spruit `neil.r.spruit@intel.com `_ - +* Neil Spruit `neil.r.spruit@intel.com `_ \ No newline at end of file From a3c6a4dc4fd4d0a8432e10d48e190929b63132b9 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Wed, 25 Sep 2024 23:46:48 +0200 Subject: [PATCH 06/37] [L0 v2] use single command list for all operations --- .../level_zero/v2/command_list_cache.cpp | 30 +- .../level_zero/v2/command_list_cache.hpp | 7 +- source/adapters/level_zero/v2/memory.cpp | 2 +- .../v2/queue_immediate_in_order.cpp | 329 ++++++++---------- .../v2/queue_immediate_in_order.hpp | 31 +- .../level_zero/v2/command_list_cache_test.cpp | 26 +- 6 files changed, 193 insertions(+), 232 deletions(-) diff --git a/source/adapters/level_zero/v2/command_list_cache.cpp b/source/adapters/level_zero/v2/command_list_cache.cpp index 4379ec1d6a..9e585b80af 100644 --- a/source/adapters/level_zero/v2/command_list_cache.cpp +++ b/source/adapters/level_zero/v2/command_list_cache.cpp @@ -13,6 +13,21 @@ #include "../device.hpp" +typedef struct _zex_intel_queue_copy_operations_offload_hint_exp_desc_t { + ze_structure_type_t stype; + const void *pNext; + ze_bool_t copyOffloadEnabled; +} zex_intel_queue_copy_operations_offload_hint_exp_desc_t; + +#define ZEX_INTEL_STRUCTURE_TYPE_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_PROPERTIES \ + (ze_structure_type_t)0x0003001B + +template <> +ze_structure_type_t +getZeStructureType() { + return ZEX_INTEL_STRUCTURE_TYPE_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_PROPERTIES; +} + bool v2::immediate_command_list_descriptor_t::operator==( const immediate_command_list_descriptor_t &rhs) const { return ZeDevice == rhs.ZeDevice && IsInOrder == rhs.IsInOrder && @@ -45,6 +60,10 @@ command_list_cache_t::command_list_cache_t(ze_context_handle_t ZeContext) 
raii::ze_command_list_handle_t command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) { + ZeStruct offloadDesc; + offloadDesc.copyOffloadEnabled = + std::visit([](auto &&arg) { return arg.CopyOffloadEnabled; }, desc); + if (auto ImmCmdDesc = std::get_if(&desc)) { ze_command_list_handle_t ZeCommandList; @@ -58,6 +77,7 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) { QueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY; QueueDesc.index = ImmCmdDesc->Index.value(); } + QueueDesc.pNext = &offloadDesc; ZE2UR_CALL_THROWS( zeCommandListCreateImmediate, (ZeContext, ImmCmdDesc->ZeDevice, &QueueDesc, &ZeCommandList)); @@ -68,6 +88,7 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) { CmdListDesc.flags = RegCmdDesc.IsInOrder ? ZE_COMMAND_LIST_FLAG_IN_ORDER : 0; CmdListDesc.commandQueueGroupOrdinal = RegCmdDesc.Ordinal; + CmdListDesc.pNext = &offloadDesc; ze_command_list_handle_t ZeCommandList; ZE2UR_CALL_THROWS(zeCommandListCreate, (ZeContext, RegCmdDesc.ZeDevice, @@ -78,13 +99,14 @@ command_list_cache_t::createCommandList(const command_list_descriptor_t &desc) { raii::command_list_unique_handle command_list_cache_t::getImmediateCommandList( ze_device_handle_t ZeDevice, bool IsInOrder, uint32_t Ordinal, - ze_command_queue_mode_t Mode, ze_command_queue_priority_t Priority, - std::optional Index) { + bool CopyOffloadEnable, ze_command_queue_mode_t Mode, + ze_command_queue_priority_t Priority, std::optional Index) { TRACK_SCOPE_LATENCY("command_list_cache_t::getImmediateCommandList"); immediate_command_list_descriptor_t Desc; Desc.ZeDevice = ZeDevice; Desc.Ordinal = Ordinal; + Desc.CopyOffloadEnabled = CopyOffloadEnable; Desc.IsInOrder = IsInOrder; Desc.Mode = Mode; Desc.Priority = Priority; @@ -99,13 +121,15 @@ raii::command_list_unique_handle command_list_cache_t::getImmediateCommandList( raii::command_list_unique_handle command_list_cache_t::getRegularCommandList(ze_device_handle_t 
ZeDevice, - bool IsInOrder, uint32_t Ordinal) { + bool IsInOrder, uint32_t Ordinal, + bool CopyOffloadEnable) { TRACK_SCOPE_LATENCY("command_list_cache_t::getRegularCommandList"); regular_command_list_descriptor_t Desc; Desc.ZeDevice = ZeDevice; Desc.IsInOrder = IsInOrder; Desc.Ordinal = Ordinal; + Desc.CopyOffloadEnabled = CopyOffloadEnable; auto [CommandList, _] = getCommandList(Desc).release(); diff --git a/source/adapters/level_zero/v2/command_list_cache.hpp b/source/adapters/level_zero/v2/command_list_cache.hpp index 0f2a212eab..c2462cce5e 100644 --- a/source/adapters/level_zero/v2/command_list_cache.hpp +++ b/source/adapters/level_zero/v2/command_list_cache.hpp @@ -30,6 +30,7 @@ struct immediate_command_list_descriptor_t { ze_device_handle_t ZeDevice; bool IsInOrder; uint32_t Ordinal; + bool CopyOffloadEnabled; ze_command_queue_mode_t Mode; ze_command_queue_priority_t Priority; std::optional Index; @@ -40,6 +41,7 @@ struct regular_command_list_descriptor_t { ze_device_handle_t ZeDevice; bool IsInOrder; uint32_t Ordinal; + bool CopyOffloadEnabled; bool operator==(const regular_command_list_descriptor_t &rhs) const; }; @@ -56,12 +58,13 @@ struct command_list_cache_t { raii::command_list_unique_handle getImmediateCommandList(ze_device_handle_t ZeDevice, bool IsInOrder, - uint32_t Ordinal, ze_command_queue_mode_t Mode, + uint32_t Ordinal, bool CopyOffloadEnable, + ze_command_queue_mode_t Mode, ze_command_queue_priority_t Priority, std::optional Index = std::nullopt); raii::command_list_unique_handle getRegularCommandList(ze_device_handle_t ZeDevice, bool IsInOrder, - uint32_t Ordinal); + uint32_t Ordinal, bool CopyOffloadEnable); // For testing purposes size_t getNumImmediateCommandLists(); diff --git a/source/adapters/level_zero/v2/memory.cpp b/source/adapters/level_zero/v2/memory.cpp index a70792969b..3b305612d8 100644 --- a/source/adapters/level_zero/v2/memory.cpp +++ b/source/adapters/level_zero/v2/memory.cpp @@ -157,7 +157,7 @@ static ur_result_t 
synchronousZeCopy(ur_context_handle_t hContext, hDevice ->QueueGroup[ur_device_handle_t_::queue_group_info_t::type::Compute] .ZeOrdinal, - ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, + true, ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, std::nullopt); ZE2UR_CALL(zeCommandListAppendMemoryCopy, diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index b4f61adbba..8e31dd1bc6 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -23,30 +23,17 @@ namespace v2 { std::pair ur_queue_immediate_in_order_t::getWaitListView( - const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents, - ur_command_list_handler_t *pHandler) { - auto extraWaitEvent = (lastHandler && pHandler != lastHandler) - ? lastHandler->lastEvent->getZeEvent() - : nullptr; - - auto totalEvents = numWaitEvents + (extraWaitEvent != nullptr); - waitList.reserve(totalEvents); + const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents) { + waitList.resize(numWaitEvents); for (uint32_t i = 0; i < numWaitEvents; i++) { waitList[i] = phWaitEvents[i]->getZeEvent(); } - if (extraWaitEvent) { - waitList[numWaitEvents] = extraWaitEvent; - } - - return {waitList.data(), static_cast(totalEvents)}; + return {waitList.data(), static_cast(numWaitEvents)}; } -static int32_t getZeOrdinal(ur_device_handle_t hDevice, queue_group_type type) { - if (type == queue_group_type::MainCopy && hDevice->hasMainCopyEngine()) { - return hDevice->QueueGroup[queue_group_type::MainCopy].ZeOrdinal; - } +static int32_t getZeOrdinal(ur_device_handle_t hDevice) { return hDevice->QueueGroup[queue_group_type::Compute].ZeOrdinal; } @@ -73,29 +60,22 @@ static ze_command_queue_priority_t getZePriority(ur_queue_flags_t flags) { ur_command_list_handler_t::ur_command_list_handler_t( ur_context_handle_t hContext, ur_device_handle_t 
hDevice, - const ur_queue_properties_t *pProps, queue_group_type type, - event_pool *eventPool) + const ur_queue_properties_t *pProps) : commandList(hContext->commandListCache.getImmediateCommandList( - hDevice->ZeDevice, true, getZeOrdinal(hDevice, type), + hDevice->ZeDevice, true, getZeOrdinal(hDevice), + true /* always enable copy offload */, ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, getZePriority(pProps ? pProps->flags : ur_queue_flags_t{}), - getZeIndex(pProps))), - internalEvent(eventPool->allocate(), [=](ur_event_handle_t event) { - ur::level_zero::urEventRelease(event); - }) {} + getZeIndex(pProps))) {} ur_command_list_handler_t::ur_command_list_handler_t( - ze_command_list_handle_t hZeCommandList, event_pool *eventPool, - bool ownZeHandle) + ze_command_list_handle_t hZeCommandList, bool ownZeHandle) : commandList(hZeCommandList, [ownZeHandle](ze_command_list_handle_t hZeCommandList) { if (ownZeHandle) { zeCommandListDestroy(hZeCommandList); } - }), - internalEvent(eventPool->allocate(), [=](ur_event_handle_t event) { - ur::level_zero::urEventRelease(event); - }) {} + }) {} static event_flags_t eventFlagsFromQueueFlags(ur_queue_flags_t flags) { event_flags_t eventFlags = EVENT_FLAGS_COUNTER; @@ -110,10 +90,7 @@ ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t( : hContext(hContext), hDevice(hDevice), flags(pProps ? 
pProps->flags : 0), eventPool(hContext->eventPoolCache.borrow( hDevice->Id.value(), eventFlagsFromQueueFlags(flags))), - copyHandler(hContext, hDevice, pProps, queue_group_type::MainCopy, - eventPool.get()), - computeHandler(hContext, hDevice, pProps, queue_group_type::Compute, - eventPool.get()) {} + handler(hContext, hDevice, pProps) {} ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t( ur_context_handle_t hContext, ur_device_handle_t hDevice, @@ -121,42 +98,17 @@ ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t( : hContext(hContext), hDevice(hDevice), flags(flags), eventPool(hContext->eventPoolCache.borrow( hDevice->Id.value(), eventFlagsFromQueueFlags(flags))), - copyHandler( - reinterpret_cast(hNativeHandle), eventPool.get(), false /* we're using a single command list for both handlers, only own it by one of them */), - computeHandler(reinterpret_cast(hNativeHandle), - eventPool.get(), ownZeQueue) {} - -ur_command_list_handler_t * -ur_queue_immediate_in_order_t::getCommandListHandlerForCompute() { - return &computeHandler; -} - -ur_command_list_handler_t * -ur_queue_immediate_in_order_t::getCommandListHandlerForCopy() { - // TODO: optimize for specific devices, see ../memory.cpp - return ©Handler; -} - -ur_command_list_handler_t * -ur_queue_immediate_in_order_t::getCommandListHandlerForFill( - size_t patternSize) { - if (patternSize <= hDevice->QueueGroup[queue_group_type::MainCopy] - .ZeProperties.maxMemoryFillPatternSize) - return ©Handler; - else - return &computeHandler; -} + handler(reinterpret_cast(hNativeHandle), + ownZeQueue) {} -ur_event_handle_t ur_queue_immediate_in_order_t::getSignalEvent( - ur_command_list_handler_t *handler, ur_event_handle_t *hUserEvent) { - if (!hUserEvent) { - handler->lastEvent = handler->internalEvent.get(); - } else { +ur_event_handle_t +ur_queue_immediate_in_order_t::getSignalEvent(ur_event_handle_t *hUserEvent) { + if (hUserEvent) { *hUserEvent = eventPool->allocate(); - handler->lastEvent = 
*hUserEvent; + return *hUserEvent; + } else { + return nullptr; } - - return handler->lastEvent; } ur_result_t @@ -178,11 +130,8 @@ ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName, case UR_QUEUE_INFO_DEVICE_DEFAULT: return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_QUEUE_INFO_EMPTY: { - // We can exit early if we have in-order queue. - if (!lastHandler) - return ReturnValue(true); - else - return ReturnValue(false); + // We can't tell if the queue is empty as we don't hold to any events + return ReturnValue(false); } default: logger::error("Unsupported ParamName in urQueueGetInfo: " @@ -210,27 +159,8 @@ ur_result_t ur_queue_immediate_in_order_t::queueRelease() { ur_result_t ur_queue_immediate_in_order_t::queueGetNativeHandle( ur_queue_native_desc_t *pDesc, ur_native_handle_t *phNativeQueue) { std::ignore = pDesc; - *phNativeQueue = reinterpret_cast( - this->computeHandler.commandList.get()); - return UR_RESULT_SUCCESS; -} - -ur_result_t ur_queue_immediate_in_order_t::finalizeHandler( - ur_command_list_handler_t *handler) { - lastHandler = handler; - return UR_RESULT_SUCCESS; -} - -ur_result_t ur_queue_immediate_in_order_t::finalizeHandler( - ur_command_list_handler_t *handler, bool blocking) { - if (blocking) { - ZE2UR_CALL(zeCommandListHostSynchronize, - (handler->commandList.get(), UINT64_MAX)); - lastHandler = nullptr; - } else { - finalizeHandler(handler); - } - + *phNativeQueue = + reinterpret_cast(this->handler.commandList.get()); return UR_RESULT_SUCCESS; } @@ -239,18 +169,11 @@ ur_result_t ur_queue_immediate_in_order_t::queueFinish() { std::unique_lock lock(this->Mutex); - if (!lastHandler) { - return UR_RESULT_SUCCESS; - } - - auto lastCmdList = lastHandler->commandList.get(); - lastHandler = nullptr; - lock.unlock(); - // TODO: use zeEventHostSynchronize instead? 
TRACK_SCOPE_LATENCY( "ur_queue_immediate_in_order_t::zeCommandListHostSynchronize"); - ZE2UR_CALL(zeCommandListHostSynchronize, (lastCmdList, UINT64_MAX)); + ZE2UR_CALL(zeCommandListHostSynchronize, + (handler.commandList.get(), UINT64_MAX)); return UR_RESULT_SUCCESS; } @@ -283,16 +206,14 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunch( zeThreadGroupDimensions, WG, workDim, pGlobalWorkSize, pLocalWorkSize)); - auto handler = getCommandListHandlerForCompute(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent); - auto waitList = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); bool memoryMigrated = false; auto memoryMigrate = [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; }; @@ -310,11 +231,12 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunch( TRACK_SCOPE_LATENCY( "ur_queue_immediate_in_order_t::zeCommandListAppendLaunchKernel"); + auto zeSignalEvent = signalEvent ? 
signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendLaunchKernel, - (handler->commandList.get(), hZeKernel, &zeThreadGroupDimensions, - signalEvent->getZeEvent(), waitList.second, waitList.first)); + (handler.commandList.get(), hZeKernel, &zeThreadGroupDimensions, + zeSignalEvent, waitList.second, waitList.first)); - return finalizeHandler(handler); + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWait( @@ -324,17 +246,26 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWait( std::scoped_lock lock(this->Mutex); - auto handler = getCommandListHandlerForCompute(); - auto signalEvent = getSignalEvent(handler, phEvent); + if (!numEventsInWaitList && !phEvent) { + // nop + return UR_RESULT_SUCCESS; + } + + auto signalEvent = getSignalEvent(phEvent); auto [pWaitEvents, numWaitEvents] = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + getWaitListView(phEventWaitList, numEventsInWaitList); - ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (handler->commandList.get(), numWaitEvents, pWaitEvents)); - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler->commandList.get(), signalEvent->getZeEvent())); + if (numWaitEvents > 0) { + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (handler.commandList.get(), numWaitEvents, pWaitEvents)); + } - return finalizeHandler(handler); + if (signalEvent) { + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (handler.commandList.get(), signalEvent->getZeEvent())); + } + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier( @@ -351,18 +282,16 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked( ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - auto handler = getCommandListHandlerForCopy(); - auto signalEvent = getSignalEvent(handler, phEvent); 
+ auto signalEvent = getSignalEvent(phEvent); - auto waitList = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); bool memoryMigrated = false; auto pSrc = ur_cast(src->getDevicePtr( hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, srcOffset, size, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -371,7 +300,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked( hDevice, ur_mem_handle_t_::device_access_mode_t::write_only, dstOffset, size, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -383,11 +312,17 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked( waitList.second = 0; } + auto zeSignalEvent = signalEvent ? 
signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), pDst, pSrc, size, - signalEvent->getZeEvent(), waitList.second, waitList.first)); + (handler.commandList.get(), pDst, pSrc, size, zeSignalEvent, + waitList.second, waitList.first)); - return finalizeHandler(handler, blocking); + if (blocking) { + ZE2UR_CALL(zeCommandListHostSynchronize, + (handler.commandList.get(), UINT64_MAX)); + } + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferRead( @@ -435,18 +370,16 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked( auto zeParams = ur2zeRegionParams(srcOrigin, dstOrigin, region, srcRowPitch, dstRowPitch, srcSlicePitch, dstSlicePitch); - auto handler = getCommandListHandlerForCopy(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent); - auto waitList = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); bool memoryMigrated = false; auto pSrc = ur_cast(src->getDevicePtr( hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, 0, src->getSize(), [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -454,7 +387,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked( hDevice, ur_mem_handle_t_::device_access_mode_t::write_only, 0, dst->getSize(), [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -466,13 +399,19 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked( 
waitList.second = 0; } + auto zeSignalEvent = signalEvent ? signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, - (handler->commandList.get(), pDst, &zeParams.dstRegion, + (handler.commandList.get(), pDst, &zeParams.dstRegion, zeParams.dstPitch, zeParams.dstSlicePitch, pSrc, &zeParams.srcRegion, zeParams.srcPitch, zeParams.srcSlicePitch, - signalEvent->getZeEvent(), waitList.second, waitList.first)); + zeSignalEvent, waitList.second, waitList.first)); - return finalizeHandler(handler, blocking); + if (blocking) { + ZE2UR_CALL(zeCommandListHostSynchronize, + (handler.commandList.get(), UINT64_MAX)); + } + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferReadRect( @@ -630,17 +569,15 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap( std::scoped_lock lock(this->Mutex, hBuffer->getMutex()); - auto handler = getCommandListHandlerForCopy(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent); - auto waitList = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); bool memoryMigrated = false; auto pDst = ur_cast(hBuffer->mapHostPtr( mapFlags, offset, size, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -649,14 +586,19 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap( if (!memoryMigrated && waitList.second) { // If memory was not migrated, we need to wait on the events here. 
ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (handler->commandList.get(), waitList.second, waitList.first)); + (handler.commandList.get(), waitList.second, waitList.first)); if (signalEvent) { ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler->commandList.get(), signalEvent->getZeEvent())); + (handler.commandList.get(), signalEvent->getZeEvent())); } } - return finalizeHandler(handler, blockingMap); + if (blockingMap) { + ZE2UR_CALL(zeCommandListHostSynchronize, + (handler.commandList.get(), UINT64_MAX)); + } + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueMemUnmap( @@ -666,32 +608,30 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemUnmap( std::scoped_lock lock(this->Mutex); - auto handler = getCommandListHandlerForCopy(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent); - auto waitList = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); // TODO: currently unmapHostPtr deallocates memory immediately, // since the memory might be used by the user, we need to make sure // all dependencies are completed. 
ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (handler->commandList.get(), waitList.second, waitList.first)); + (handler.commandList.get(), waitList.second, waitList.first)); bool memoryMigrated = false; hMem->unmapHostPtr(pMappedPtr, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; }); if (signalEvent) { ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler->commandList.get(), signalEvent->getZeEvent())); + (handler.commandList.get(), signalEvent->getZeEvent())); } - return finalizeHandler(handler); + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked( @@ -699,18 +639,16 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked( const void *pPattern, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - auto handler = getCommandListHandlerForFill(patternSize); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent); - auto waitList = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); bool memoryMigrated = false; auto pDst = ur_cast(dst->getDevicePtr( hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, offset, size, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), dst, src, size, nullptr, + (handler.commandList.get(), dst, src, size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -726,11 +664,12 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked( // PatternSize must be a power of two for zeCommandListAppendMemoryFill. 
// When it's not, the fill is emulated with zeCommandListAppendMemoryCopy. + auto zeSignalEvent = signalEvent ? signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendMemoryFill, - (handler->commandList.get(), pDst, pPattern, patternSize, size, - signalEvent->getZeEvent(), waitList.second, waitList.first)); + (handler.commandList.get(), pDst, pPattern, patternSize, size, + zeSignalEvent, waitList.second, waitList.first)); - return finalizeHandler(handler); + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueUSMFill( @@ -756,17 +695,22 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy( std::scoped_lock lock(this->Mutex); - auto handler = getCommandListHandlerForCopy(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent); auto [pWaitEvents, numWaitEvents] = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + getWaitListView(phEventWaitList, numEventsInWaitList); + auto zeSignalEvent = signalEvent ? 
signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendMemoryCopy, - (handler->commandList.get(), pDst, pSrc, size, - signalEvent->getZeEvent(), numWaitEvents, pWaitEvents)); + (handler.commandList.get(), pDst, pSrc, size, zeSignalEvent, + numWaitEvents, pWaitEvents)); + + if (blocking) { + ZE2UR_CALL(zeCommandListHostSynchronize, + (handler.commandList.get(), UINT64_MAX)); + } - return finalizeHandler(handler, blocking); + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueUSMPrefetch( @@ -779,23 +723,25 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMPrefetch( std::scoped_lock lock(this->Mutex); - auto handler = getCommandListHandlerForCompute(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent); auto [pWaitEvents, numWaitEvents] = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + getWaitListView(phEventWaitList, numEventsInWaitList); if (pWaitEvents) { - ZE2UR_CALL(zeCommandListAppendBarrier, (handler->commandList.get(), nullptr, + ZE2UR_CALL(zeCommandListAppendBarrier, (handler.commandList.get(), nullptr, numWaitEvents, pWaitEvents)); } // TODO: figure out how to translate "flags" ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, - (handler->commandList.get(), pMem, size)); - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler->commandList.get(), signalEvent->getZeEvent())); + (handler.commandList.get(), pMem, size)); + + if (signalEvent) { + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (handler.commandList.get(), signalEvent->getZeEvent())); + } - return finalizeHandler(handler); + return UR_RESULT_SUCCESS; } ur_result_t @@ -810,24 +756,26 @@ ur_queue_immediate_in_order_t::enqueueUSMAdvise(const void *pMem, size_t size, auto zeAdvice = ur_cast(advice); - auto handler = getCommandListHandlerForCompute(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent); - auto [pWaitEvents, numWaitEvents] = 
getWaitListView(nullptr, 0, handler); + auto [pWaitEvents, numWaitEvents] = getWaitListView(nullptr, 0); if (pWaitEvents) { - ZE2UR_CALL(zeCommandListAppendBarrier, (handler->commandList.get(), nullptr, + ZE2UR_CALL(zeCommandListAppendBarrier, (handler.commandList.get(), nullptr, numWaitEvents, pWaitEvents)); } // TODO: figure out how to translate "flags" ZE2UR_CALL(zeCommandListAppendMemAdvise, - (handler->commandList.get(), this->hDevice->ZeDevice, pMem, size, + (handler.commandList.get(), this->hDevice->ZeDevice, pMem, size, zeAdvice)); - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler->commandList.get(), signalEvent->getZeEvent())); - return finalizeHandler(handler); + if (signalEvent) { + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (handler.commandList.get(), signalEvent->getZeEvent())); + } + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueUSMFill2D( @@ -1024,24 +972,27 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueTimestampRecordingExp( std::scoped_lock lock(this->Mutex); - auto handler = getCommandListHandlerForCompute(); - auto signalEvent = getSignalEvent(handler, phEvent); + auto signalEvent = getSignalEvent(phEvent); if (!signalEvent) { return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } auto [pWaitEvents, numWaitEvents] = - getWaitListView(phEventWaitList, numEventsInWaitList, handler); + getWaitListView(phEventWaitList, numEventsInWaitList); signalEvent->recordStartTimestamp(); ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp, - (handler->commandList.get(), - signalEvent->getEventEndTimestampPtr(), signalEvent->getZeEvent(), - numWaitEvents, pWaitEvents)); + (handler.commandList.get(), signalEvent->getEventEndTimestampPtr(), + signalEvent->getZeEvent(), numWaitEvents, pWaitEvents)); - return finalizeHandler(handler, blocking); + if (blocking) { + ZE2UR_CALL(zeCommandListHostSynchronize, + (handler.commandList.get(), UINT64_MAX)); + } + + return UR_RESULT_SUCCESS; } ur_result_t 
ur_queue_immediate_in_order_t::enqueueKernelLaunchCustomExp( diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp index 5b320fe2e8..d8769d3b97 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -26,20 +26,12 @@ using queue_group_type = ur_device_handle_t_::queue_group_info_t::type; struct ur_command_list_handler_t { ur_command_list_handler_t(ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_queue_properties_t *pProps, - queue_group_type type, event_pool *eventPool); + const ur_queue_properties_t *pProps); ur_command_list_handler_t(ze_command_list_handle_t hZeCommandList, - event_pool *eventPool, bool ownZeHandle); + bool ownZeHandle); raii::command_list_unique_handle commandList; - std::unique_ptr> - internalEvent; - - // TODO: do we need to keep ref count of this for user events? - // For counter based events, we can reuse them safely and l0 event pool - // cannot be destroyed before the queue is released. 
- ur_event_handle_t lastEvent = nullptr; }; struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { @@ -50,26 +42,15 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { raii::cache_borrowed_event_pool eventPool; - ur_command_list_handler_t copyHandler; - ur_command_list_handler_t computeHandler; - ur_command_list_handler_t *lastHandler = nullptr; + ur_command_list_handler_t handler; std::vector waitList; std::pair - getWaitListView(const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents, - ur_command_list_handler_t *pHandler); - - ur_command_list_handler_t *getCommandListHandlerForCompute(); - ur_command_list_handler_t *getCommandListHandlerForCopy(); - ur_command_list_handler_t *getCommandListHandlerForFill(size_t patternSize); - - ur_event_handle_t getSignalEvent(ur_command_list_handler_t *handler, - ur_event_handle_t *hUserEvent); + getWaitListView(const ur_event_handle_t *phWaitEvents, + uint32_t numWaitEvents); - ur_result_t finalizeHandler(ur_command_list_handler_t *handler); - ur_result_t finalizeHandler(ur_command_list_handler_t *handler, - bool blocking); + ur_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent); ur_result_t enqueueRegionCopyUnlocked( ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, diff --git a/test/adapters/level_zero/v2/command_list_cache_test.cpp b/test/adapters/level_zero/v2/command_list_cache_test.cpp index fcaae55a96..3ddeaeff6e 100644 --- a/test/adapters/level_zero/v2/command_list_cache_test.cpp +++ b/test/adapters/level_zero/v2/command_list_cache_test.cpp @@ -40,13 +40,13 @@ TEST_P(CommandListCacheTest, CanStoreAndRetriveImmediateAndRegularCmdLists) { // get command lists from the cache for (int i = 0; i < numListsPerType; ++i) { - regCmdListOwners.emplace_back( - cache.getRegularCommandList(device->ZeDevice, IsInOrder, Ordinal)); + regCmdListOwners.emplace_back(cache.getRegularCommandList( + device->ZeDevice, IsInOrder, Ordinal, true)); auto [it, _] = 
regCmdLists.emplace(regCmdListOwners.back().get()); ASSERT_TRUE(*it != nullptr); immCmdListOwners.emplace_back(cache.getImmediateCommandList( - device->ZeDevice, IsInOrder, Ordinal, Mode, Priority)); + device->ZeDevice, IsInOrder, Ordinal, true, Mode, Priority)); std::tie(it, _) = immCmdLists.emplace(immCmdListOwners.back().get()); ASSERT_TRUE(*it != nullptr); } @@ -57,12 +57,12 @@ TEST_P(CommandListCacheTest, CanStoreAndRetriveImmediateAndRegularCmdLists) { // verify we get back the same command lists for (int i = 0; i < numListsPerType; ++i) { - auto regCmdList = - cache.getRegularCommandList(device->ZeDevice, IsInOrder, Ordinal); + auto regCmdList = cache.getRegularCommandList(device->ZeDevice, + IsInOrder, Ordinal, true); ASSERT_TRUE(regCmdList != nullptr); auto immCmdList = cache.getImmediateCommandList( - device->ZeDevice, IsInOrder, Ordinal, Mode, Priority); + device->ZeDevice, IsInOrder, Ordinal, true, Mode, Priority); ASSERT_TRUE(immCmdList != nullptr); ASSERT_EQ(regCmdLists.erase(regCmdList.get()), 1); @@ -103,7 +103,8 @@ TEST_P(CommandListCacheTest, ImmediateCommandListsHaveProperAttributes) { for (uint32_t Index = 0; Index < QueueGroupProperties[Ordinal].numQueues; Index++) { auto CommandList = cache.getImmediateCommandList( - device->ZeDevice, IsInOrder, Ordinal, Mode, Priority, Index); + device->ZeDevice, IsInOrder, Ordinal, true, Mode, Priority, + Index); ze_device_handle_t ZeDevice; auto Ret = @@ -133,8 +134,9 @@ TEST_P(CommandListCacheTest, ImmediateCommandListsHaveProperAttributes) { } // verify list creation without an index - auto CommandList = cache.getImmediateCommandList( - device->ZeDevice, IsInOrder, Ordinal, Mode, Priority, std::nullopt); + auto CommandList = + cache.getImmediateCommandList(device->ZeDevice, IsInOrder, Ordinal, + true, Mode, Priority, std::nullopt); ze_device_handle_t ZeDevice; auto Ret = zeCommandListGetDeviceHandle(CommandList.get(), &ZeDevice); @@ -207,7 +209,7 @@ TEST_P(CommandListCacheTest, 
CommandListsAreReusedByQueues) { } // Queues scope ASSERT_EQ(context->commandListCache.getNumImmediateCommandLists(), - NumUniqueQueueTypes * 2); // * 2 for compute and copy + NumUniqueQueueTypes); ASSERT_EQ(context->commandListCache.getNumRegularCommandLists(), 0); } } @@ -236,7 +238,7 @@ TEST_P(CommandListCacheTest, CommandListsCacheIsThreadSafe) { ASSERT_LE( context->commandListCache.getNumImmediateCommandLists(), - NumThreads * 2); // * 2 for compute and copy + NumThreads); } }); } @@ -246,5 +248,5 @@ TEST_P(CommandListCacheTest, CommandListsCacheIsThreadSafe) { } ASSERT_LE(context->commandListCache.getNumImmediateCommandLists(), - NumThreads * 2); + NumThreads); } From d39870be4d06824fe191e7b8d1c341d76316b195 Mon Sep 17 00:00:00 2001 From: Martin Morrison-Grant Date: Wed, 16 Oct 2024 11:44:53 +0100 Subject: [PATCH 07/37] Improvements to align CTS and Spec for Memory: - Add tests for UR_RESULT_ERROR_INVALID_NULL/HOST_POINTER for urMemImageCreate and urMemBufferCreate - Add missing error condition to spec for urMemImageCreate for checking type of image description struct - Add tests for UR_RESULT_ERROR_INVALID_NULL_HANDLE/POINTER for urMemBuffer/ImageCreateWithNativeHandle - Remove skip for urMemImageCreateWithNativeHandle - buffer was invalid as it was never instantiated in fixture struct - Update image format used in urMemImageTest fixture as it was invalid and likely to cause previously skipped test to fail (which now pass) - Add missing DDI table entry for urMemImageCreateWithNativeHandle for OpenCL - Add test for using different ur_mem_flag_t flags with urMemBufferPartition - Add missing UR_MEM_INFO_REFERENCE_COUNT to spec for urMemGetInfo and added test for this in urMemRetain/Release - Removed assert in L0 urMemGetInfo which would fail if mem type is not image or the query is not a context query - Fixed potential bug with HIP urMemGetNativeHandle which would fail if the mem handle was an image because of the use of std::variant --- include/ur_api.h | 11 ++- 
include/ur_print.hpp | 15 +++ scripts/core/memory.yml | 6 ++ source/adapters/cuda/memory.cpp | 3 + source/adapters/hip/memory.cpp | 23 +++-- source/adapters/level_zero/memory.cpp | 8 +- source/adapters/level_zero/v2/memory.cpp | 3 + source/adapters/opencl/memory.cpp | 2 + .../adapters/opencl/ur_interface_loader.cpp | 1 + source/loader/layers/validation/ur_valddi.cpp | 6 +- source/loader/ur_libapi.cpp | 3 +- source/ur_api.cpp | 3 +- .../memory/memory_adapter_cuda.match | 3 + .../memory/memory_adapter_hip.match | 4 + .../memory/memory_adapter_level_zero.match | 2 + .../memory/memory_adapter_level_zero_v2.match | 14 +++ .../memory/memory_adapter_native_cpu.match | 17 +++- .../memory/memory_adapter_opencl.match | 2 - test/conformance/memory/urMemBufferCreate.cpp | 64 +++++++++---- .../urMemBufferCreateWithNativeHandle.cpp | 93 +++++++++++++++---- .../memory/urMemBufferPartition.cpp | 25 +++-- test/conformance/memory/urMemGetInfo.cpp | 39 ++++++-- test/conformance/memory/urMemImageCreate.cpp | 27 ++++-- .../urMemImageCreateWithNativeHandle.cpp | 31 +++++-- test/conformance/memory/urMemRelease.cpp | 21 +++++ test/conformance/memory/urMemRetain.cpp | 24 ++++- .../testing/include/uur/fixtures.h | 9 +- 27 files changed, 362 insertions(+), 97 deletions(-) delete mode 100644 test/conformance/memory/memory_adapter_opencl.match diff --git a/include/ur_api.h b/include/ur_api.h index cfa122deb4..60d6fc2f70 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -2515,8 +2515,12 @@ typedef enum ur_mem_type_t { /////////////////////////////////////////////////////////////////////////////// /// @brief Memory Information type typedef enum ur_mem_info_t { - UR_MEM_INFO_SIZE = 0, ///< [size_t] actual size of of memory object in bytes - UR_MEM_INFO_CONTEXT = 1, ///< [::ur_context_handle_t] context in which the memory object was created + UR_MEM_INFO_SIZE = 0, ///< [size_t] actual size of of memory object in bytes + UR_MEM_INFO_CONTEXT = 1, ///< [::ur_context_handle_t] context in which 
the memory object was created + UR_MEM_INFO_REFERENCE_COUNT = 2, ///< [uint32_t] Reference count of the memory object. + ///< The reference count returned should be considered immediately stale. + ///< It is unsuitable for general use in applications. This feature is + ///< provided for identifying memory leaks. /// @cond UR_MEM_INFO_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -2650,6 +2654,7 @@ typedef struct ur_image_desc_t { /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR +/// + `pImageDesc && UR_STRUCTURE_TYPE_IMAGE_DESC != pImageDesc->stype` /// + `pImageDesc && UR_MEM_TYPE_IMAGE1D_ARRAY < pImageDesc->type` /// + `pImageDesc && pImageDesc->numMipLevel != 0` /// + `pImageDesc && pImageDesc->numSamples != 0` @@ -2990,7 +2995,7 @@ urMemImageCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hMemory` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_MEM_INFO_CONTEXT < propName` +/// + `::UR_MEM_INFO_REFERENCE_COUNT < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE diff --git a/include/ur_print.hpp b/include/ur_print.hpp index cfb077177e..09431d4352 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -5631,6 +5631,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_mem_info_t value) { case UR_MEM_INFO_CONTEXT: os << "UR_MEM_INFO_CONTEXT"; break; + case UR_MEM_INFO_REFERENCE_COUNT: + os << "UR_MEM_INFO_REFERENCE_COUNT"; + break; default: os << "unknown enumerator"; break; @@ -5672,6 +5675,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_mem_info_t os << ")"; } break; + case UR_MEM_INFO_REFERENCE_COUNT: { + const uint32_t *tptr = (const uint32_t *)ptr; + if (sizeof(uint32_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/scripts/core/memory.yml b/scripts/core/memory.yml index 0fde537f37..7cc7467da4 100644 --- a/scripts/core/memory.yml +++ b/scripts/core/memory.yml @@ -62,6 +62,11 @@ etors: desc: "[size_t] actual size of of memory object in bytes" - name: CONTEXT desc: "[$x_context_handle_t] context in which the memory object was created" + - name: REFERENCE_COUNT + desc: | + [uint32_t] Reference count of the memory object. + The reference count returned should be considered immediately stale. + It is unsuitable for general use in applications. This feature is provided for identifying memory leaks. 
--- #-------------------------------------------------------------------------- type: enum desc: "Image channel order info: number of channels and the channel layout" @@ -241,6 +246,7 @@ returns: - $X_RESULT_ERROR_INVALID_CONTEXT - $X_RESULT_ERROR_INVALID_VALUE - $X_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR: + - "`pImageDesc && UR_STRUCTURE_TYPE_IMAGE_DESC != pImageDesc->stype`" - "`pImageDesc && UR_MEM_TYPE_IMAGE1D_ARRAY < pImageDesc->type`" - "`pImageDesc && pImageDesc->numMipLevel != 0`" - "`pImageDesc && pImageDesc->numSamples != 0`" diff --git a/source/adapters/cuda/memory.cpp b/source/adapters/cuda/memory.cpp index ea55c1669a..8b4db742ac 100644 --- a/source/adapters/cuda/memory.cpp +++ b/source/adapters/cuda/memory.cpp @@ -171,6 +171,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, case UR_MEM_INFO_CONTEXT: { return ReturnValue(hMemory->getContext()); } + case UR_MEM_INFO_REFERENCE_COUNT: { + return ReturnValue(hMemory->getReferenceCount()); + } default: return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/source/adapters/hip/memory.cpp b/source/adapters/hip/memory.cpp index aa7b5f4040..93d8450862 100644 --- a/source/adapters/hip/memory.cpp +++ b/source/adapters/hip/memory.cpp @@ -231,10 +231,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, size_t propSize, void *pMemInfo, size_t *pPropSizeRet) { - - UR_ASSERT(MemInfoType <= UR_MEM_INFO_CONTEXT, - UR_RESULT_ERROR_INVALID_ENUMERATION); - // FIXME: Only getting info for the first device in the context. 
This // should be fine in general auto Device = hMemory->getContext()->getDevices()[0]; @@ -286,6 +282,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, case UR_MEM_INFO_CONTEXT: { return ReturnValue(hMemory->getContext()); } + case UR_MEM_INFO_REFERENCE_COUNT: { + return ReturnValue(hMemory->getReferenceCount()); + } default: return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -316,14 +315,18 @@ urMemGetNativeHandle(ur_mem_handle_t hMem, ur_device_handle_t Device, return UR_RESULT_ERROR_INVALID_MEM_OBJECT; } } - *phNativeMem = reinterpret_cast( - std::get(hMem->Mem).getPtr(Device)); -#elif defined(__HIP_PLATFORM_AMD__) - *phNativeMem = reinterpret_cast( - std::get(hMem->Mem).getPtr(Device)); -#else +#elif !defined(__HIP_PLATFORM_AMD__) #error("Must define exactly one of __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__"); #endif + if (std::holds_alternative(hMem->Mem)) { + *phNativeMem = reinterpret_cast( + std::get(hMem->Mem).getPtr(Device)); + } else if (std::holds_alternative(hMem->Mem)) { + *phNativeMem = reinterpret_cast( + std::get(hMem->Mem).getSurface(Device)); + } else { + return UR_RESULT_ERROR_INVALID_MEM_OBJECT; + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 706a3f1364..cd12800bdd 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -1844,9 +1844,6 @@ ur_result_t urMemGetInfo( size_t *PropSizeRet ///< [out][optional] pointer to the actual size in ///< bytes of data queried by pMemInfo. 
) { - UR_ASSERT(MemInfoType == UR_MEM_INFO_CONTEXT || !Memory->isImage(), - UR_RESULT_ERROR_INVALID_VALUE); - auto Buffer = reinterpret_cast<_ur_buffer *>(Memory); std::shared_lock Lock(Buffer->Mutex); UrReturnHelper ReturnValue(PropSize, MemInfo, PropSizeRet); @@ -1859,8 +1856,11 @@ ur_result_t urMemGetInfo( // Get size of the allocation return ReturnValue(size_t{Buffer->Size}); } + case UR_MEM_INFO_REFERENCE_COUNT: { + return ReturnValue(Buffer->RefCount.load()); + } default: { - die("urMemGetInfo: Parameter is not implemented"); + return UR_RESULT_ERROR_INVALID_ENUMERATION; } } diff --git a/source/adapters/level_zero/v2/memory.cpp b/source/adapters/level_zero/v2/memory.cpp index a70792969b..05a25aa256 100644 --- a/source/adapters/level_zero/v2/memory.cpp +++ b/source/adapters/level_zero/v2/memory.cpp @@ -522,6 +522,9 @@ ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName, // Get size of the allocation return returnValue(size_t{hMemory->getSize()}); } + case UR_MEM_INFO_REFERENCE_COUNT: { + return returnValue(hMemory->RefCount.load()); + } default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index b2476fc420..201df1f678 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -186,6 +186,8 @@ cl_int mapURMemInfoToCL(ur_mem_info_t URPropName) { return CL_MEM_SIZE; case UR_MEM_INFO_CONTEXT: return CL_MEM_CONTEXT; + case UR_MEM_INFO_REFERENCE_COUNT: + return CL_MEM_REFERENCE_COUNT; default: return -1; } diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index da0b659bec..cba90ee152 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -154,6 +154,7 @@ urGetMemProcAddrTable(ur_api_version_t Version, ur_mem_dditable_t *pDdiTable) { pDdiTable->pfnBufferPartition = urMemBufferPartition; 
pDdiTable->pfnBufferCreateWithNativeHandle = urMemBufferCreateWithNativeHandle; + pDdiTable->pfnImageCreateWithNativeHandle = urMemImageCreateWithNativeHandle; pDdiTable->pfnGetInfo = urMemGetInfo; pDdiTable->pfnGetNativeHandle = urMemGetNativeHandle; pDdiTable->pfnImageCreate = urMemImageCreate; diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index ef7bb019ea..fdfce7951b 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -1108,6 +1108,10 @@ __urdlllocal ur_result_t UR_APICALL urMemImageCreate( return UR_RESULT_ERROR_INVALID_ENUMERATION; } + if (pImageDesc && UR_STRUCTURE_TYPE_IMAGE_DESC != pImageDesc->stype) { + return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; + } + if (pImageDesc && UR_MEM_TYPE_IMAGE1D_ARRAY < pImageDesc->type) { return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; } @@ -1507,7 +1511,7 @@ __urdlllocal ur_result_t UR_APICALL urMemGetInfo( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (UR_MEM_INFO_CONTEXT < propName) { + if (UR_MEM_INFO_REFERENCE_COUNT < propName) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 45ee5e7531..9a8e4c2e12 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -1541,6 +1541,7 @@ ur_result_t UR_APICALL urContextSetExtendedDeleter( /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR +/// + `pImageDesc && UR_STRUCTURE_TYPE_IMAGE_DESC != pImageDesc->stype` /// + `pImageDesc && UR_MEM_TYPE_IMAGE1D_ARRAY < pImageDesc->type` /// + `pImageDesc && pImageDesc->numMipLevel != 0` /// + `pImageDesc && pImageDesc->numSamples != 0` @@ -1890,7 +1891,7 @@ ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hMemory` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION 
-/// + `::UR_MEM_INFO_CONTEXT < propName` +/// + `::UR_MEM_INFO_REFERENCE_COUNT < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 140a20135b..92b02b7176 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -1342,6 +1342,7 @@ ur_result_t UR_APICALL urContextSetExtendedDeleter( /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR +/// + `pImageDesc && UR_STRUCTURE_TYPE_IMAGE_DESC != pImageDesc->stype` /// + `pImageDesc && UR_MEM_TYPE_IMAGE1D_ARRAY < pImageDesc->type` /// + `pImageDesc && pImageDesc->numMipLevel != 0` /// + `pImageDesc && pImageDesc->numSamples != 0` @@ -1636,7 +1637,7 @@ ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hMemory` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_MEM_INFO_CONTEXT < propName` +/// + `::UR_MEM_INFO_REFERENCE_COUNT < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE diff --git a/test/conformance/memory/memory_adapter_cuda.match b/test/conformance/memory/memory_adapter_cuda.match index c5b70e8559..bc36329e55 100644 --- a/test/conformance/memory/memory_adapter_cuda.match +++ b/test/conformance/memory/memory_adapter_cuda.match @@ -2,3 +2,6 @@ urMemImageCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}_ {{OPT}}urMemImageCremBufferCrateTestWith1DMemoryTypeParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_TYPE_IMAGE1D_ARRAY {{OPT}}urMemImageCreateTestWith2DMemoryTypeParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_TYPE_IMAGE2D_ARRAY +urMemBufferCreateWithNativeHandleTest.Success/NVIDIA_CUDA_BACKEND___{{.*}} +urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/NVIDIA_CUDA_BACKEND___{{.*}} +urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/NVIDIA_CUDA_BACKEND___{{.*}} diff --git a/test/conformance/memory/memory_adapter_hip.match b/test/conformance/memory/memory_adapter_hip.match index 589542df7f..4c83995b4f 100644 --- a/test/conformance/memory/memory_adapter_hip.match +++ b/test/conformance/memory/memory_adapter_hip.match @@ -2,3 +2,7 @@ urMemImageCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}} urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}} urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}} +urMemBufferCreateWithNativeHandleTest.Success/AMD_HIP_BACKEND___{{.*}} +urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/AMD_HIP_BACKEND___{{.*}} +urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/AMD_HIP_BACKEND___{{.*}} +urMemImageCreateWithNativeHandleTest.Success/AMD_HIP_BACKEND___{{.*}} diff --git a/test/conformance/memory/memory_adapter_level_zero.match b/test/conformance/memory/memory_adapter_level_zero.match index 8dbd2ac9db..f25d8f23f3 100644 --- a/test/conformance/memory/memory_adapter_level_zero.match +++ b/test/conformance/memory/memory_adapter_level_zero.match @@ -1,5 +1,7 @@ # Note: This file is only for use 
with cts_exe.py {{OPT}}urMemBufferMultiQueueMemBufferTest.WriteBack +urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__UR_MEM_FLAG_WRITE_ONLY +urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__UR_MEM_FLAG_READ_ONLY urMemBufferPartitionTest.InvalidValueCreateType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ {{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE diff --git a/test/conformance/memory/memory_adapter_level_zero_v2.match b/test/conformance/memory/memory_adapter_level_zero_v2.match index 89f708aae1..a0a5823fbc 100644 --- a/test/conformance/memory/memory_adapter_level_zero_v2.match +++ b/test/conformance/memory/memory_adapter_level_zero_v2.match @@ -1,6 +1,12 @@ {{NONDETERMINISTIC}} +urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_WRITE_ONLY +urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_READ_ONLY +urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_READ_WRITE +urMemBufferPartitionTest.InvalidValueCreateType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ {{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE {{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT +{{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_REFERENCE_COUNT 
{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 {{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 {{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 @@ -275,3 +281,11 @@ {{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH {{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT {{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH +urMemBufferCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +urMemBufferCreateWithNativeHandleTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +urMemBufferCreateWithNativeHandleTest.InvalidNullPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +{{OPT}}urMemImageCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +{{OPT}}urMemImageCreateWithNativeHandleTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +{{OPT}}urMemImageCreateWithNativeHandleTest.InvalidNullPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} diff --git a/test/conformance/memory/memory_adapter_native_cpu.match 
b/test/conformance/memory/memory_adapter_native_cpu.match index 5bdd88804b..f22ebcc2e9 100644 --- a/test/conformance/memory/memory_adapter_native_cpu.match +++ b/test/conformance/memory/memory_adapter_native_cpu.match @@ -1,10 +1,12 @@ {{NONDETERMINISTIC}} +urMemBufferPartitionWithFlagsTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_FLAG_WRITE_ONLY +urMemBufferPartitionWithFlagsTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_FLAG_READ_ONLY urMemBufferPartitionTest.InvalidValueCreateType/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urMemGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_SIZE -urMemGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_CONTEXT -urMemGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_SIZE -urMemGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_CONTEXT +urMemGetInfoTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_SIZE +urMemGetInfoTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_CONTEXT +urMemGetInfoTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_REFERENCE_COUNT +urMemGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 @@ -232,3 +234,10 @@ urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_C 
urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_FLOAT urMemReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urMemRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urMemReleaseTest.CheckReferenceCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urMemRetainTest.CheckReferenceCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urMemBufferCreateWithNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urMemBufferCreateWithNativeHandleTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urMemBufferCreateWithNativeHandleTest.InvalidNullPointer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} diff --git a/test/conformance/memory/memory_adapter_opencl.match b/test/conformance/memory/memory_adapter_opencl.match deleted file mode 100644 index b57e3876d0..0000000000 --- a/test/conformance/memory/memory_adapter_opencl.match +++ /dev/null @@ -1,2 +0,0 @@ -{{NONDETERMINISTIC}} -urMemImageCreateTest.InvalidImageDescStype/Intel_R__OpenCL___{{.*}} diff --git a/test/conformance/memory/urMemBufferCreate.cpp b/test/conformance/memory/urMemBufferCreate.cpp index 2e9b46114d..df90c4b63a 100644 --- a/test/conformance/memory/urMemBufferCreate.cpp +++ b/test/conformance/memory/urMemBufferCreate.cpp @@ -31,6 +31,19 @@ TEST_P(urMemBufferCreateWithFlagsTest, InvalidNullHandleContext) { urMemBufferCreate(nullptr, getParam(), 4096, nullptr, buffer.ptr())); } +TEST_P(urMemBufferCreateWithFlagsTest, InvalidNullPointerBuffer) { + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_NULL_POINTER, + urMemBufferCreate(context, getParam(), 4096, nullptr, nullptr)); +} + +TEST_P(urMemBufferCreateWithFlagsTest, InvalidBufferSizeZero) { + uur::raii::Mem buffer = 
nullptr; + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_BUFFER_SIZE, + urMemBufferCreate(context, getParam(), 0, nullptr, buffer.ptr())); +} + using urMemBufferCreateTest = uur::urContextTest; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemBufferCreateTest); @@ -41,6 +54,37 @@ TEST_P(urMemBufferCreateTest, InvalidEnumerationFlags) { nullptr, buffer.ptr())); } +TEST_P(urMemBufferCreateTest, InvalidHostPtrNullProperties) { + uur::raii::Mem buffer = nullptr; + ur_mem_flags_t flags = + UR_MEM_FLAG_USE_HOST_POINTER | UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_HOST_PTR, + urMemBufferCreate(context, flags, 4096, nullptr, buffer.ptr())); +} + +TEST_P(urMemBufferCreateTest, InvalidHostPtrNullHost) { + uur::raii::Mem buffer = nullptr; + ur_mem_flags_t flags = + UR_MEM_FLAG_USE_HOST_POINTER | UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; + ur_buffer_properties_t properties; + properties.pHost = nullptr; + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_HOST_PTR, + urMemBufferCreate(context, flags, 4096, &properties, buffer.ptr())); +} + +TEST_P(urMemBufferCreateTest, InvalidHostPtrValidHost) { + uur::raii::Mem buffer = nullptr; + ur_mem_flags_t flags = 0; + ur_buffer_properties_t properties; + int data = 42; + properties.pHost = &data; + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_HOST_PTR, + urMemBufferCreate(context, flags, 4096, &properties, buffer.ptr())); +} + using urMemBufferCreateWithHostPtrFlagsTest = urMemBufferCreateTestWithFlagsParam; UUR_TEST_SUITE_P(urMemBufferCreateWithHostPtrFlagsTest, @@ -59,23 +103,3 @@ TEST_P(urMemBufferCreateWithHostPtrFlagsTest, SUCCESS) { ASSERT_SUCCESS(urMemBufferCreate(context, getParam(), 4096, &properties, buffer.ptr())); } - -TEST_P(urMemBufferCreateWithHostPtrFlagsTest, InvalidHostPtr) { - uur::raii::Mem buffer = nullptr; - ASSERT_EQ_RESULT( - UR_RESULT_ERROR_INVALID_HOST_PTR, - urMemBufferCreate(context, getParam(), 4096, nullptr, buffer.ptr())); -} - -TEST_P(urMemBufferCreateWithFlagsTest, 
InvalidNullPointerBuffer) { - ASSERT_EQ_RESULT( - UR_RESULT_ERROR_INVALID_NULL_POINTER, - urMemBufferCreate(context, getParam(), 4096, nullptr, nullptr)); -} - -TEST_P(urMemBufferCreateWithFlagsTest, InvalidBufferSizeZero) { - uur::raii::Mem buffer = nullptr; - ASSERT_EQ_RESULT( - UR_RESULT_ERROR_INVALID_BUFFER_SIZE, - urMemBufferCreate(context, getParam(), 0, nullptr, buffer.ptr())); -} diff --git a/test/conformance/memory/urMemBufferCreateWithNativeHandle.cpp b/test/conformance/memory/urMemBufferCreateWithNativeHandle.cpp index 8b98076cf6..78e2510369 100644 --- a/test/conformance/memory/urMemBufferCreateWithNativeHandle.cpp +++ b/test/conformance/memory/urMemBufferCreateWithNativeHandle.cpp @@ -11,23 +11,15 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemBufferCreateWithNativeHandleTest); TEST_P(urMemBufferCreateWithNativeHandleTest, Success) { ur_native_handle_t hNativeMem = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urMemGetNativeHandle(buffer, device, &hNativeMem)); - } + ASSERT_SUCCESS(urMemGetNativeHandle(buffer, device, &hNativeMem)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. // We can however convert the native_handle back into a unified-runtime handle // and perform some query on it to verify that it works. 
ur_mem_handle_t mem = nullptr; - ur_mem_native_properties_t props = { - /*.stype =*/UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, - /*.pNext =*/nullptr, - /*.isNativeHandleOwned =*/false, - }; - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urMemBufferCreateWithNativeHandle(hNativeMem, context, &props, &mem)); + ASSERT_SUCCESS( + urMemBufferCreateWithNativeHandle(hNativeMem, context, nullptr, &mem)); ASSERT_NE(mem, nullptr); size_t alloc_size = 0; @@ -37,12 +29,81 @@ TEST_P(urMemBufferCreateWithNativeHandleTest, Success) { ASSERT_SUCCESS(urMemRelease(mem)); } -using urMemBufferMultiQueueMemBufferTest = uur::urMultiDeviceMemBufferQueueTest; +TEST_P(urMemBufferCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { + ur_native_handle_t native_handle = 0; + ASSERT_SUCCESS(urMemGetNativeHandle(buffer, device, &native_handle)); + + ur_mem_handle_t mem = nullptr; + ur_mem_native_properties_t props = { + /*.stype =*/UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, + /*.pNext =*/nullptr, + /*.isNativeHandleOwned =*/true, + }; + ASSERT_SUCCESS(urMemBufferCreateWithNativeHandle(native_handle, context, + &props, &mem)); + ASSERT_NE(nullptr, mem); + ur_context_handle_t mem_context = nullptr; + ASSERT_SUCCESS(urMemGetInfo(mem, UR_MEM_INFO_CONTEXT, + sizeof(ur_context_handle_t), &mem_context, + nullptr)); + ASSERT_EQ(context, mem_context); +} + +TEST_P(urMemBufferCreateWithNativeHandleTest, SuccessWithUnOwnedNativeHandle) { + ur_native_handle_t native_handle = 0; + ASSERT_SUCCESS(urMemGetNativeHandle(buffer, device, &native_handle)); + + ur_mem_handle_t mem = nullptr; + ur_mem_native_properties_t props = { + /*.stype =*/UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, + /*.pNext =*/nullptr, + /*.isNativeHandleOwned =*/false, + }; + ASSERT_SUCCESS(urMemBufferCreateWithNativeHandle(native_handle, context, + &props, &mem)); + ASSERT_NE(nullptr, mem); + + ur_context_handle_t mem_context = nullptr; + ASSERT_SUCCESS(urMemGetInfo(mem, UR_MEM_INFO_CONTEXT, + sizeof(ur_context_handle_t), &mem_context, + 
nullptr)); + ASSERT_EQ(context, mem_context); +} + +TEST_P(urMemBufferCreateWithNativeHandleTest, InvalidNullHandle) { + ur_native_handle_t hNativeMem = 0; + ASSERT_SUCCESS(urMemGetNativeHandle(buffer, device, &hNativeMem)); + + ur_mem_handle_t mem = nullptr; + ur_mem_native_properties_t props = { + /*.stype =*/UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, + /*.pNext =*/nullptr, + /*.isNativeHandleOwned =*/false, + }; + ASSERT_EQ( + urMemBufferCreateWithNativeHandle(hNativeMem, nullptr, &props, &mem), + UR_RESULT_ERROR_INVALID_NULL_HANDLE); +} + +TEST_P(urMemBufferCreateWithNativeHandleTest, InvalidNullPointer) { + ur_native_handle_t hNativeMem = 0; + ASSERT_SUCCESS(urMemGetNativeHandle(buffer, device, &hNativeMem)); + + ur_mem_native_properties_t props = { + /*.stype =*/UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, + /*.pNext =*/nullptr, + /*.isNativeHandleOwned =*/false, + }; + ASSERT_EQ( + urMemBufferCreateWithNativeHandle(hNativeMem, context, &props, nullptr), + UR_RESULT_ERROR_INVALID_NULL_POINTER); +} + +using urMemBufferMultiQueueMemBufferTest = uur::urMultiDeviceMemBufferQueueTest; TEST_F(urMemBufferMultiQueueMemBufferTest, WriteBack) { void *ptr; ASSERT_SUCCESS(urUSMHostAlloc(context, nullptr, nullptr, size, &ptr)); - ur_mem_handle_t mem = nullptr; ur_mem_native_properties_t props = { /*.stype =*/UR_STRUCTURE_TYPE_MEM_NATIVE_PROPERTIES, @@ -54,26 +115,20 @@ TEST_F(urMemBufferMultiQueueMemBufferTest, WriteBack) { reinterpret_cast(ptr), context, &props, &mem)); } ASSERT_NE(mem, nullptr); - const uint8_t pattern = 0x11; std::vector src(size, pattern); - // write data to the buffer and destroy the buffer ASSERT_SUCCESS(urEnqueueMemBufferWrite(queues[1], mem, true, 0, size, src.data(), 0, nullptr, nullptr)); ASSERT_SUCCESS(urMemRelease(mem)); - // Create the buffer again and read back the data, data should have been written to the // memory behind the native handle. Use different queue to test data migration logic. 
ASSERT_SUCCESS(urMemBufferCreateWithNativeHandle( reinterpret_cast(ptr), context, &props, &mem)); ASSERT_NE(mem, nullptr); - std::vector dst(size, 0); ASSERT_SUCCESS(urEnqueueMemBufferRead(queues[0], mem, true, 0, size, dst.data(), 0, nullptr, nullptr)); - ASSERT_EQ(src, dst); - ASSERT_SUCCESS(urMemRelease(mem)); } diff --git a/test/conformance/memory/urMemBufferPartition.cpp b/test/conformance/memory/urMemBufferPartition.cpp index 01ab7f8c62..fca20693c7 100644 --- a/test/conformance/memory/urMemBufferPartition.cpp +++ b/test/conformance/memory/urMemBufferPartition.cpp @@ -6,19 +6,32 @@ #include "uur/fixtures.h" #include "uur/raii.h" -using urMemBufferPartitionTest = uur::urMemBufferTest; -UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemBufferPartitionTest); +using urMemBufferPartitionWithFlagsTest = + uur::urContextTestWithParam; +UUR_TEST_SUITE_P(urMemBufferPartitionWithFlagsTest, + ::testing::Values(UR_MEM_FLAG_READ_WRITE, + UR_MEM_FLAG_WRITE_ONLY, + UR_MEM_FLAG_READ_ONLY), + uur::deviceTestWithParamPrinter); -TEST_P(urMemBufferPartitionTest, Success) { - ur_buffer_region_t region{UR_STRUCTURE_TYPE_BUFFER_REGION, nullptr, 0, - 1024}; +TEST_P(urMemBufferPartitionWithFlagsTest, Success) { + uur::raii::Mem buffer = nullptr; + + ASSERT_SUCCESS( + urMemBufferCreate(context, getParam(), 1024, nullptr, buffer.ptr())); + ASSERT_NE(nullptr, buffer); + + ur_buffer_region_t region{UR_STRUCTURE_TYPE_BUFFER_REGION, nullptr, 0, 512}; uur::raii::Mem partition = nullptr; - ASSERT_SUCCESS(urMemBufferPartition(buffer, UR_MEM_FLAG_READ_WRITE, + ASSERT_SUCCESS(urMemBufferPartition(buffer, getParam(), UR_BUFFER_CREATE_TYPE_REGION, &region, partition.ptr())); ASSERT_NE(partition, nullptr); } +using urMemBufferPartitionTest = uur::urMemBufferTest; +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemBufferPartitionTest); + TEST_P(urMemBufferPartitionTest, InvalidNullHandleBuffer) { ur_buffer_region_t region{UR_STRUCTURE_TYPE_BUFFER_REGION, nullptr, 0, 1024}; diff --git
a/test/conformance/memory/urMemGetInfo.cpp b/test/conformance/memory/urMemGetInfo.cpp index a28b581a26..3f933d39a9 100644 --- a/test/conformance/memory/urMemGetInfo.cpp +++ b/test/conformance/memory/urMemGetInfo.cpp @@ -6,19 +6,21 @@ #include #include -using urMemGetInfoTest = uur::urMemBufferTestWithParam; +using urMemGetInfoTestWithParam = uur::urMemBufferTestWithParam; -static constexpr std::array mem_info_values{ - UR_MEM_INFO_SIZE, UR_MEM_INFO_CONTEXT}; +static constexpr std::array mem_info_values{ + UR_MEM_INFO_SIZE, UR_MEM_INFO_CONTEXT, UR_MEM_INFO_REFERENCE_COUNT}; static std::unordered_map mem_info_size_map = { {UR_MEM_INFO_SIZE, sizeof(size_t)}, {UR_MEM_INFO_CONTEXT, sizeof(ur_context_handle_t)}, + {UR_MEM_INFO_REFERENCE_COUNT, sizeof(uint32_t)}, }; -UUR_TEST_SUITE_P(urMemGetInfoTest, ::testing::ValuesIn(mem_info_values), +UUR_TEST_SUITE_P(urMemGetInfoTestWithParam, + ::testing::ValuesIn(mem_info_values), uur::deviceTestWithParamPrinter); -TEST_P(urMemGetInfoTest, Success) { +TEST_P(urMemGetInfoTestWithParam, Success) { ur_mem_info_t info = getParam(); size_t size; ASSERT_SUCCESS(urMemGetInfo(buffer, info, 0, nullptr, &size)); @@ -44,11 +46,20 @@ TEST_P(urMemGetInfoTest, Success) { ASSERT_GE(*returned_size, allocation_size); break; } + case UR_MEM_INFO_REFERENCE_COUNT: { + const size_t ReferenceCount = + *reinterpret_cast(info_data.data()); + ASSERT_GT(ReferenceCount, 0); + break; + } default: break; } } +using urMemGetInfoTest = uur::urMemBufferTest; +UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemGetInfoTest); + TEST_P(urMemGetInfoTest, InvalidNullHandleMemory) { size_t mem_size = 0; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, @@ -108,16 +119,30 @@ TEST_P(urMemGetInfoImageTest, Success) { std::vector info_data(size); ASSERT_SUCCESS(urMemGetInfo(image, info, size, info_data.data(), nullptr)); - if (info == UR_MEM_INFO_SIZE) { + switch (info) { + case UR_MEM_INFO_SIZE: { const size_t ExpectedPixelSize = sizeof(float) * 4 /*NumChannels*/; const size_t 
ExpectedImageSize = ExpectedPixelSize * desc.arraySize * desc.width * desc.height * desc.depth; const size_t ImageSizeBytes = *reinterpret_cast(info_data.data()); ASSERT_EQ(ImageSizeBytes, ExpectedImageSize); - } else if (info == UR_MEM_INFO_CONTEXT) { + break; + } + case UR_MEM_INFO_CONTEXT: { ur_context_handle_t InfoContext = *reinterpret_cast(info_data.data()); ASSERT_EQ(InfoContext, context); + break; + } + case UR_MEM_INFO_REFERENCE_COUNT: { + const size_t ReferenceCount = + *reinterpret_cast(info_data.data()); + ASSERT_GT(ReferenceCount, 0); + break; + } + + default: + break; } } diff --git a/test/conformance/memory/urMemImageCreate.cpp b/test/conformance/memory/urMemImageCreate.cpp index 28d5d9c4e3..bfb4f6e719 100644 --- a/test/conformance/memory/urMemImageCreate.cpp +++ b/test/conformance/memory/urMemImageCreate.cpp @@ -286,6 +286,25 @@ TEST_P(urMemImageCreateTest, InvalidImageDescSlicePitch) { nullptr, image_handle.ptr())); } +TEST_P(urMemImageCreateTest, InvalidHostPtrNullHost) { + uur::raii::Mem image_handle = nullptr; + ur_mem_flags_t flags = + UR_MEM_FLAG_USE_HOST_POINTER | UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER; + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_HOST_PTR, + urMemImageCreate(context, flags, &image_format, + &image_desc, nullptr, + image_handle.ptr())); +} + +TEST_P(urMemImageCreateTest, InvalidHostPtrValidHost) { + uur::raii::Mem image_handle = nullptr; + ur_mem_flags_t flags = 0; + int data = 42; + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_HOST_PTR, + urMemImageCreate(context, flags, &image_format, + &image_desc, &data, image_handle.ptr())); +} + using urMemImageCreateWithHostPtrFlagsTest = urMemImageCreateTestWithParam; @@ -306,11 +325,3 @@ TEST_P(urMemImageCreateWithHostPtrFlagsTest, Success) { image_handle.ptr())); ASSERT_NE(nullptr, image_handle.ptr()); } - -TEST_P(urMemImageCreateWithHostPtrFlagsTest, InvalidHostPtr) { - uur::raii::Mem image_handle = nullptr; - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_HOST_PTR, - urMemImageCreate(context, 
getParam(), &image_format, - &image_desc, nullptr, - image_handle.ptr())); -} diff --git a/test/conformance/memory/urMemImageCreateWithNativeHandle.cpp b/test/conformance/memory/urMemImageCreateWithNativeHandle.cpp index 3404b4203f..c33cc814a3 100644 --- a/test/conformance/memory/urMemImageCreateWithNativeHandle.cpp +++ b/test/conformance/memory/urMemImageCreateWithNativeHandle.cpp @@ -10,15 +10,11 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemImageCreateWithNativeHandleTest); TEST_P(urMemImageCreateWithNativeHandleTest, Success) { ur_native_handle_t native_handle = 0; - if (urMemGetNativeHandle(image, device, &native_handle)) { - GTEST_SKIP(); - } + ASSERT_SUCCESS(urMemGetNativeHandle(image, device, &native_handle)); ur_mem_handle_t mem = nullptr; - ASSERT_EQ_RESULT( - UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urMemImageCreateWithNativeHandle(native_handle, context, &image_format, - &image_desc, nullptr, &mem)); + ASSERT_SUCCESS(urMemImageCreateWithNativeHandle( + native_handle, context, &image_format, &image_desc, nullptr, &mem)); ASSERT_NE(nullptr, mem); ur_context_handle_t mem_context = nullptr; @@ -27,3 +23,24 @@ TEST_P(urMemImageCreateWithNativeHandleTest, Success) { nullptr)); ASSERT_EQ(context, mem_context); } + +TEST_P(urMemImageCreateWithNativeHandleTest, InvalidNullHandle) { + ur_native_handle_t native_handle = 0; + ASSERT_SUCCESS(urMemGetNativeHandle(image, device, &native_handle)); + + ur_mem_handle_t mem = nullptr; + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_NULL_HANDLE, + urMemImageCreateWithNativeHandle(native_handle, nullptr, &image_format, + &image_desc, nullptr, &mem)); +} + +TEST_P(urMemImageCreateWithNativeHandleTest, InvalidNullPointer) { + ur_native_handle_t native_handle = 0; + ASSERT_SUCCESS(urMemGetNativeHandle(image, device, &native_handle)); + + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_NULL_POINTER, + urMemImageCreateWithNativeHandle(native_handle, context, &image_format, + &image_desc, nullptr, nullptr)); +} diff --git 
a/test/conformance/memory/urMemRelease.cpp b/test/conformance/memory/urMemRelease.cpp index 3e84142638..730b6aa854 100644 --- a/test/conformance/memory/urMemRelease.cpp +++ b/test/conformance/memory/urMemRelease.cpp @@ -16,3 +16,24 @@ TEST_P(urMemReleaseTest, InvalidNullHandleMem) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, urMemRelease(nullptr)); } + +TEST_P(urMemReleaseTest, CheckReferenceCount) { + uint32_t referenceCount = 0; + ASSERT_SUCCESS(urMemGetInfo(buffer, UR_MEM_INFO_REFERENCE_COUNT, + sizeof(referenceCount), &referenceCount, + nullptr)); + ASSERT_EQ(referenceCount, 1); + + ASSERT_SUCCESS(urMemRetain(buffer)); + ASSERT_SUCCESS(urMemGetInfo(buffer, UR_MEM_INFO_REFERENCE_COUNT, + sizeof(referenceCount), &referenceCount, + nullptr)); + ASSERT_EQ(referenceCount, 2); + + ASSERT_SUCCESS(urMemRelease(buffer)); + + ASSERT_SUCCESS(urMemGetInfo(buffer, UR_MEM_INFO_REFERENCE_COUNT, + sizeof(referenceCount), &referenceCount, + nullptr)); + ASSERT_EQ(referenceCount, 1); +} diff --git a/test/conformance/memory/urMemRetain.cpp b/test/conformance/memory/urMemRetain.cpp index 895d68097e..a58896a91b 100644 --- a/test/conformance/memory/urMemRetain.cpp +++ b/test/conformance/memory/urMemRetain.cpp @@ -9,9 +9,31 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urMemRetainTest); TEST_P(urMemRetainTest, Success) { ASSERT_SUCCESS(urMemRetain(buffer)); - EXPECT_SUCCESS(urMemRelease(buffer)); + ASSERT_SUCCESS(urMemRelease(buffer)); } TEST_P(urMemRetainTest, InvalidNullHandleMem) { ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, urMemRetain(nullptr)); } + +TEST_P(urMemRetainTest, CheckReferenceCount) { + uint32_t referenceCount = 0; + ASSERT_SUCCESS(urMemGetInfo(buffer, UR_MEM_INFO_REFERENCE_COUNT, + sizeof(referenceCount), &referenceCount, + nullptr)); + ASSERT_EQ(referenceCount, 1); + + { ASSERT_SUCCESS(urMemRetain(buffer)); } + + ASSERT_SUCCESS(urMemGetInfo(buffer, UR_MEM_INFO_REFERENCE_COUNT, + sizeof(referenceCount), &referenceCount, + nullptr)); + 
ASSERT_EQ(referenceCount, 2); + + ASSERT_SUCCESS(urMemRelease(buffer)); + + ASSERT_SUCCESS(urMemGetInfo(buffer, UR_MEM_INFO_REFERENCE_COUNT, + sizeof(referenceCount), &referenceCount, + nullptr)); + ASSERT_EQ(referenceCount, 1); +} diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index d1b373af84..80b6db5268 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -205,6 +205,9 @@ struct urMemImageTest : urContextTest { if (!imageSupported) { GTEST_SKIP(); } + ASSERT_SUCCESS(urMemImageCreate(context, UR_MEM_FLAG_READ_WRITE, + &image_format, &image_desc, nullptr, + &image)); } void TearDown() override { @@ -215,7 +218,7 @@ struct urMemImageTest : urContextTest { } ur_image_format_t image_format = { - /*.channelOrder =*/UR_IMAGE_CHANNEL_ORDER_ARGB, + /*.channelOrder =*/UR_IMAGE_CHANNEL_ORDER_RGBA, /*.channelType =*/UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, }; ur_image_desc_t image_desc = { @@ -226,8 +229,8 @@ struct urMemImageTest : urContextTest { /*.height =*/16, /*.depth =*/1, /*.arraySize =*/1, - /*.rowPitch =*/16 * sizeof(char[4]), - /*.slicePitch =*/16 * 16 * sizeof(char[4]), + /*.rowPitch =*/0, + /*.slicePitch =*/0, /*.numMipLevel =*/0, /*.numSamples =*/0, }; From 65edd6b88855960d8c350e28505f438a71275946 Mon Sep 17 00:00:00 2001 From: Martin Morrison-Grant Date: Fri, 25 Oct 2024 17:43:27 +0100 Subject: [PATCH 08/37] Check if properties param is null in L0 urMemBufferCreateWithNativeHandle and set OwnNativeHandle to false by default. 
--- source/adapters/level_zero/memory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index cd12800bdd..456ad56b80 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -1746,7 +1746,7 @@ ur_result_t urMemBufferCreateWithNativeHandle( ur_mem_handle_t *Mem ///< [out] pointer to handle of buffer memory object created. ) { - bool OwnNativeHandle = Properties->isNativeHandleOwned; + bool OwnNativeHandle = Properties ? Properties->isNativeHandleOwned : false; std::shared_lock Lock(Context->Mutex); From 827e857162f6ae0f6e7749b55195f1cfba14c4b2 Mon Sep 17 00:00:00 2001 From: Martin Morrison-Grant Date: Mon, 28 Oct 2024 09:26:56 +0000 Subject: [PATCH 09/37] Add urMemImageCreateWithNativeHandleTest.Success to L0 match file. --- source/adapters/level_zero/v2/memory.cpp | 2 +- test/conformance/memory/memory_adapter_level_zero.match | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/source/adapters/level_zero/v2/memory.cpp b/source/adapters/level_zero/v2/memory.cpp index 05a25aa256..52bfea42a4 100644 --- a/source/adapters/level_zero/v2/memory.cpp +++ b/source/adapters/level_zero/v2/memory.cpp @@ -523,7 +523,7 @@ ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName, return returnValue(size_t{hMemory->getSize()}); } case UR_MEM_INFO_REFERENCE_COUNT: { - return returnValue(hMemory->RefCount.load()); + return returnValue(hMemory->getRefCount().load()); } default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/test/conformance/memory/memory_adapter_level_zero.match b/test/conformance/memory/memory_adapter_level_zero.match index f25d8f23f3..bce63823cc 100644 --- a/test/conformance/memory/memory_adapter_level_zero.match +++ b/test/conformance/memory/memory_adapter_level_zero.match @@ -4,6 +4,7 @@ urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_L 
urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__UR_MEM_FLAG_READ_ONLY urMemBufferPartitionTest.InvalidValueCreateType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urMemImageCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} {{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE {{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 {{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 From dd920712a37c17fada6a64d4736350905faf6023 Mon Sep 17 00:00:00 2001 From: Martin Morrison-Grant Date: Wed, 6 Nov 2024 11:39:24 +0000 Subject: [PATCH 10/37] Add {{OPT}} to various L0V2 match entries --- .../memory/memory_adapter_level_zero_v2.match | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/conformance/memory/memory_adapter_level_zero_v2.match b/test/conformance/memory/memory_adapter_level_zero_v2.match index a0a5823fbc..3ff57fc1e5 100644 --- a/test/conformance/memory/memory_adapter_level_zero_v2.match +++ b/test/conformance/memory/memory_adapter_level_zero_v2.match @@ -1,9 +1,9 @@ {{NONDETERMINISTIC}} -urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_WRITE_ONLY -urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_READ_ONLY -urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_READ_WRITE 
-urMemBufferPartitionTest.InvalidValueCreateType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_WRITE_ONLY +{{OPT}}urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_READ_ONLY +{{OPT}}urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_READ_WRITE +{{OPT}}urMemBufferPartitionTest.InvalidValueCreateType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ {{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE {{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT {{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_REFERENCE_COUNT @@ -281,11 +281,11 @@ urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/Intel_R__oneAPI {{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH {{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT {{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -urMemBufferCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} 
-urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -urMemBufferCreateWithNativeHandleTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -urMemBufferCreateWithNativeHandleTest.InvalidNullPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +{{OPT}}urMemBufferCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +{{OPT}}urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +{{OPT}}urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +{{OPT}}urMemBufferCreateWithNativeHandleTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +{{OPT}}urMemBufferCreateWithNativeHandleTest.InvalidNullPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} {{OPT}}urMemImageCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} {{OPT}}urMemImageCreateWithNativeHandleTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} {{OPT}}urMemImageCreateWithNativeHandleTest.InvalidNullPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} From e02bd829db426160f45439d8d7e48a88e89ed39d Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Mon, 28 Oct 2024 08:35:50 +0000 Subject: [PATCH 11/37] Revert "Revert "[NATIVECPU] Initial implementation of events on Native CPU"" This reverts commit 93e2a10c954f39d292b9f6c0ada8a3ad17af536d. 
--- source/adapters/native_cpu/common.hpp | 29 ++++ source/adapters/native_cpu/context.hpp | 18 +-- source/adapters/native_cpu/device.cpp | 15 +- source/adapters/native_cpu/enqueue.cpp | 142 +++++++++++------- source/adapters/native_cpu/event.cpp | 108 ++++++++++--- source/adapters/native_cpu/event.hpp | 66 ++++++++ source/adapters/native_cpu/kernel.cpp | 18 +-- source/adapters/native_cpu/kernel.hpp | 92 +++++++++--- source/adapters/native_cpu/queue.cpp | 9 +- source/adapters/native_cpu/queue.hpp | 44 +++++- .../event/event_adapter_native_cpu.match | 8 - .../queue/queue_adapter_native_cpu.match | 1 - 12 files changed, 399 insertions(+), 151 deletions(-) create mode 100644 source/adapters/native_cpu/event.hpp diff --git a/source/adapters/native_cpu/common.hpp b/source/adapters/native_cpu/common.hpp index 2b4aabfbad..af0d11c5af 100644 --- a/source/adapters/native_cpu/common.hpp +++ b/source/adapters/native_cpu/common.hpp @@ -12,6 +12,7 @@ #include "logger/ur_logger.hpp" #include "ur/ur.hpp" +#include constexpr size_t MaxMessageSize = 256; @@ -70,3 +71,31 @@ template inline void decrementOrDelete(T *refC) { if (refC->decrementReferenceCount() == 0) delete refC; } + +inline uint64_t get_timestamp() { + return std::chrono::duration_cast( + std::chrono::high_resolution_clock::now().time_since_epoch()) + .count(); +} + +namespace native_cpu { + +inline void *aligned_malloc(size_t alignment, size_t size) { + void *ptr = nullptr; +#ifdef _MSC_VER + ptr = _aligned_malloc(size, alignment); +#else + ptr = std::aligned_alloc(alignment, size); +#endif + return ptr; +} + +inline void aligned_free(void *ptr) { +#ifdef _MSC_VER + _aligned_free(ptr); +#else + free(ptr); +#endif +} + +} // namespace native_cpu diff --git a/source/adapters/native_cpu/context.hpp b/source/adapters/native_cpu/context.hpp index c59ab4eafb..b9d2d22dd1 100644 --- a/source/adapters/native_cpu/context.hpp +++ b/source/adapters/native_cpu/context.hpp @@ -64,17 +64,10 @@ static size_t get_padding(uint32_t 
alignment) { // allocation so that the pointer returned to the user // always satisfies (ptr % align) == 0. static inline void *malloc_impl(uint32_t alignment, size_t size) { - void *ptr = nullptr; assert(alignment >= alignof(usm_alloc_info) && "memory not aligned to usm_alloc_info"); -#ifdef _MSC_VER - ptr = _aligned_malloc(alloc_header_size + get_padding(alignment) + size, - alignment); - -#else - ptr = std::aligned_alloc(alignment, - alloc_header_size + get_padding(alignment) + size); -#endif + void *ptr = native_cpu::aligned_malloc( + alignment, alloc_header_size + get_padding(alignment) + size); return ptr; } @@ -100,11 +93,8 @@ struct ur_context_handle_t_ : RefCounted { const native_cpu::usm_alloc_info &info = native_cpu::get_alloc_info(ptr); UR_ASSERT(info.type != UR_USM_TYPE_UNKNOWN, UR_RESULT_ERROR_INVALID_MEM_OBJECT); -#ifdef _MSC_VER - _aligned_free(info.base_alloc_ptr); -#else - free(info.base_alloc_ptr); -#endif + + native_cpu::aligned_free(info.base_alloc_ptr); allocations.erase(ptr); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index d744d6290b..258d40ff21 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -10,6 +10,7 @@ #include +#include "common.hpp" #include "platform.hpp" #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__) @@ -247,7 +248,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(uint32_t{4}); case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF: return ReturnValue(uint32_t{16}); - // Imported from level_zero case UR_DEVICE_INFO_USM_HOST_SUPPORT: case UR_DEVICE_INFO_USM_DEVICE_SUPPORT: case UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT: @@ -472,19 +472,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( ur_device_handle_t hDevice, uint64_t *pDeviceTimestamp, uint64_t 
*pHostTimestamp) { - std::ignore = hDevice; // todo + std::ignore = hDevice; if (pHostTimestamp) { - using namespace std::chrono; - *pHostTimestamp = - duration_cast(steady_clock::now().time_since_epoch()) - .count(); + *pHostTimestamp = get_timestamp(); } if (pDeviceTimestamp) { - // todo: calculate elapsed time properly - using namespace std::chrono; - *pDeviceTimestamp = - duration_cast(steady_clock::now().time_since_epoch()) - .count(); + *pDeviceTimestamp = get_timestamp(); } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/enqueue.cpp b/source/adapters/native_cpu/enqueue.cpp index 33d8c35c36..7e03b323cc 100644 --- a/source/adapters/native_cpu/enqueue.cpp +++ b/source/adapters/native_cpu/enqueue.cpp @@ -13,6 +13,7 @@ #include "ur_api.h" #include "common.hpp" +#include "event.hpp" #include "kernel.hpp" #include "memory.hpp" #include "queue.hpp" @@ -67,10 +68,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; + urEventWait(numEventsInWaitList, phEventWaitList); UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(pGlobalWorkOffset, UR_RESULT_ERROR_INVALID_NULL_POINTER); @@ -103,10 +102,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( } // TODO: add proper error checking - // TODO: add proper event dep management native_cpu::NDRDescT ndr(workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize); - auto &tp = hQueue->device->tp; + auto &tp = hQueue->getDevice()->tp; const size_t numParallelThreads = tp.num_threads(); hKernel->updateMemPool(numParallelThreads); std::vector> futures; @@ -118,6 +116,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( 
ndr.GlobalSize[2], ndr.LocalSize[0], ndr.LocalSize[1], ndr.LocalSize[2], ndr.GlobalOffset[0], ndr.GlobalOffset[1], ndr.GlobalOffset[2]); + auto event = new ur_event_handle_t_(hQueue, UR_COMMAND_KERNEL_LAUNCH); + event->tick_start(); + #ifndef NATIVECPU_USE_OCK hKernel->handleLocalArgs(1, 0); for (unsigned g2 = 0; g2 < numWG2; g2++) { @@ -127,7 +128,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( for (unsigned local1 = 0; local1 < ndr.LocalSize[1]; local1++) { for (unsigned local0 = 0; local0 < ndr.LocalSize[0]; local0++) { state.update(g0, g1, g2, local0, local1, local2); - hKernel->_subhandler(hKernel->_args.data(), &state); + hKernel->_subhandler(hKernel->getArgs().data(), &state); } } } @@ -158,13 +159,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( for (unsigned g2 = 0; g2 < numWG2; g2++) { for (unsigned g1 = 0; g1 < numWG1; g1++) { for (unsigned g0 = 0; g0 < new_num_work_groups_0; g0 += 1) { - futures.emplace_back( - tp.schedule_task([&ndr = std::as_const(ndr), itemsPerThread, - hKernel, g0, g1, g2](size_t) { + futures.emplace_back(tp.schedule_task( + [ndr, itemsPerThread, kernel = *hKernel, g0, g1, g2](size_t) { native_cpu::state resized_state = getResizedState(ndr, itemsPerThread); resized_state.update(g0, g1, g2); - hKernel->_subhandler(hKernel->_args.data(), &resized_state); + kernel._subhandler(kernel.getArgs().data(), &resized_state); })); } // Peel the remaining work items. 
Since the local size is 1, we iterate @@ -172,7 +172,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( for (unsigned g0 = new_num_work_groups_0 * itemsPerThread; g0 < numWG0; g0++) { state.update(g0, g1, g2); - hKernel->_subhandler(hKernel->_args.data(), &state); + hKernel->_subhandler(hKernel->getArgs().data(), &state); } } } @@ -190,7 +190,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( for (unsigned g0 = 0; g0 < numWG0; g0++) { kernel.handleLocalArgs(numParallelThreads, threadId); state.update(g0, g1, g2); - kernel._subhandler(kernel._args.data(), &state); + kernel._subhandler(kernel.getArgs().data(), &state); } })); } @@ -207,7 +207,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( size_t threadId, ur_kernel_handle_t_ kernel) mutable { kernel.handleLocalArgs(numParallelThreads, threadId); state.update(g0, g1, g2); - kernel._subhandler(kernel._args.data(), &state); + kernel._subhandler(kernel.getArgs().data(), &state); }); } } @@ -216,11 +216,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( auto groupsPerThread = numGroups / numParallelThreads; auto remainder = numGroups % numParallelThreads; for (unsigned thread = 0; thread < numParallelThreads; thread++) { - futures.emplace_back(tp.schedule_task( - [&groups, thread, groupsPerThread, hKernel](size_t threadId) { + futures.emplace_back( + tp.schedule_task([groups, thread, groupsPerThread, + kernel = *hKernel](size_t threadId) { for (unsigned i = 0; i < groupsPerThread; i++) { auto index = thread * groupsPerThread + i; - groups[index](threadId, *hKernel); + groups[index](threadId, kernel); } })); } @@ -228,25 +229,32 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( // schedule the remaining tasks if (remainder) { futures.emplace_back( - tp.schedule_task([&groups, remainder, + tp.schedule_task([groups, remainder, scheduled = numParallelThreads * groupsPerThread, - hKernel](size_t threadId) { + kernel = *hKernel](size_t threadId) { for 
(unsigned i = 0; i < remainder; i++) { auto index = scheduled + i; - groups[index](threadId, *hKernel); + groups[index](threadId, kernel); } })); } } } - for (auto &f : futures) - f.get(); #endif // NATIVECPU_USE_OCK - // TODO: we should avoid calling clear here by avoiding using push_back - // in setKernelArgs. - hKernel->_args.clear(); - hKernel->_localArgInfo.clear(); + event->set_futures(futures); + + *phEvent = event; + event->set_callback([hKernel, event]() { + event->tick_end(); + // TODO: avoid calling clear() here. + hKernel->_localArgInfo.clear(); + }); + + if (hQueue->isInOrder()) { + urEventWait(1, phEvent); + } + return UR_RESULT_SUCCESS; } @@ -274,15 +282,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( template static inline ur_result_t enqueueMemBufferReadWriteRect_impl( - ur_queue_handle_t, ur_mem_handle_t Buff, bool, + ur_queue_handle_t hQueue, ur_mem_handle_t Buff, bool, ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset, ur_rect_region_t region, size_t BufferRowPitch, size_t BufferSlicePitch, size_t HostRowPitch, size_t HostSlicePitch, typename std::conditional::type DstMem, - uint32_t, const ur_event_handle_t *, ur_event_handle_t *) { - // TODO: events, blocking, check other constraints, performance optimizations + uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + ur_event_handle_t event; + if constexpr (IsRead) + event = new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_READ_RECT); + else + event = new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_WRITE_RECT); + event->tick_start(); + // TODO: blocking, check other constraints, performance optimizations // More sharing with level_zero where possible + urEventWait(NumEventsInWaitList, phEventWaitList); if (BufferRowPitch == 0) BufferRowPitch = region.width; if (BufferSlicePitch == 0) @@ -306,21 +322,26 @@ static inline ur_result_t enqueueMemBufferReadWriteRect_impl( else buff_mem = 
ur_cast(DstMem)[host_origin]; } + + event->tick_end(); + *phEvent = event; return UR_RESULT_SUCCESS; } static inline ur_result_t doCopy_impl(ur_queue_handle_t hQueue, void *DstPtr, const void *SrcPtr, size_t Size, uint32_t numEventsInWaitList, - const ur_event_handle_t *EventWaitList, - ur_event_handle_t *Event) { - // todo: non-blocking, events, UR integration - std::ignore = EventWaitList; - std::ignore = Event; - std::ignore = hQueue; - std::ignore = numEventsInWaitList; + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, + ur_command_t command_type) { + ur_event_handle_t event = new ur_event_handle_t_(hQueue, command_type); + event->tick_start(); + urEventWait(numEventsInWaitList, phEventWaitList); if (SrcPtr != DstPtr && Size) memmove(DstPtr, SrcPtr, Size); + event->tick_end(); + if (phEvent) + *phEvent = event; return UR_RESULT_SUCCESS; } @@ -331,8 +352,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( std::ignore = blockingRead; void *FromPtr = /*Src*/ hBuffer->_mem + offset; - return doCopy_impl(hQueue, pDst, FromPtr, size, numEventsInWaitList, - phEventWaitList, phEvent); + auto res = doCopy_impl(hQueue, pDst, FromPtr, size, numEventsInWaitList, + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_READ); + return res; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( @@ -342,8 +364,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( std::ignore = blockingWrite; void *ToPtr = hBuffer->_mem + offset; - return doCopy_impl(hQueue, ToPtr, pSrc, size, numEventsInWaitList, - phEventWaitList, phEvent); + auto res = doCopy_impl(hQueue, ToPtr, pSrc, size, numEventsInWaitList, + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_WRITE); + return res; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( @@ -377,10 +400,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( ur_mem_handle_t hBufferDst, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numEventsInWaitList, const 
ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + urEventWait(numEventsInWaitList, phEventWaitList); const void *SrcPtr = hBufferSrc->_mem + srcOffset; void *DstPtr = hBufferDst->_mem + dstOffset; return doCopy_impl(hQueue, DstPtr, SrcPtr, size, numEventsInWaitList, - phEventWaitList, phEvent); + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_COPY); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( @@ -484,15 +508,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_map_flags_t mapFlags, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, void **ppRetMap) { - std::ignore = hQueue; std::ignore = blockingMap; std::ignore = mapFlags; std::ignore = size; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; + urEventWait(numEventsInWaitList, phEventWaitList); + ur_event_handle_t event = + new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_MAP); + event->tick_start(); *ppRetMap = hBuffer->_mem + offset; + event->tick_end(); + *phEvent = event; return UR_RESULT_SUCCESS; } @@ -501,12 +527,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( ur_queue_handle_t hQueue, ur_mem_handle_t hMem, void *pMappedPtr, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hQueue; std::ignore = hMem; std::ignore = pMappedPtr; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; + urEventWait(numEventsInWaitList, phEventWaitList); + *phEvent = new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_UNMAP); return UR_RESULT_SUCCESS; } @@ -515,10 +539,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, void *ptr, size_t patternSize, const void *pPattern, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = 
hQueue; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; + urEventWait(numEventsInWaitList, phEventWaitList); + ur_event_handle_t event = + new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_MAP); + event->tick_start(); UR_ASSERT(ptr, UR_RESULT_ERROR_INVALID_NULL_POINTER); UR_ASSERT(pPattern, UR_RESULT_ERROR_INVALID_NULL_POINTER); @@ -564,6 +588,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( } } } + + event->tick_end(); + *phEvent = event; + return UR_RESULT_SUCCESS; } @@ -571,17 +599,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hQueue; std::ignore = blocking; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; + urEventWait(numEventsInWaitList, phEventWaitList); + ur_event_handle_t event = + new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_MAP); + event->tick_start(); UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_QUEUE); UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER); UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER); memcpy(pDst, pSrc, size); + event->tick_end(); + *phEvent = event; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/event.cpp b/source/adapters/native_cpu/event.cpp index 9049e3c1b6..37eaf1f6d1 100644 --- a/source/adapters/native_cpu/event.cpp +++ b/source/adapters/native_cpu/event.cpp @@ -11,50 +11,70 @@ #include "ur_api.h" #include "common.hpp" +#include "event.hpp" +#include "queue.hpp" +#include +#include UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - std::ignore = hEvent; - std::ignore = propName; - std::ignore = propSize; - std::ignore = pPropValue; - std::ignore = pPropSizeRet; - - 
DIE_NO_IMPLEMENTATION; + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + switch (propName) { + case UR_EVENT_INFO_COMMAND_QUEUE: + return ReturnValue(hEvent->getQueue()); + case UR_EVENT_INFO_COMMAND_TYPE: + return ReturnValue(hEvent->getCommandType()); + case UR_EVENT_INFO_REFERENCE_COUNT: + return ReturnValue(hEvent->getReferenceCount()); + case UR_EVENT_INFO_COMMAND_EXECUTION_STATUS: + return ReturnValue(hEvent->getExecutionStatus()); + case UR_EVENT_INFO_CONTEXT: + return ReturnValue(hEvent->getContext()); + default: + break; + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; } UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( ur_event_handle_t hEvent, ur_profiling_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - std::ignore = hEvent; - std::ignore = propName; - std::ignore = propSize; - std::ignore = pPropValue; - std::ignore = pPropSizeRet; - - DIE_NO_IMPLEMENTATION; + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + switch (propName) { + case UR_PROFILING_INFO_COMMAND_START: + return ReturnValue(hEvent->get_start_timestamp()); + case UR_PROFILING_INFO_COMMAND_END: + return ReturnValue(hEvent->get_end_timestamp()); + case UR_PROFILING_INFO_COMMAND_QUEUED: + case UR_PROFILING_INFO_COMMAND_SUBMIT: + case UR_PROFILING_INFO_COMMAND_COMPLETE: + default: + break; + } + + return UR_RESULT_ERROR_INVALID_ENUMERATION; } UR_APIEXPORT ur_result_t UR_APICALL urEventWait(uint32_t numEvents, const ur_event_handle_t *phEventWaitList) { - std::ignore = numEvents; - std::ignore = phEventWaitList; - // TODO: currently we do everything synchronously so this is a no-op + for (uint32_t i = 0; i < numEvents; i++) { + phEventWaitList[i]->wait(); + } return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventRetain(ur_event_handle_t hEvent) { - std::ignore = hEvent; - - DIE_NO_IMPLEMENTATION; + hEvent->incrementReferenceCount(); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL 
urEventRelease(ur_event_handle_t hEvent) { - std::ignore = hEvent; - DIE_NO_IMPLEMENTATION; + decrementOrDelete(hEvent); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEventGetNativeHandle( @@ -99,3 +119,47 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( DIE_NO_IMPLEMENTATION; } + +ur_event_handle_t_::ur_event_handle_t_(ur_queue_handle_t queue, + ur_command_t command_type) + : queue(queue), context(queue->getContext()), command_type(command_type), + done(false) { + this->queue->addEvent(this); +} + +ur_event_handle_t_::~ur_event_handle_t_() { + if (!done) { + wait(); + } +} + +void ur_event_handle_t_::wait() { + std::unique_lock lock(mutex); + if (done) { + return; + } + for (auto &f : futures) { + f.wait(); + } + queue->removeEvent(this); + done = true; + // The callback may need to acquire the lock, so we unlock it here + lock.unlock(); + + if (callback) + callback(); +} + +void ur_event_handle_t_::tick_start() { + if (!queue->isProfiling()) + return; + std::lock_guard lock(mutex); + timestamp_start = get_timestamp(); +} + +void ur_event_handle_t_::tick_end() { + if (!queue->isProfiling()) + return; + std::lock_guard lock(mutex); + timestamp_end = get_timestamp(); +} diff --git a/source/adapters/native_cpu/event.hpp b/source/adapters/native_cpu/event.hpp new file mode 100644 index 0000000000..60176a33a6 --- /dev/null +++ b/source/adapters/native_cpu/event.hpp @@ -0,0 +1,66 @@ +//===----------- event.hpp - Native CPU Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once +#include "common.hpp" +#include "ur_api.h" +#include +#include +#include +#include + +struct ur_event_handle_t_ : RefCounted { + + ur_event_handle_t_(ur_queue_handle_t queue, ur_command_t command_type); + + ~ur_event_handle_t_(); + + void set_callback(const std::function &cb) { callback = cb; } + + void wait(); + + uint32_t getExecutionStatus() { + // TODO: add support for UR_EVENT_STATUS_RUNNING + std::lock_guard lock(mutex); + if (done) { + return UR_EVENT_STATUS_COMPLETE; + } + return UR_EVENT_STATUS_SUBMITTED; + } + + ur_queue_handle_t getQueue() const { return queue; } + + ur_context_handle_t getContext() const { return context; } + + ur_command_t getCommandType() const { return command_type; } + + void set_futures(std::vector> &fs) { + std::lock_guard lock(mutex); + futures = std::move(fs); + } + + void tick_start(); + + void tick_end(); + + uint64_t get_start_timestamp() const { return timestamp_start; } + + uint64_t get_end_timestamp() const { return timestamp_end; } + +private: + ur_queue_handle_t queue; + ur_context_handle_t context; + ur_command_t command_type; + bool done; + std::mutex mutex; + std::vector> futures; + std::function callback; + uint64_t timestamp_start = 0; + uint64_t timestamp_end = 0; +}; diff --git a/source/adapters/native_cpu/kernel.cpp b/source/adapters/native_cpu/kernel.cpp index af8906245c..596a3ffdf1 100644 --- a/source/adapters/native_cpu/kernel.cpp +++ b/source/adapters/native_cpu/kernel.cpp @@ -59,18 +59,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const ur_kernel_arg_value_properties_t *pProperties, const void *pArgValue) { - // Todo: error checking - // Todo: I think that the opencl spec (and therefore the pi spec mandates that - // arg is copied (this is why it is 
defined as const void*, I guess we should - // do it - // TODO: can args arrive out of order? + // TODO: error checking std::ignore = argIndex; std::ignore = pProperties; UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(argSize, UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE); - hKernel->_args.emplace_back(const_cast(pArgValue)); + hKernel->addArg(pArgValue, argIndex, argSize); return UR_RESULT_SUCCESS; } @@ -81,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal( std::ignore = pProperties; // emplace a placeholder kernel arg, gets replaced with a pointer to the // memory pool before enqueueing the kernel. - hKernel->_args.emplace_back(nullptr); + hKernel->addPtrArg(nullptr, argIndex); hKernel->_localArgInfo.emplace_back(argIndex, argSize); return UR_RESULT_SUCCESS; } @@ -221,14 +217,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer(ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_pointer_properties_t *pProperties, const void *pArgValue) { - // TODO: out_of_order args? std::ignore = argIndex; std::ignore = pProperties; UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(pArgValue, UR_RESULT_ERROR_INVALID_NULL_POINTER); - hKernel->_args.push_back(const_cast(pArgValue)); + hKernel->addPtrArg(const_cast(pArgValue), argIndex); return UR_RESULT_SUCCESS; } @@ -262,7 +257,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, const ur_kernel_arg_mem_obj_properties_t *pProperties, ur_mem_handle_t hArgValue) { - // TODO: out_of_order args? std::ignore = argIndex; std::ignore = pProperties; @@ -271,11 +265,11 @@ urKernelSetArgMemObj(ur_kernel_handle_t hKernel, uint32_t argIndex, // Taken from ur/adapters/cuda/kernel.cpp // zero-sized buffers are expected to be null. 
if (hArgValue == nullptr) { - hKernel->_args.emplace_back(nullptr); + hKernel->addPtrArg(nullptr, argIndex); return UR_RESULT_SUCCESS; } - hKernel->_args.emplace_back(hArgValue->_mem); + hKernel->addPtrArg(hArgValue->_mem, argIndex); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/kernel.hpp b/source/adapters/native_cpu/kernel.hpp index 084a0ee695..e2df672d05 100644 --- a/source/adapters/native_cpu/kernel.hpp +++ b/source/adapters/native_cpu/kernel.hpp @@ -11,22 +11,11 @@ #include "common.hpp" #include "nativecpu_state.hpp" #include "program.hpp" -#include +#include #include #include -namespace native_cpu { - -struct NativeCPUArgDesc { - void *MPtr; - - NativeCPUArgDesc(void *Ptr) : MPtr(Ptr){}; -}; - -} // namespace native_cpu - -using nativecpu_kernel_t = void(const native_cpu::NativeCPUArgDesc *, - native_cpu::state *); +using nativecpu_kernel_t = void(void *const *, native_cpu::state *); using nativecpu_ptr_t = nativecpu_kernel_t *; using nativecpu_task_t = std::function; @@ -44,9 +33,9 @@ struct ur_kernel_handle_t_ : RefCounted { : hProgram(hProgram), _name{name}, _subhandler{std::move(subhandler)} {} ur_kernel_handle_t_(const ur_kernel_handle_t_ &other) - : hProgram(other.hProgram), _name(other._name), - _subhandler(other._subhandler), _args(other._args), - _localArgInfo(other._localArgInfo), _localMemPool(other._localMemPool), + : Args(other.Args), hProgram(other.hProgram), _name(other._name), + _subhandler(other._subhandler), _localArgInfo(other._localArgInfo), + _localMemPool(other._localMemPool), _localMemPoolSize(other._localMemPoolSize), ReqdWGSize(other.ReqdWGSize) { incrementReferenceCount(); @@ -55,8 +44,10 @@ struct ur_kernel_handle_t_ : RefCounted { ~ur_kernel_handle_t_() { if (decrementReferenceCount() == 0) { free(_localMemPool); + Args.deallocate(); } } + ur_kernel_handle_t_(ur_program_handle_t hProgram, const char *name, nativecpu_task_t subhandler, std::optional ReqdWGSize, @@ -66,10 +57,67 @@ struct ur_kernel_handle_t_ : 
RefCounted { ReqdWGSize(ReqdWGSize), MaxWGSize(MaxWGSize), MaxLinearWGSize(MaxLinearWGSize) {} + struct arguments { + using args_index_t = std::vector; + args_index_t Indices; + std::vector ParamSizes; + std::vector OwnsMem; + static constexpr size_t MaxAlign = 16 * sizeof(double); + + /// Add an argument to the kernel. + /// If the argument existed before, it is replaced. + /// Otherwise, it is added. + /// Gaps are filled with empty arguments. + /// Implicit offset argument is kept at the back of the indices collection. + void addArg(size_t Index, size_t Size, const void *Arg) { + if (Index + 1 > Indices.size()) { + Indices.resize(Index + 1); + OwnsMem.resize(Index + 1); + ParamSizes.resize(Index + 1); + + // Update the stored value for the argument + Indices[Index] = native_cpu::aligned_malloc(MaxAlign, Size); + OwnsMem[Index] = true; + ParamSizes[Index] = Size; + } else { + if (ParamSizes[Index] != Size) { + Indices[Index] = realloc(Indices[Index], Size); + ParamSizes[Index] = Size; + } + } + std::memcpy(Indices[Index], Arg, Size); + } + + void addPtrArg(size_t Index, void *Arg) { + if (Index + 1 > Indices.size()) { + Indices.resize(Index + 1); + OwnsMem.resize(Index + 1); + ParamSizes.resize(Index + 1); + + OwnsMem[Index] = false; + ParamSizes[Index] = sizeof(uint8_t *); + } + Indices[Index] = Arg; + } + + // This is called by the destructor of ur_kernel_handle_t_, since + // ur_kernel_handle_t_ implements reference counting and we want + // to deallocate only when the reference count is 0. 
+ void deallocate() { + assert(OwnsMem.size() == Indices.size() && "Size mismatch"); + for (size_t Index = 0; Index < Indices.size(); Index++) { + if (OwnsMem[Index]) + native_cpu::aligned_free(Indices[Index]); + } + } + + const args_index_t &getIndices() const noexcept { return Indices; } + + } Args; + ur_program_handle_t hProgram; std::string _name; nativecpu_task_t _subhandler; - std::vector _args; std::vector _localArgInfo; std::optional getReqdWGSize() const { @@ -99,13 +147,21 @@ struct ur_kernel_handle_t_ : RefCounted { // For each local argument we have size*numthreads size_t offset = 0; for (auto &entry : _localArgInfo) { - _args[entry.argIndex].MPtr = + Args.Indices[entry.argIndex] = _localMemPool + offset + (entry.argSize * threadId); // update offset in the memory pool offset += entry.argSize * numParallelThread; } } + const std::vector &getArgs() const { return Args.getIndices(); } + + void addArg(const void *Ptr, size_t Index, size_t Size) { + Args.addArg(Index, Size, Ptr); + } + + void addPtrArg(void *Ptr, size_t Index) { Args.addPtrArg(Index, Ptr); } + private: char *_localMemPool = nullptr; size_t _localMemPoolSize = 0; diff --git a/source/adapters/native_cpu/queue.cpp b/source/adapters/native_cpu/queue.cpp index 7ee1fdf04c..e2dda24236 100644 --- a/source/adapters/native_cpu/queue.cpp +++ b/source/adapters/native_cpu/queue.cpp @@ -31,11 +31,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { - std::ignore = hContext; - std::ignore = hDevice; - std::ignore = pProperties; + // TODO: UR_QUEUE_FLAG_PROFILING_ENABLE and other props - auto Queue = new ur_queue_handle_t_(hDevice); + auto Queue = new ur_queue_handle_t_(hDevice, hContext, pProperties); *phQueue = Queue; return UR_RESULT_SUCCESS; @@ -78,8 +76,7 @@ UR_APIEXPORT ur_result_t UR_APICALL 
urQueueCreateWithNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urQueueFinish(ur_queue_handle_t hQueue) { - std::ignore = hQueue; - // TODO: is this fine as no-op? + hQueue->finish(); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/native_cpu/queue.hpp b/source/adapters/native_cpu/queue.hpp index 8c34af6327..05ff78d822 100644 --- a/source/adapters/native_cpu/queue.hpp +++ b/source/adapters/native_cpu/queue.hpp @@ -9,10 +9,48 @@ //===----------------------------------------------------------------------===// #pragma once #include "common.hpp" -#include "device.hpp" +#include "event.hpp" +#include "ur_api.h" +#include struct ur_queue_handle_t_ : RefCounted { - ur_device_handle_t_ *const device; + ur_queue_handle_t_(ur_device_handle_t device, ur_context_handle_t context, + const ur_queue_properties_t *pProps) + : device(device), context(context), + inOrder(pProps ? !(pProps->flags & + UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) + : true), + profilingEnabled(pProps ? pProps->flags & UR_QUEUE_FLAG_PROFILING_ENABLE + : false) {} - ur_queue_handle_t_(ur_device_handle_t_ *device) : device(device) {} + ur_device_handle_t getDevice() const { return device; } + + ur_context_handle_t getContext() const { return context; } + + void addEvent(ur_event_handle_t event) { events.insert(event); } + + void removeEvent(ur_event_handle_t event) { events.erase(event); } + + void finish() { + while (!events.empty()) { + auto ev = *events.begin(); + // ur_event_handle_t_::wait removes itself from the events set in the + // queue + ev->wait(); + } + events.clear(); + } + + ~ur_queue_handle_t_() { finish(); } + + bool isInOrder() const { return inOrder; } + + bool isProfiling() const { return profilingEnabled; } + +private: + ur_device_handle_t device; + ur_context_handle_t context; + std::set events; + const bool inOrder; + const bool profilingEnabled; }; diff --git a/test/conformance/event/event_adapter_native_cpu.match b/test/conformance/event/event_adapter_native_cpu.match 
index 95b8228e1f..2467bb4ddf 100644 --- a/test/conformance/event/event_adapter_native_cpu.match +++ b/test/conformance/event/event_adapter_native_cpu.match @@ -12,19 +12,11 @@ urEventGetInfoNegativeTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Na urEventGetInfoNegativeTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_QUEUED urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_SUBMIT -urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_START -urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_END urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_COMPLETE urEventGetProfilingInfoWithTimingComparisonTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urEventGetProfilingInfoNegativeTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetProfilingInfoNegativeTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urEventGetProfilingInfoNegativeTest.InvalidValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urEventWaitTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetNativeHandleTest.InvalidNullPointerNativeEvent/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventCreateWithNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urEventSetCallbackTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urEventSetCallbackTest.ValidateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urEventSetCallbackTest.AllStates/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
diff --git a/test/conformance/queue/queue_adapter_native_cpu.match b/test/conformance/queue/queue_adapter_native_cpu.match index 32ea573390..5d39450e12 100644 --- a/test/conformance/queue/queue_adapter_native_cpu.match +++ b/test/conformance/queue/queue_adapter_native_cpu.match @@ -23,7 +23,6 @@ urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_C urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_USE_DEFAULT_STREAM urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM -urQueueFinishTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urQueueFlushTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_CONTEXT urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_DEVICE From 35deee27908bcf05b65aeab9cdace92718d1ed81 Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Mon, 28 Oct 2024 10:34:58 +0000 Subject: [PATCH 12/37] Handle nullptr event in enqueue --- source/adapters/native_cpu/enqueue.cpp | 296 +++++++++++++------------ 1 file changed, 149 insertions(+), 147 deletions(-) diff --git a/source/adapters/native_cpu/enqueue.cpp b/source/adapters/native_cpu/enqueue.cpp index 7e03b323cc..7fd4119d40 100644 --- a/source/adapters/native_cpu/enqueue.cpp +++ b/source/adapters/native_cpu/enqueue.cpp @@ -258,26 +258,43 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( return UR_RESULT_SUCCESS; } +ur_result_t withTimingEvent(ur_command_t command_type, ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, + const std::function &f) { + urEventWait(numEventsInWaitList, 
phEventWaitList); + ur_event_handle_t event; + if (phEvent) { + event = new ur_event_handle_t_(hQueue, command_type); + event->tick_start(); + } + + ur_result_t result = f(); + + if (phEvent) { + event->tick_end(); + *phEvent = event; + } + return result; +} + UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hQueue; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - DIE_NO_IMPLEMENTATION; + // TODO: the wait here should be async + return withTimingEvent(UR_COMMAND_EVENTS_WAIT, hQueue, numEventsInWaitList, + phEventWaitList, phEvent, + [&]() { return UR_RESULT_SUCCESS; }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hQueue; - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - - DIE_NO_IMPLEMENTATION; + return withTimingEvent(UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, hQueue, + numEventsInWaitList, phEventWaitList, phEvent, + [&]() { return UR_RESULT_SUCCESS; }); } template @@ -289,43 +306,42 @@ static inline ur_result_t enqueueMemBufferReadWriteRect_impl( typename std::conditional::type DstMem, uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - ur_event_handle_t event; + ur_command_t command_t; if constexpr (IsRead) - event = new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_READ_RECT); + command_t = UR_COMMAND_MEM_BUFFER_READ_RECT; else - event = new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_WRITE_RECT); - event->tick_start(); - // TODO: blocking, check other constraints, performance optimizations - // More sharing with level_zero where possible - - urEventWait(NumEventsInWaitList, phEventWaitList); - if 
(BufferRowPitch == 0) - BufferRowPitch = region.width; - if (BufferSlicePitch == 0) - BufferSlicePitch = BufferRowPitch * region.height; - if (HostRowPitch == 0) - HostRowPitch = region.width; - if (HostSlicePitch == 0) - HostSlicePitch = HostRowPitch * region.height; - for (size_t w = 0; w < region.width; w++) - for (size_t h = 0; h < region.height; h++) - for (size_t d = 0; d < region.depth; d++) { - size_t buff_orign = (d + BufferOffset.z) * BufferSlicePitch + - (h + BufferOffset.y) * BufferRowPitch + w + - BufferOffset.x; - size_t host_origin = (d + HostOffset.z) * HostSlicePitch + - (h + HostOffset.y) * HostRowPitch + w + - HostOffset.x; - int8_t &buff_mem = ur_cast(Buff->_mem)[buff_orign]; - if constexpr (IsRead) - ur_cast(DstMem)[host_origin] = buff_mem; - else - buff_mem = ur_cast(DstMem)[host_origin]; - } + command_t = UR_COMMAND_MEM_BUFFER_WRITE_RECT; + return withTimingEvent( + command_t, hQueue, NumEventsInWaitList, phEventWaitList, phEvent, [&]() { + // TODO: blocking, check other constraints, performance optimizations + // More sharing with level_zero where possible + + if (BufferRowPitch == 0) + BufferRowPitch = region.width; + if (BufferSlicePitch == 0) + BufferSlicePitch = BufferRowPitch * region.height; + if (HostRowPitch == 0) + HostRowPitch = region.width; + if (HostSlicePitch == 0) + HostSlicePitch = HostRowPitch * region.height; + for (size_t w = 0; w < region.width; w++) + for (size_t h = 0; h < region.height; h++) + for (size_t d = 0; d < region.depth; d++) { + size_t buff_orign = (d + BufferOffset.z) * BufferSlicePitch + + (h + BufferOffset.y) * BufferRowPitch + w + + BufferOffset.x; + size_t host_origin = (d + HostOffset.z) * HostSlicePitch + + (h + HostOffset.y) * HostRowPitch + w + + HostOffset.x; + int8_t &buff_mem = ur_cast(Buff->_mem)[buff_orign]; + if constexpr (IsRead) + ur_cast(DstMem)[host_origin] = buff_mem; + else + buff_mem = ur_cast(DstMem)[host_origin]; + } - event->tick_end(); - *phEvent = event; - return UR_RESULT_SUCCESS; 
+ return UR_RESULT_SUCCESS; + }); } static inline ur_result_t doCopy_impl(ur_queue_handle_t hQueue, void *DstPtr, @@ -334,15 +350,12 @@ static inline ur_result_t doCopy_impl(ur_queue_handle_t hQueue, void *DstPtr, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, ur_command_t command_type) { - ur_event_handle_t event = new ur_event_handle_t_(hQueue, command_type); - event->tick_start(); - urEventWait(numEventsInWaitList, phEventWaitList); - if (SrcPtr != DstPtr && Size) - memmove(DstPtr, SrcPtr, Size); - event->tick_end(); - if (phEvent) - *phEvent = event; - return UR_RESULT_SUCCESS; + return withTimingEvent(command_type, hQueue, numEventsInWaitList, + phEventWaitList, phEvent, [&]() { + if (SrcPtr != DstPtr && Size) + memmove(DstPtr, SrcPtr, Size); + return UR_RESULT_SUCCESS; + }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( @@ -426,22 +439,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( size_t patternSize, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - // TODO: error checking - // TODO: handle async - void *startingPtr = hBuffer->_mem + offset; - unsigned steps = size / patternSize; - for (unsigned i = 0; i < steps; i++) { - memcpy(static_cast(startingPtr) + i * patternSize, pPattern, - patternSize); - } + return withTimingEvent( + UR_COMMAND_MEM_BUFFER_FILL, hQueue, numEventsInWaitList, phEventWaitList, + phEvent, [&]() { + UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + + // TODO: error checking + // TODO: handle async + void *startingPtr = hBuffer->_mem + offset; + unsigned steps = size / patternSize; + for (unsigned i = 0; i < steps; i++) { + memcpy(static_cast(startingPtr) + i * patternSize, pPattern, + patternSize); + } - return UR_RESULT_SUCCESS; + return 
UR_RESULT_SUCCESS; + }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( @@ -512,15 +526,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( std::ignore = mapFlags; std::ignore = size; - urEventWait(numEventsInWaitList, phEventWaitList); - ur_event_handle_t event = - new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_MAP); - event->tick_start(); - *ppRetMap = hBuffer->_mem + offset; - event->tick_end(); - *phEvent = event; - - return UR_RESULT_SUCCESS; + return withTimingEvent(UR_COMMAND_MEM_BUFFER_MAP, hQueue, numEventsInWaitList, + phEventWaitList, phEvent, [&]() { + *ppRetMap = hBuffer->_mem + offset; + return UR_RESULT_SUCCESS; + }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( @@ -529,70 +539,65 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( ur_event_handle_t *phEvent) { std::ignore = hMem; std::ignore = pMappedPtr; - urEventWait(numEventsInWaitList, phEventWaitList); - *phEvent = new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_UNMAP); - - return UR_RESULT_SUCCESS; + return withTimingEvent(UR_COMMAND_MEM_UNMAP, hQueue, numEventsInWaitList, + phEventWaitList, phEvent, + [&]() { return UR_RESULT_SUCCESS; }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, void *ptr, size_t patternSize, const void *pPattern, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - urEventWait(numEventsInWaitList, phEventWaitList); - ur_event_handle_t event = - new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_MAP); - event->tick_start(); - - UR_ASSERT(ptr, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(pPattern, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(patternSize != 0, UR_RESULT_ERROR_INVALID_SIZE) - UR_ASSERT(size != 0, UR_RESULT_ERROR_INVALID_SIZE) - UR_ASSERT(patternSize < size, UR_RESULT_ERROR_INVALID_SIZE) - UR_ASSERT(size % patternSize == 0, UR_RESULT_ERROR_INVALID_SIZE) - // TODO: add check for allocation size once 
the query is supported - - switch (patternSize) { - case 1: - memset(ptr, *static_cast(pPattern), size * patternSize); - break; - case 2: { - const auto pattern = *static_cast(pPattern); - auto *start = reinterpret_cast(ptr); - auto *end = - reinterpret_cast(reinterpret_cast(ptr) + size); - std::fill(start, end, pattern); - break; - } - case 4: { - const auto pattern = *static_cast(pPattern); - auto *start = reinterpret_cast(ptr); - auto *end = - reinterpret_cast(reinterpret_cast(ptr) + size); - std::fill(start, end, pattern); - break; - } - case 8: { - const auto pattern = *static_cast(pPattern); - auto *start = reinterpret_cast(ptr); - auto *end = - reinterpret_cast(reinterpret_cast(ptr) + size); - std::fill(start, end, pattern); - break; - } - default: { - for (unsigned int step{0}; step < size; step += patternSize) { - auto *dest = - reinterpret_cast(reinterpret_cast(ptr) + step); - memcpy(dest, pPattern, patternSize); - } - } - } - - event->tick_end(); - *phEvent = event; - - return UR_RESULT_SUCCESS; + return withTimingEvent( + UR_COMMAND_USM_FILL, hQueue, numEventsInWaitList, phEventWaitList, + phEvent, [&]() { + UR_ASSERT(ptr, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(pPattern, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(patternSize != 0, UR_RESULT_ERROR_INVALID_SIZE) + UR_ASSERT(size != 0, UR_RESULT_ERROR_INVALID_SIZE) + UR_ASSERT(patternSize < size, UR_RESULT_ERROR_INVALID_SIZE) + UR_ASSERT(size % patternSize == 0, UR_RESULT_ERROR_INVALID_SIZE) + // TODO: add check for allocation size once the query is supported + + switch (patternSize) { + case 1: + memset(ptr, *static_cast(pPattern), + size * patternSize); + break; + case 2: { + const auto pattern = *static_cast(pPattern); + auto *start = reinterpret_cast(ptr); + auto *end = reinterpret_cast( + reinterpret_cast(ptr) + size); + std::fill(start, end, pattern); + break; + } + case 4: { + const auto pattern = *static_cast(pPattern); + auto *start = reinterpret_cast(ptr); + auto *end = 
reinterpret_cast( + reinterpret_cast(ptr) + size); + std::fill(start, end, pattern); + break; + } + case 8: { + const auto pattern = *static_cast(pPattern); + auto *start = reinterpret_cast(ptr); + auto *end = reinterpret_cast( + reinterpret_cast(ptr) + size); + std::fill(start, end, pattern); + break; + } + default: { + for (unsigned int step{0}; step < size; step += patternSize) { + auto *dest = reinterpret_cast( + reinterpret_cast(ptr) + step); + memcpy(dest, pPattern, patternSize); + } + } + } + return UR_RESULT_SUCCESS; + }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( @@ -600,20 +605,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { std::ignore = blocking; - urEventWait(numEventsInWaitList, phEventWaitList); - ur_event_handle_t event = - new ur_event_handle_t_(hQueue, UR_COMMAND_MEM_BUFFER_MAP); - event->tick_start(); - - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_QUEUE); - UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER); + return withTimingEvent( + UR_COMMAND_USM_MEMCPY, hQueue, numEventsInWaitList, phEventWaitList, + phEvent, [&]() { + UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_QUEUE); + UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER); - memcpy(pDst, pSrc, size); - event->tick_end(); - *phEvent = event; + memcpy(pDst, pSrc, size); - return UR_RESULT_SUCCESS; + return UR_RESULT_SUCCESS; + }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( From b1222f08943e136d2f571011e3e80958dedc6806 Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Wed, 6 Nov 2024 10:36:12 +0000 Subject: [PATCH 13/37] Update match files --- .../event/event_adapter_native_cpu.match | 13 ----- .../usm/usm_adapter_native_cpu.match | 50 ------------------- 2 files changed, 63 deletions(-) diff --git 
a/test/conformance/event/event_adapter_native_cpu.match b/test/conformance/event/event_adapter_native_cpu.match index 2467bb4ddf..03e653a2eb 100644 --- a/test/conformance/event/event_adapter_native_cpu.match +++ b/test/conformance/event/event_adapter_native_cpu.match @@ -1,15 +1,4 @@ # Note: This file is only for use with cts_exe.py -urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_COMMAND_QUEUE -urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_CONTEXT -urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_COMMAND_TYPE -urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_COMMAND_EXECUTION_STATUS -urEventGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_EVENT_INFO_REFERENCE_COUNT -urEventGetInfoNegativeTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetInfoNegativeTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetInfoNegativeTest.InvalidSizePropSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetInfoNegativeTest.InvalidSizePropSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetInfoNegativeTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetInfoNegativeTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_QUEUED urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_SUBMIT urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_COMPLETE @@ -21,8 +10,6 @@ urEventSetCallbackTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urEventSetCallbackTest.ValidateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urEventSetCallbackTest.AllStates/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
urEventSetCallbackTest.EventAlreadyCompleted/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventSetCallbackNegativeTest.InvalidNullPointerCallback/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventSetCallbackNegativeTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} # These crash when ran through the loader {{OPT}}urEventRetainTest.InvalidNullHandle/* diff --git a/test/conformance/usm/usm_adapter_native_cpu.match b/test/conformance/usm/usm_adapter_native_cpu.match index 08a9c18cae..a0de04a27b 100644 --- a/test/conformance/usm/usm_adapter_native_cpu.match +++ b/test/conformance/usm/usm_adapter_native_cpu.match @@ -1,22 +1,6 @@ {{NONDETERMINISTIC}} urUSMDeviceAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMDeviceAllocTest.SuccessWithDescriptors/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled urUSMDeviceAllocTest.InvalidUSMSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_512 
-urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_2048 urUSMFreeTest.SuccessDeviceAlloc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urUSMFreeTest.SuccessHostAlloc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urUSMFreeTest.SuccessSharedAlloc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} @@ -30,39 +14,5 @@ urUSMGetMemAllocInfoNegativeTest.InvalidNullPointerMem/SYCL_NATIVE_CPU___SYCL_Na urUSMGetMemAllocInfoNegativeTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urUSMGetMemAllocInfoNegativeTest.InvalidValuePropSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} urUSMHostAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMHostAllocTest.SuccessWithDescriptors/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled urUSMHostAllocTest.InvalidUSMSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_512 
-urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_2048 -urUSMSharedAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMSharedAllocTest.SuccessWithDescriptors/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled urUSMSharedAllocTest.InvalidUSMSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled 
-urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_4_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_8_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_16_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_32_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled_64_2048 From 6e3cd3ee78d60c0c4a6b6db2364594fd95d9e535 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Mestre?= Date: Wed, 6 Nov 2024 14:17:37 +0000 Subject: [PATCH 
14/37] Fix update validation for L0 and OpenCL --- source/adapters/level_zero/command_buffer.cpp | 2 +- source/adapters/opencl/command_buffer.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 206787a68e..8f094ee373 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -1372,7 +1372,7 @@ ur_result_t validateCommandDesc( logger::debug("Mutable features supported by device {}", SupportedFeatures); // Kernel handle updates are not yet supported. - if (CommandDesc->hNewKernel != Command->Kernel) { + if (CommandDesc->hNewKernel && CommandDesc->hNewKernel != Command->Kernel) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 15029d5e27..a161a5b32b 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -547,7 +547,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( *pUpdateKernelLaunch) { // Kernel handle updates are not yet supported. - if (pUpdateKernelLaunch->hNewKernel != hCommand->Kernel) { + if (pUpdateKernelLaunch->hNewKernel && + pUpdateKernelLaunch->hNewKernel != hCommand->Kernel) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } From dc5d208e801059f31706ef91c52f74abc0feb402 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Wed, 6 Nov 2024 15:25:56 +0000 Subject: [PATCH 15/37] [NFC] Update documentation to detail match files --- scripts/core/CONTRIB.rst | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/scripts/core/CONTRIB.rst b/scripts/core/CONTRIB.rst index 80aa32d6aa..df46652617 100644 --- a/scripts/core/CONTRIB.rst +++ b/scripts/core/CONTRIB.rst @@ -367,6 +367,40 @@ the following command from the build directory. 
ctest -L "conformance" +Conformance Match Files +----------------------- + +At the moment, not all tests pass with all adapters. Some tests are +selectively marked as failing on certain adapters using a .match file located +at ``test/conformance/<component>/<component>_adapter_<adapter>.match``. If +that file exists, then it must contain a list of test specifiers which +specify tests that fail for the given adapter. + +When run through ``ctest``, each failing test will be run in a separate +invocation (to capture any crashes) to verify that it is still failing. All +tests not matched by the filters will also be run in a single invocation which +must succeed. + +This behaviour can be disabled by setting the environment variable +``GTEST_OUTPUT``. If this is set, the test runner assumes it is being run to +collect testing statistics, and just runs the test suite with no filters. + +The format of the match files is as follows: + +* Each line consists of the name of a test as understood by gtest. This is the + name printed next to ``[ RUN ]`` in the test log. +* ``*`` is a wildcard that matches any number of characters in a test name. ``?`` + matches a single character. +* Empty lines or lines beginning with ``#`` are ignored. +* A line beginning with ``{{OPT}}`` is an optional test; see below. +* For compatibility with an older version of the matching logic, ``{{.*}}`` is + interpreted as ``*`` and ``{{NONDETERMINISTIC}}`` is ignored. + +Normally tests in the match file must fail (either by crashing or having a test +failure) for the given adapter. However, this can be disabled by prepending +``{{OPT}}`` to the match line. This can be used if the test is flaky or +depends on a particular environment. 
+ Experimental Features ===================== From 8e032a7f10acb0c07fb17338e96f3fcd72ef781c Mon Sep 17 00:00:00 2001 From: Fabio Mestre Date: Wed, 6 Nov 2024 15:27:00 +0000 Subject: [PATCH 16/37] Add regression conformance test for nullptr hNewKernel --- ...xp_command_buffer_adapter_native_cpu.match | 1 + .../update/usm_saxpy_kernel_update.cpp | 53 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match index e6b8320def..4fe4050b5c 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match @@ -21,6 +21,7 @@ {{OPT}}USMSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}USMMultiSaxpyKernelTest.UpdateNullptrKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}NDRangeUpdateTest.Update3D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}NDRangeUpdateTest.Update2D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}NDRangeUpdateTest.Update1D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} diff --git a/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp index 1dc34c00fd..ddf8730eb7 100644 --- a/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp @@ -284,6 +284,59 @@ TEST_P(USMMultiSaxpyKernelTest, UpdateParameters) { Validate(new_output, new_X, new_Y, new_A, global_size); } +// Checks that passing nullptr to hNewKernel succeeds even when kernel binary +// updates are not supported by the adapter. 
+TEST_P(USMMultiSaxpyKernelTest, UpdateNullptrKernel) { + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + uint32_t *output = (uint32_t *)shared_ptrs[0]; + uint32_t *X = (uint32_t *)shared_ptrs[1]; + uint32_t *Y = (uint32_t *)shared_ptrs[2]; + Validate(output, X, Y, A, global_size); + + // New A at index 1 + uint32_t new_A = 33; + ur_exp_command_buffer_update_value_arg_desc_t new_A_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(new_A), // argSize + nullptr, // pProperties + &new_A, // hArgValue + }; + + // Update kernel inputs + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + nullptr, // hNewKernel + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 1, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + &new_A_desc, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + for (auto &handle : command_handles) { + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(handle, &update_desc)); + } + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + // Verify that update occurred correctly + uint32_t *new_output = (uint32_t *)shared_ptrs[0]; + Validate(new_output, X, Y, new_A, global_size); +} + TEST_P(USMMultiSaxpyKernelTest, UpdateWithoutBlocking) { // Prepare new inputs ur_exp_command_buffer_update_pointer_arg_desc_t new_input_descs[2]; From 4d454fa94c5eb3d54018d9a63e11c981ef6f732f Mon Sep 17 00:00:00 2001 From: "Zhao, Maosu" Date: Wed, 6 Nov 2024 21:21:53 -0800 Subject: [PATCH 17/37] [DeviceASAN] Disable memory leak detection when asan exit with errors --- 
.../layers/sanitizer/asan_interceptor.cpp | 22 +++++++++++-------- .../layers/sanitizer/asan_interceptor.hpp | 9 ++++++++ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/source/loader/layers/sanitizer/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan_interceptor.cpp index 4a315588fd..129731193f 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan_interceptor.cpp @@ -39,9 +39,11 @@ SanitizerInterceptor::~SanitizerInterceptor() { m_Quarantine = nullptr; m_MemBufferMap.clear(); - m_AllocationMap.clear(); m_KernelMap.clear(); m_ContextMap.clear(); + // AllocationMap need to be cleared after ContextMap because memory leak + // detection depends on it. + m_AllocationMap.clear(); for (auto Adapter : m_Adapters) { getContext()->urDdiTable.Global.pfnAdapterRelease(Adapter); @@ -290,7 +292,7 @@ ur_result_t SanitizerInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, ReportFatalError(AH); } if (!AH.IsRecover) { - exit(1); + exitWithErrors(); } } } @@ -616,7 +618,7 @@ ur_result_t SanitizerInterceptor::prepareLaunch( ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) { ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr, ValidateResult, PtrPair.second); - exit(1); + exitWithErrors(); } } } @@ -838,12 +840,14 @@ ContextInfo::~ContextInfo() { assert(Result == UR_RESULT_SUCCESS); // check memory leaks - std::vector AllocInfos = - getContext()->interceptor->findAllocInfoByContext(Handle); - for (const auto &It : AllocInfos) { - const auto &[_, AI] = *It; - if (!AI->IsReleased) { - ReportMemoryLeak(AI); + if (getContext()->interceptor->isNormalExit()) { + std::vector AllocInfos = + getContext()->interceptor->findAllocInfoByContext(Handle); + for (const auto &It : AllocInfos) { + const auto &[_, AI] = *It; + if (!AI->IsReleased) { + ReportMemoryLeak(AI); + } } } } diff --git a/source/loader/layers/sanitizer/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan_interceptor.hpp index 
e5429acd56..c1bf710425 100644 --- a/source/loader/layers/sanitizer/asan_interceptor.hpp +++ b/source/loader/layers/sanitizer/asan_interceptor.hpp @@ -271,6 +271,13 @@ class SanitizerInterceptor { const AsanOptions &getOptions() { return m_Options; } + void exitWithErrors() { + m_NormalExit = false; + exit(1); + } + + bool isNormalExit() { return m_NormalExit; } + private: ur_result_t updateShadowMemory(std::shared_ptr &ContextInfo, std::shared_ptr &DeviceInfo, @@ -320,6 +327,8 @@ class SanitizerInterceptor { std::unordered_set m_Adapters; ur_shared_mutex m_AdaptersMutex; + + bool m_NormalExit = true; }; } // namespace ur_sanitizer_layer From 17d4b720ac59c064892fb7d2f0e3126745739c86 Mon Sep 17 00:00:00 2001 From: pbalcer Date: Thu, 7 Nov 2024 10:49:49 +0100 Subject: [PATCH 18/37] use a relative benchmark cache path The absolute path may differ between runners, resulting in the cached elements not being found. --- .github/workflows/benchmarks-reusable.yml | 2 +- .github/workflows/docs.yml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/benchmarks-reusable.yml b/.github/workflows/benchmarks-reusable.yml index 94b500d08a..bc59d09cdf 100644 --- a/.github/workflows/benchmarks-reusable.yml +++ b/.github/workflows/benchmarks-reusable.yml @@ -199,5 +199,5 @@ jobs: if: ${{ always() && inputs.upload_report }} uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: - path: ${{ github.workspace }}/ur-repo/benchmark_results.html + path: ur-repo/benchmark_results.html key: benchmark-results-${{ matrix.adapter.str_name }}-${{ github.run_id }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c1d73bfeef..b4c40334d4 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -54,9 +54,8 @@ jobs: id: download-bench-html uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 with: - path: ${{ github.workspace }}/ur-repo/benchmark_results.html + path: 
ur-repo/benchmark_results.html key: benchmark-results- - restore-keys: benchmark-results- - name: Move benchmark HTML # exact or partial cache hit From 17a14f6547e0e6c354aac04177b0c88b3230131b Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Mon, 4 Nov 2024 20:39:28 +0000 Subject: [PATCH 19/37] Support null kernel arg update --- ...xp_command_buffer_adapter_native_cpu.match | 1 + .../update/usm_fill_kernel_update.cpp | 40 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match index e6b8320def..ea1719571d 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match @@ -15,6 +15,7 @@ {{OPT}}InvalidUpdateTest.NotUpdatableCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}InvalidUpdateTest.InvalidDimensions/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMFillCommandTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}USMFillCommandTest.UpdateNull/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMFillCommandTest.UpdateBeforeEnqueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} {{OPT}}BufferSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} diff --git a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp index 85e6beccf9..bbadc2b57b 100644 --- a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp @@ -199,6 +199,46 @@ TEST_P(USMFillCommandTest, UpdateBeforeEnqueue) { Validate((uint32_t *)new_shared_ptr, global_size, new_val); } +// Test that a USM pointer 
kernel argument can be updated to a null pointer +TEST_P(USMFillCommandTest, UpdateNull) { + // Run command-buffer prior to update and verify output + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + Validate((uint32_t *)shared_ptr, global_size, val); + + // Set nullptr as kernel output at index 0 + void *null_ptr = nullptr; + ur_exp_command_buffer_update_pointer_arg_desc_t new_output_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 0, // argIndex + nullptr, // pProperties + &null_ptr, // pArgValue + }; + + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, // hNewKernel + 0, // numNewMemObjArgs + 1, // numNewPointerArgs + 0, // numNewValueArgs + n_dimensions, // newWorkDim + nullptr, // pNewMemObjArgList + &new_output_desc, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + nullptr, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Verify update kernel succeeded but don't run to avoid dereferencing + // the nullptr. + ASSERT_SUCCESS( + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc)); +} + // Test updating a command-buffer with multiple USM fill kernel commands struct USMMultipleFillCommandTest : uur::command_buffer::urUpdatableCommandBufferExpExecutionTest { From 8e5634775bcf6e8bb9067a1aa5564605fb5bd043 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Tue, 17 Sep 2024 17:39:08 +0100 Subject: [PATCH 20/37] Raise MSVC warning level from /W3 to /W4 This patch increases the warning level when using the MSVC compiler from `/W3` to `/W4` and fixes the issues found. Four warnings introduced by `/W4` are disabled, all related to variable name shadowing, as they are overly prescriptive for valid code. 
--- cmake/helpers.cmake | 15 ++++-- examples/collector/collector.cpp | 7 +++ include/ur_api.h | 2 +- scripts/core/common.yml | 2 +- source/adapters/cuda/CMakeLists.txt | 7 +-- source/adapters/cuda/command_buffer.cpp | 26 +++++----- source/adapters/cuda/device.cpp | 4 +- source/adapters/cuda/enqueue.cpp | 50 +++++++++++-------- source/adapters/cuda/image.cpp | 5 +- source/adapters/cuda/kernel.cpp | 4 +- source/adapters/cuda/kernel.hpp | 8 +-- source/adapters/cuda/program.cpp | 4 +- source/adapters/cuda/usm.cpp | 2 +- source/adapters/level_zero/CMakeLists.txt | 18 ++++--- source/adapters/level_zero/adapter.cpp | 2 - source/adapters/level_zero/context.cpp | 2 +- source/adapters/level_zero/device.cpp | 16 +++--- source/adapters/level_zero/event.cpp | 2 - source/adapters/level_zero/platform.cpp | 2 - source/adapters/level_zero/program.hpp | 3 +- source/adapters/level_zero/queue.cpp | 2 +- source/adapters/level_zero/usm.cpp | 1 - source/adapters/level_zero/v2/event.cpp | 2 - source/adapters/level_zero/v2/kernel.cpp | 2 - source/adapters/level_zero/v2/memory.cpp | 2 - .../v2/queue_immediate_in_order.cpp | 2 - source/adapters/level_zero/v2/usm.cpp | 1 - source/adapters/level_zero/virtual_mem.cpp | 3 -- source/adapters/opencl/adapter.cpp | 2 - source/adapters/opencl/event.cpp | 6 +-- source/adapters/opencl/memory.cpp | 9 ++-- source/adapters/opencl/program.cpp | 15 +++--- source/adapters/opencl/queue.cpp | 4 +- source/adapters/opencl/usm.cpp | 8 +-- source/adapters/opencl/usm_p2p.cpp | 11 ++-- source/common/logger/ur_logger.hpp | 5 +- source/common/ur_util.cpp | 2 +- source/common/ur_util.hpp | 5 +- .../layers/tracing/ur_tracing_layer.cpp | 8 +++ source/loader/ur_adapter_registry.hpp | 13 ++--- source/loader/ur_lib.cpp | 13 ++--- source/loader/windows/adapter_search.cpp | 3 +- test/CMakeLists.txt | 3 +- test/conformance/device/urDevicePartition.cpp | 2 +- .../urEnqueueDeviceGlobalVariableRead.cpp | 2 +- .../urEnqueueEventsWaitMultiDevice.cpp | 11 ++-- 
.../enqueue/urEnqueueKernelLaunch.cpp | 16 +++--- .../urEnqueueKernelLaunchAndMemcpyInOrder.cpp | 4 +- .../exp_command_buffer/commands.cpp | 6 +-- .../conformance/exp_command_buffer/fixtures.h | 2 + .../update/buffer_fill_kernel_update.cpp | 4 +- .../update/buffer_saxpy_kernel_update.cpp | 2 +- .../update/kernel_handle_update.cpp | 18 ++++--- .../update/usm_fill_kernel_update.cpp | 10 ++-- .../launch_properties.cpp | 2 +- test/conformance/integration/QueueBuffer.cpp | 23 ++++----- test/conformance/integration/fixtures.h | 3 +- .../urKernelGetSuggestedLocalWorkSize.cpp | 2 +- .../kernel/urKernelSetArgSampler.cpp | 2 +- .../urMemBufferMigrateAcrossDevices.cpp | 2 +- .../urMultiDeviceProgramCreateWithBinary.cpp | 22 ++++---- test/conformance/testing/include/uur/utils.h | 2 - test/conformance/usm/urUSMDeviceAlloc.cpp | 7 +-- test/conformance/usm/urUSMHostAlloc.cpp | 4 +- test/conformance/usm/urUSMSharedAlloc.cpp | 8 +-- test/layers/tracing/test_collector.cpp | 7 +++ tools/urinfo/utils.hpp | 4 +- tools/urtrace/collector.cpp | 8 ++- 68 files changed, 262 insertions(+), 214 deletions(-) diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index a6e3a344a4..e45020bdea 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -99,18 +99,25 @@ function(add_ur_target_compile_options name) elseif(MSVC) target_compile_options(${name} PRIVATE $<$:/MP> # clang-cl.exe does not support /MP - /W3 + /W4 + /wd4456 # Disable: declaration of 'identifier' hides previous local declaration + /wd4457 # Disable: declaration of 'identifier' hides function parameter + /wd4458 # Disable: declaration of 'identifier' hides class member + /wd4459 # Disable: declaration of 'identifier' hides global declaration /MD$<$:d> /GS /DWIN32_LEAN_AND_MEAN /DNOMINMAX ) - if(UR_DEVELOPER_MODE) + target_compile_definitions(${name} PRIVATE # _CRT_SECURE_NO_WARNINGS used mainly because of getenv - # C4267: The compiler detected a conversion from size_t to a smaller type. 
+ _CRT_SECURE_NO_WARNINGS + ) + + if(UR_DEVELOPER_MODE) target_compile_options(${name} PRIVATE - /WX /GS /D_CRT_SECURE_NO_WARNINGS /wd4267 + /WX /GS ) endif() endif() diff --git a/examples/collector/collector.cpp b/examples/collector/collector.cpp index 6312dba549..ddcd32e775 100644 --- a/examples/collector/collector.cpp +++ b/examples/collector/collector.cpp @@ -25,7 +25,14 @@ #include #include "ur_api.h" + +#ifdef _MSC_VER +#pragma warning(disable : 4245) +#endif #include "xpti/xpti_trace_framework.h" +#ifdef _MSC_VER +#pragma warning(default : 4245) +#endif constexpr uint16_t TRACE_FN_BEGIN = static_cast(xpti::trace_point_type_t::function_with_args_begin); diff --git a/include/ur_api.h b/include/ur_api.h index 60d6fc2f70..59747a72ea 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -424,7 +424,7 @@ typedef struct ur_physical_mem_handle_t_ *ur_physical_mem_handle_t; /////////////////////////////////////////////////////////////////////////////// #ifndef UR_BIT /// @brief Generic macro for enumerator bit masks -#define UR_BIT(_i) (1 << _i) +#define UR_BIT(_i) (1U << _i) #endif // UR_BIT /////////////////////////////////////////////////////////////////////////////// diff --git a/scripts/core/common.yml b/scripts/core/common.yml index 73501ac39d..d1f5b769fa 100644 --- a/scripts/core/common.yml +++ b/scripts/core/common.yml @@ -134,7 +134,7 @@ name: "$x_physical_mem_handle_t" type: macro desc: "Generic macro for enumerator bit masks" name: "$X_BIT( _i )" -value: "( 1 << _i )" +value: "( 1U << _i )" --- #-------------------------------------------------------------------------- type: enum desc: "Defines Return/Error codes" diff --git a/source/adapters/cuda/CMakeLists.txt b/source/adapters/cuda/CMakeLists.txt index b6b153a5d8..a73b7ee886 100644 --- a/source/adapters/cuda/CMakeLists.txt +++ b/source/adapters/cuda/CMakeLists.txt @@ -97,15 +97,16 @@ if (UR_ENABLE_TRACING) get_target_property(XPTI_SRC_DIR xpti SOURCE_DIR) set(XPTI_PROXY_SRC 
"${XPTI_SRC_DIR}/xpti_proxy.cpp") endif() - target_compile_definitions(${TARGET_NAME} PRIVATE + add_library(cuda-xpti-proxy STATIC ${XPTI_PROXY_SRC}) + target_compile_definitions(cuda-xpti-proxy PRIVATE XPTI_ENABLE_INSTRUMENTATION XPTI_STATIC_LIBRARY ) - target_include_directories(${TARGET_NAME} PRIVATE + target_include_directories(cuda-xpti-proxy PRIVATE ${XPTI_INCLUDES} ${CUDA_CUPTI_INCLUDE_DIR} ) - target_sources(${TARGET_NAME} PRIVATE ${XPTI_PROXY_SRC}) + target_link_libraries(${TARGET_NAME} PRIVATE cuda-xpti-proxy) endif() if (CUDA_cupti_LIBRARY) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 527c339783..65253da739 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -242,7 +242,7 @@ static ur_result_t enqueueCommandBufferFillHelper( if ((PatternSize == 1) || (PatternSize == 2) || (PatternSize == 4)) { CUDA_MEMSET_NODE_PARAMS NodeParams = {}; NodeParams.dst = DstPtr; - NodeParams.elementSize = PatternSize; + NodeParams.elementSize = static_cast(PatternSize); NodeParams.height = N; NodeParams.pitch = PatternSize; NodeParams.width = 1; @@ -508,12 +508,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( auto &ArgIndices = hKernel->getArgIndices(); CUDA_KERNEL_NODE_PARAMS NodeParams = {}; NodeParams.func = CuFunc; - NodeParams.gridDimX = BlocksPerGrid[0]; - NodeParams.gridDimY = BlocksPerGrid[1]; - NodeParams.gridDimZ = BlocksPerGrid[2]; - NodeParams.blockDimX = ThreadsPerBlock[0]; - NodeParams.blockDimY = ThreadsPerBlock[1]; - NodeParams.blockDimZ = ThreadsPerBlock[2]; + NodeParams.gridDimX = static_cast(BlocksPerGrid[0]); + NodeParams.gridDimY = static_cast(BlocksPerGrid[1]); + NodeParams.gridDimZ = static_cast(BlocksPerGrid[2]); + NodeParams.blockDimX = static_cast(ThreadsPerBlock[0]); + NodeParams.blockDimY = static_cast(ThreadsPerBlock[1]); + NodeParams.blockDimZ = static_cast(ThreadsPerBlock[2]); NodeParams.sharedMemBytes = 
LocalSize; NodeParams.kernelParams = const_cast(ArgIndices.data()); @@ -1397,12 +1397,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( CUDA_KERNEL_NODE_PARAMS &Params = KernelCommandHandle->Params; Params.func = CuFunc; - Params.gridDimX = BlocksPerGrid[0]; - Params.gridDimY = BlocksPerGrid[1]; - Params.gridDimZ = BlocksPerGrid[2]; - Params.blockDimX = ThreadsPerBlock[0]; - Params.blockDimY = ThreadsPerBlock[1]; - Params.blockDimZ = ThreadsPerBlock[2]; + Params.gridDimX = static_cast(BlocksPerGrid[0]); + Params.gridDimY = static_cast(BlocksPerGrid[1]); + Params.gridDimZ = static_cast(BlocksPerGrid[2]); + Params.blockDimX = static_cast(ThreadsPerBlock[0]); + Params.blockDimY = static_cast(ThreadsPerBlock[1]); + Params.blockDimZ = static_cast(ThreadsPerBlock[2]); Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize(); Params.kernelParams = const_cast(KernelCommandHandle->Kernel->getArgIndices().data()); diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index be5867628d..b1e34586dc 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1152,7 +1152,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, try { if (pNumDevices) { - *pNumDevices = NumDevices; + *pNumDevices = static_cast(NumDevices); } if (ReturnDevices && phDevices) { @@ -1235,7 +1235,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice, uint64_t *pDeviceTimestamp, uint64_t *pHostTimestamp) { - CUevent Event; + CUevent Event{}; ScopedContext Active(hDevice); if (pDeviceTimestamp) { diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index 0e00f680f6..15f812403f 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -160,7 +160,7 @@ void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock, int 
MinGrid, MaxBlockSize; UR_CHECK_ERROR(cuOccupancyMaxPotentialBlockSize( &MinGrid, &MaxBlockSize, Kernel->get(), NULL, Kernel->getLocalSize(), - MaxBlockDim[0])); + static_cast(MaxBlockDim[0]))); roundToHighestFactorOfGlobalSizeIn3d(ThreadsPerBlock, GlobalSizeNormalized, MaxBlockDim, MaxBlockSize); @@ -208,7 +208,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context, MaxWorkGroupSize = Device->getMaxWorkGroupSize(); if (ProvidedLocalWorkGroupSize) { - auto IsValid = [&](int Dim) { + auto IsValid = [&](size_t Dim) { if (ReqdThreadsPerBlock[Dim] != 0 && LocalWorkSize[Dim] != ReqdThreadsPerBlock[Dim]) return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; @@ -217,7 +217,8 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context, LocalWorkSize[Dim] > MaxThreadsPerBlock[Dim]) return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; - if (LocalWorkSize[Dim] > Device->getMaxWorkItemSizes(Dim)) + if (LocalWorkSize[Dim] > + Device->getMaxWorkItemSizes(static_cast(Dim))) return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; // Checks that local work sizes are a divisor of the global work sizes // which includes that the local work sizes are neither larger than @@ -481,9 +482,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( auto &ArgIndices = hKernel->getArgIndices(); UR_CHECK_ERROR(cuLaunchKernel( - CuFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2], - ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2], LocalSize, - CuStream, const_cast(ArgIndices.data()), nullptr)); + CuFunc, static_cast(BlocksPerGrid[0]), + static_cast(BlocksPerGrid[1]), + static_cast(BlocksPerGrid[2]), + static_cast(ThreadsPerBlock[0]), + static_cast(ThreadsPerBlock[1]), + static_cast(ThreadsPerBlock[2]), LocalSize, CuStream, + const_cast(ArgIndices.data()), nullptr)); if (LocalSize != 0) hKernel->clearLocalSize(); @@ -649,12 +654,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( auto &ArgIndices = hKernel->getArgIndices(); CUlaunchConfig 
launch_config; - launch_config.gridDimX = BlocksPerGrid[0]; - launch_config.gridDimY = BlocksPerGrid[1]; - launch_config.gridDimZ = BlocksPerGrid[2]; - launch_config.blockDimX = ThreadsPerBlock[0]; - launch_config.blockDimY = ThreadsPerBlock[1]; - launch_config.blockDimZ = ThreadsPerBlock[2]; + launch_config.gridDimX = static_cast(BlocksPerGrid[0]); + launch_config.gridDimY = static_cast(BlocksPerGrid[1]); + launch_config.gridDimZ = static_cast(BlocksPerGrid[2]); + launch_config.blockDimX = static_cast(ThreadsPerBlock[0]); + launch_config.blockDimY = static_cast(ThreadsPerBlock[1]); + launch_config.blockDimZ = static_cast(ThreadsPerBlock[2]); launch_config.sharedMemBytes = LocalSize; launch_config.hStream = CuStream; @@ -979,8 +984,9 @@ ur_result_t commonMemSetLargePattern(CUstream Stream, uint32_t PatternSize, auto OffsetPtr = Ptr + (step * sizeof(uint8_t)); // set all of the pattern chunks - UR_CHECK_ERROR(cuMemsetD2D8Async(OffsetPtr, Pitch, Value, sizeof(uint8_t), - Height, Stream)); + UR_CHECK_ERROR(cuMemsetD2D8Async(OffsetPtr, Pitch, + static_cast(Value), + sizeof(uint8_t), Height, Stream)); } return UR_RESULT_SUCCESS; } @@ -1031,8 +1037,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( break; } default: { - UR_CHECK_ERROR(commonMemSetLargePattern(Stream, patternSize, size, - pPattern, DstDevice)); + UR_CHECK_ERROR( + commonMemSetLargePattern(Stream, static_cast(patternSize), + size, pPattern, DstDevice)); break; } } @@ -1064,7 +1071,6 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) { return 4; default: detail::ur::die("Invalid image format."); - return 0; } } @@ -1168,7 +1174,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( CUDA_ARRAY_DESCRIPTOR ArrayDesc; UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array)); - int ElementByteSize = imageElementByteSize(ArrayDesc); + int ElementByteSize = static_cast(imageElementByteSize(ArrayDesc)); size_t ByteOffsetX = origin.x * ElementByteSize * 
ArrayDesc.NumChannels; size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width; @@ -1241,7 +1247,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( CUDA_ARRAY_DESCRIPTOR ArrayDesc; UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array)); - int ElementByteSize = imageElementByteSize(ArrayDesc); + int ElementByteSize = static_cast(imageElementByteSize(ArrayDesc)); size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels; size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width; @@ -1320,7 +1326,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( UR_ASSERT(SrcArrayDesc.NumChannels == DstArrayDesc.NumChannels, UR_RESULT_ERROR_INVALID_MEM_OBJECT); - int ElementByteSize = imageElementByteSize(SrcArrayDesc); + int ElementByteSize = static_cast(imageElementByteSize(SrcArrayDesc)); size_t DstByteOffsetX = dstOrigin.x * ElementByteSize * SrcArrayDesc.NumChannels; @@ -1505,8 +1511,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( CuStream)); break; default: - commonMemSetLargePattern(CuStream, patternSize, size, pPattern, - (CUdeviceptr)ptr); + commonMemSetLargePattern(CuStream, static_cast(patternSize), + size, pPattern, (CUdeviceptr)ptr); break; } if (phEvent) { diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp index 4840553cc1..40fd18fef7 100644 --- a/source/adapters/cuda/image.cpp +++ b/source/adapters/cuda/image.cpp @@ -284,8 +284,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( ur_result_t Result = UR_RESULT_SUCCESS; try { ScopedContext Active(hDevice); - UR_CHECK_ERROR(cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch, - widthInBytes, height, elementSizeBytes)); + UR_CHECK_ERROR( + cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch, widthInBytes, + height, static_cast(elementSizeBytes))); } catch (ur_result_t error) { Result = error; } catch (...) 
{ diff --git a/source/adapters/cuda/kernel.cpp b/source/adapters/cuda/kernel.cpp index 5fb097c304..91daf5649c 100644 --- a/source/adapters/cuda/kernel.cpp +++ b/source/adapters/cuda/kernel.cpp @@ -203,8 +203,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( int MaxNumActiveGroupsPerCU{0}; UR_CHECK_ERROR(cuOccupancyMaxActiveBlocksPerMultiprocessor( - &MaxNumActiveGroupsPerCU, hKernel->get(), localWorkSize, - dynamicSharedMemorySize)); + &MaxNumActiveGroupsPerCU, hKernel->get(), + static_cast(localWorkSize), dynamicSharedMemorySize)); detail::ur::assertion(MaxNumActiveGroupsPerCU >= 0); // Handle the case where we can't have all SMs active with at least 1 group // per SM. In that case, the device is still able to run 1 work-group, hence diff --git a/source/adapters/cuda/kernel.hpp b/source/adapters/cuda/kernel.hpp index 7ad20a4f0e..77d8c817f4 100644 --- a/source/adapters/cuda/kernel.hpp +++ b/source/adapters/cuda/kernel.hpp @@ -97,8 +97,8 @@ struct ur_kernel_handle_t_ { } ParamSizes[Index] = Size; // calculate the insertion point on the array - size_t InsertPos = std::accumulate(std::begin(ParamSizes), - std::begin(ParamSizes) + Index, 0); + size_t InsertPos = std::accumulate( + std::begin(ParamSizes), std::begin(ParamSizes) + Index, size_t{0}); // Update the stored value for the argument std::memcpy(&Storage[InsertPos], Arg, Size); Indices[Index] = &Storage[InsertPos]; @@ -152,8 +152,8 @@ struct ur_kernel_handle_t_ { const args_index_t &getIndices() const noexcept { return Indices; } uint32_t getLocalSize() const { - return std::accumulate(std::begin(OffsetPerIndex), - std::end(OffsetPerIndex), 0); + return static_cast(std::accumulate( + std::begin(OffsetPerIndex), std::end(OffsetPerIndex), size_t{0})); } } Args; diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index 4b963a737a..8a29df8de6 100644 --- a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -148,8 +148,8 @@ 
ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) { } UR_CHECK_ERROR(cuModuleLoadDataEx(&Module, static_cast(Binary), - Options.size(), Options.data(), - OptionVals.data())); + static_cast(Options.size()), + Options.data(), OptionVals.data())); BuildStatus = UR_PROGRAM_BUILD_STATUS_SUCCESS; diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index 8a6ac41b08..8915736b3e 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -325,7 +325,7 @@ umf_result_t USMMemoryProvider::initialize(ur_context_handle_t Ctx, enum umf_result_t USMMemoryProvider::alloc(size_t Size, size_t Align, void **Ptr) { - auto Res = allocateImpl(Ptr, Size, Align); + auto Res = allocateImpl(Ptr, Size, static_cast(Align)); if (Res != UR_RESULT_SUCCESS) { getLastStatusRef() = Res; return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index d700fbb2c3..86424aaf41 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -89,8 +89,9 @@ if(UR_BUILD_ADAPTER_L0) endif() # TODO: fix level_zero adapter conversion warnings + # C4267: The compiler detected a conversion from size_t to a smaller type. 
target_compile_options(ur_adapter_level_zero PRIVATE - $<$:/wd4805 /wd4244> + $<$:/wd4805 /wd4244 /wd4267> ) set_target_properties(ur_adapter_level_zero PROPERTIES @@ -98,9 +99,9 @@ if(UR_BUILD_ADAPTER_L0) SOVERSION "${PROJECT_VERSION_MAJOR}" ) - if (WIN32) - # 0x800: Search for the DLL only in the System32 folder - target_link_options(ur_adapter_level_zero PRIVATE /DEPENDENTLOADFLAG:0x800) + if(CMAKE_CXX_COMPILER_LINKER_ID MATCHES MSVC) + # 0x800: Search for the DLL only in the System32 folder + target_link_options(ur_adapter_level_zero PRIVATE LINKER:/DEPENDENTLOADFLAG:0x800) endif() target_link_libraries(ur_adapter_level_zero PRIVATE @@ -183,8 +184,9 @@ if(UR_BUILD_ADAPTER_L0_V2) target_compile_definitions(ur_adapter_level_zero_v2 PUBLIC UR_ADAPTER_LEVEL_ZERO_V2) # TODO: fix level_zero adapter conversion warnings + # C4267: The compiler detected a conversion from size_t to a smaller type. target_compile_options(ur_adapter_level_zero_v2 PRIVATE - $<$:/wd4805 /wd4244> + $<$:/wd4805 /wd4244 /wd4100 /wd4267> ) set_target_properties(ur_adapter_level_zero_v2 PROPERTIES @@ -192,9 +194,9 @@ if(UR_BUILD_ADAPTER_L0_V2) SOVERSION "${PROJECT_VERSION_MAJOR}" ) - if (WIN32) - # 0x800: Search for the DLL only in the System32 folder - target_link_options(ur_adapter_level_zero_v2 PUBLIC /DEPENDENTLOADFLAG:0x800) + if(CMAKE_CXX_COMPILER_LINKER_ID MATCHES MSVC) + # 0x800: Search for the DLL only in the System32 folder + target_link_options(ur_adapter_level_zero_v2 PUBLIC LINKER:/DEPENDENTLOADFLAG:0x800) endif() target_link_libraries(ur_adapter_level_zero_v2 PRIVATE diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 9dd2a31268..7d3d571c68 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -514,7 +514,5 @@ ur_result_t urAdapterGetInfo(ur_adapter_handle_t, ur_adapter_info_t PropName, default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } - - return UR_RESULT_SUCCESS; } } // namespace 
ur::level_zero diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 41c7593237..d18aeb684f 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -756,7 +756,7 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( // queue's map to hold the fence and other associated command // list information. auto &QGroup = Queue->getQueueGroup(UseCopyEngine); - uint32_t QueueGroupOrdinal; + uint32_t QueueGroupOrdinal = 0; auto &ZeCommandQueue = ForcedCmdQueue ? *ForcedCmdQueue : QGroup.getZeQueue(&QueueGroupOrdinal); diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 94dad86070..cfa97e7179 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -155,7 +155,7 @@ ur_result_t urDeviceGet( } } - uint32_t ZeDeviceCount = MatchedDevices.size(); + uint32_t ZeDeviceCount = static_cast(MatchedDevices.size()); auto N = (std::min)(ZeDeviceCount, NumEntries); if (Devices) @@ -318,9 +318,10 @@ ur_result_t urDeviceGetInfo( Device->QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute] .ZeIndex >= 0; if (RepresentsCSlice) - MaxComputeUnits /= Device->RootDevice->SubDevices.size(); + MaxComputeUnits /= + static_cast(Device->RootDevice->SubDevices.size()); - return ReturnValue(uint32_t{MaxComputeUnits}); + return ReturnValue(MaxComputeUnits); } case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: // Level Zero spec defines only three dimensions @@ -422,7 +423,8 @@ ur_result_t urDeviceGetInfo( return Res; } - uint32_t ZeSubDeviceCount = Device->SubDevices.size(); + uint32_t ZeSubDeviceCount = + static_cast(Device->SubDevices.size()); if (pSize && ZeSubDeviceCount < 2) { *pSize = 0; return UR_RESULT_SUCCESS; @@ -1157,8 +1159,6 @@ ur_result_t urDeviceGetInfo( logger::toHex(ParamName)); return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } - - return UR_RESULT_SUCCESS; } bool CopyEngineRequested(const ur_device_handle_t 
&Device) { @@ -1206,7 +1206,7 @@ ur_result_t urDevicePartition( return Res; } - auto EffectiveNumDevices = [&]() -> decltype(Device->SubDevices.size()) { + auto EffectiveNumDevices = [&]() -> uint32_t { if (Device->SubDevices.size() == 0) return 0; @@ -1229,7 +1229,7 @@ ur_result_t urDevicePartition( } } - return Device->SubDevices.size(); + return static_cast(Device->SubDevices.size()); }(); // TODO: Consider support for partitioning to <= total sub-devices. diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 408580dd80..f4fbd1db39 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -498,8 +498,6 @@ ur_result_t urEventGetInfo( PropName, logger::toHex(PropName)); return UR_RESULT_ERROR_INVALID_VALUE; } - - return UR_RESULT_SUCCESS; } ur_result_t urEventGetProfilingInfo( diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index 721db3c359..506b0ee35b 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -99,8 +99,6 @@ ur_result_t urPlatformGetInfo( logger::debug("urPlatformGetInfo: unrecognized ParamName"); return UR_RESULT_ERROR_INVALID_VALUE; } - - return UR_RESULT_SUCCESS; } ur_result_t urPlatformGetApiVersion( diff --git a/source/adapters/level_zero/program.hpp b/source/adapters/level_zero/program.hpp index 4fe8c24acd..e4818e37bf 100644 --- a/source/adapters/level_zero/program.hpp +++ b/source/adapters/level_zero/program.hpp @@ -46,7 +46,8 @@ struct ur_program_handle_t_ : _ur_object { class SpecConstantShim { public: SpecConstantShim(ur_program_handle_t_ *Program) { - ZeSpecConstants.numConstants = Program->SpecConstants.size(); + ZeSpecConstants.numConstants = + static_cast(Program->SpecConstants.size()); ZeSpecContantsIds.reserve(ZeSpecConstants.numConstants); ZeSpecContantsValues.reserve(ZeSpecConstants.numConstants); diff --git a/source/adapters/level_zero/queue.cpp 
b/source/adapters/level_zero/queue.cpp index c4598f3472..7bce4fc687 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -2272,7 +2272,7 @@ ur_result_t ur_queue_handle_t_::createCommandList( ZeStruct ZeFenceDesc; ze_command_list_handle_t ZeCommandList; - uint32_t QueueGroupOrdinal; + uint32_t QueueGroupOrdinal = 0; auto &QGroup = getQueueGroup(UseCopyEngine); auto &ZeCommandQueue = ForcedCmdQueue ? *ForcedCmdQueue : QGroup.getZeQueue(&QueueGroupOrdinal); diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 28bdf233e8..bf592e0db6 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -689,7 +689,6 @@ ur_result_t urUSMGetMemAllocInfo( logger::error("urUSMGetMemAllocInfo: unsupported ParamName"); return UR_RESULT_ERROR_INVALID_VALUE; } - return UR_RESULT_SUCCESS; } ur_result_t urUSMPoolCreate( diff --git a/source/adapters/level_zero/v2/event.cpp b/source/adapters/level_zero/v2/event.cpp index 1197ca7d82..9f37024252 100644 --- a/source/adapters/level_zero/v2/event.cpp +++ b/source/adapters/level_zero/v2/event.cpp @@ -165,8 +165,6 @@ ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, propName, logger::toHex(propName)); return UR_RESULT_ERROR_INVALID_VALUE; } - - return UR_RESULT_SUCCESS; } ur_result_t urEventGetProfilingInfo( diff --git a/source/adapters/level_zero/v2/kernel.cpp b/source/adapters/level_zero/v2/kernel.cpp index de2e37e9bb..13e5adaf95 100644 --- a/source/adapters/level_zero/v2/kernel.cpp +++ b/source/adapters/level_zero/v2/kernel.cpp @@ -531,7 +531,6 @@ ur_result_t urKernelGetGroupInfo( return UR_RESULT_ERROR_INVALID_VALUE; } } - return UR_RESULT_SUCCESS; } ur_result_t urKernelGetSubGroupInfo( @@ -560,7 +559,6 @@ ur_result_t urKernelGetSubGroupInfo( returnValue(uint32_t{props.requiredSubgroupSize}); } else { die("urKernelGetSubGroupInfo: parameter not implemented"); - return {}; } return UR_RESULT_SUCCESS; } 
diff --git a/source/adapters/level_zero/v2/memory.cpp b/source/adapters/level_zero/v2/memory.cpp index 52bfea42a4..84a5542bae 100644 --- a/source/adapters/level_zero/v2/memory.cpp +++ b/source/adapters/level_zero/v2/memory.cpp @@ -529,8 +529,6 @@ ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName, return UR_RESULT_ERROR_INVALID_ENUMERATION; } } - - return UR_RESULT_SUCCESS; } ur_result_t urMemRetain(ur_mem_handle_t hMem) { diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index b4f61adbba..09cc5dab5a 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -190,8 +190,6 @@ ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName, propName, logger::toHex(propName)); return UR_RESULT_ERROR_INVALID_VALUE; } - - return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::queueRetain() { diff --git a/source/adapters/level_zero/v2/usm.cpp b/source/adapters/level_zero/v2/usm.cpp index f23a6c6fe8..9b2ae2a0d7 100644 --- a/source/adapters/level_zero/v2/usm.cpp +++ b/source/adapters/level_zero/v2/usm.cpp @@ -378,6 +378,5 @@ ur_result_t urUSMGetMemAllocInfo( return UR_RESULT_ERROR_INVALID_VALUE; } } - return UR_RESULT_SUCCESS; } } // namespace ur::level_zero diff --git a/source/adapters/level_zero/virtual_mem.cpp b/source/adapters/level_zero/virtual_mem.cpp index e89899ded7..68c457d181 100644 --- a/source/adapters/level_zero/virtual_mem.cpp +++ b/source/adapters/level_zero/virtual_mem.cpp @@ -38,7 +38,6 @@ ur_result_t urVirtualMemGranularityGetInfo( propName, propName); return UR_RESULT_ERROR_INVALID_VALUE; } - return UR_RESULT_SUCCESS; } ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, @@ -119,7 +118,5 @@ ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext, propName, propName); return UR_RESULT_ERROR_INVALID_VALUE; } - - return UR_RESULT_SUCCESS; 
} } // namespace ur::level_zero diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index e794c308f9..015c4facfd 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -131,6 +131,4 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } - - return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 45550a68e8..1792d0f110 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -10,6 +10,7 @@ #include "common.hpp" +#include #include #include #include @@ -32,8 +33,7 @@ cl_event_info convertUREventInfoToCL(const ur_event_info_t PropName) { return CL_EVENT_REFERENCE_COUNT; break; default: - return -1; - break; + return std::numeric_limits::max(); } } @@ -51,7 +51,7 @@ convertURProfilingInfoToCL(const ur_profiling_info_t PropName) { case UR_PROFILING_INFO_COMMAND_END: return CL_PROFILING_COMMAND_END; default: - return -1; + return std::numeric_limits::max(); } } diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 201df1f678..89d9f1d383 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "common.hpp" +#include cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { cl_image_format CLImageFormat; @@ -59,7 +60,8 @@ cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { CLImageFormat.image_channel_order = CL_sRGBA; break; default: - CLImageFormat.image_channel_order = -1; + CLImageFormat.image_channel_order = + std::numeric_limits::max(); break; } @@ -110,7 +112,8 @@ cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { CLImageFormat.image_channel_data_type = CL_FLOAT; break; default: - 
CLImageFormat.image_channel_data_type = -1; + CLImageFormat.image_channel_data_type = + std::numeric_limits::max(); break; } @@ -139,7 +142,7 @@ cl_image_desc mapURImageDescToCL(const ur_image_desc_t *PImageDesc) { CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; break; default: - CLImageDesc.image_type = -1; + CLImageDesc.image_type = std::numeric_limits::max(); break; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 20aaa8fd3a..9018ee43f2 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -161,10 +161,10 @@ urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, std::unique_ptr> DevicesInProgram; UR_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); - CL_RETURN_ON_FAILURE(clCompileProgram(cl_adapter::cast(hProgram), - DevicesInProgram->size(), - DevicesInProgram->data(), pOptions, 0, - nullptr, nullptr, nullptr, nullptr)); + CL_RETURN_ON_FAILURE(clCompileProgram( + cl_adapter::cast(hProgram), + static_cast(DevicesInProgram->size()), DevicesInProgram->data(), + pOptions, 0, nullptr, nullptr, nullptr, nullptr)); return UR_RESULT_SUCCESS; } @@ -219,9 +219,10 @@ urProgramBuild([[maybe_unused]] ur_context_handle_t hContext, std::unique_ptr> DevicesInProgram; UR_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); - CL_RETURN_ON_FAILURE(clBuildProgram( - cl_adapter::cast(hProgram), DevicesInProgram->size(), - DevicesInProgram->data(), pOptions, nullptr, nullptr)); + CL_RETURN_ON_FAILURE( + clBuildProgram(cl_adapter::cast(hProgram), + static_cast(DevicesInProgram->size()), + DevicesInProgram->data(), pOptions, nullptr, nullptr)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 2e40963ad1..0bb81cb1e5 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -8,9 +8,9 @@ #include "common.hpp" #include "platform.hpp" +#include 
cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { - switch (PropName) { case UR_QUEUE_INFO_CONTEXT: return CL_QUEUE_CONTEXT; @@ -25,7 +25,7 @@ cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { case UR_QUEUE_INFO_SIZE: return CL_QUEUE_SIZE; default: - return -1; + return std::numeric_limits::max(); } } diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index dfcc1dfafa..4119f4ddfd 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -614,12 +614,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( } cl_int ClResult = CL_SUCCESS; if (blocking) { - ClResult = clWaitForEvents(Events.size(), Events.data()); + ClResult = + clWaitForEvents(static_cast(Events.size()), Events.data()); } if (phEvent && ClResult == CL_SUCCESS) { ClResult = clEnqueueBarrierWithWaitList( - cl_adapter::cast(hQueue), Events.size(), - Events.data(), cl_adapter::cast(phEvent)); + cl_adapter::cast(hQueue), + static_cast(Events.size()), Events.data(), + cl_adapter::cast(phEvent)); } for (const auto &E : Events) { CL_RETURN_ON_FAILURE(clReleaseEvent(E)); diff --git a/source/adapters/opencl/usm_p2p.cpp b/source/adapters/opencl/usm_p2p.cpp index b0f51eac2b..66387f5226 100644 --- a/source/adapters/opencl/usm_p2p.cpp +++ b/source/adapters/opencl/usm_p2p.cpp @@ -8,13 +8,12 @@ // //===----------------------------------------------------------------------===// -#include "common.hpp" +#include "logger/ur_logger.hpp" UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice, [[maybe_unused]] ur_device_handle_t peerDevice) { - - cl_adapter::die( + logger::warning( "Experimental P2P feature is not implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -22,8 +21,7 @@ urUsmP2PEnablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice, UR_APIEXPORT ur_result_t UR_APICALL 
urUsmP2PDisablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice, [[maybe_unused]] ur_device_handle_t peerDevice) { - - cl_adapter::die( + logger::warning( "Experimental P2P feature is not implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -34,8 +32,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( [[maybe_unused]] ur_exp_peer_info_t propName, [[maybe_unused]] size_t propSize, [[maybe_unused]] void *pPropValue, [[maybe_unused]] size_t *pPropSizeRet) { - - cl_adapter::die( + logger::warning( "Experimental P2P feature is not implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/common/logger/ur_logger.hpp b/source/common/logger/ur_logger.hpp index c4dc655444..1039d16d3f 100644 --- a/source/common/logger/ur_logger.hpp +++ b/source/common/logger/ur_logger.hpp @@ -116,8 +116,9 @@ template inline std::string toHex(T t) { inline Logger create_logger(std::string logger_name, bool skip_prefix, bool skip_linebreak, logger::Level default_log_level) { - std::transform(logger_name.begin(), logger_name.end(), logger_name.begin(), - ::toupper); + std::transform( + logger_name.begin(), logger_name.end(), logger_name.begin(), + [](char c) -> char { return static_cast(::toupper(c)); }); std::stringstream env_var_name; const auto default_flush_level = logger::Level::ERR; const std::string default_output = "stderr"; diff --git a/source/common/ur_util.cpp b/source/common/ur_util.cpp index 176a2e028e..78651c4212 100644 --- a/source/common/ur_util.cpp +++ b/source/common/ur_util.cpp @@ -15,7 +15,7 @@ #include int ur_getpid(void) { return static_cast(GetCurrentProcessId()); } -int ur_close_fd(int fd) { return -1; } +int ur_close_fd(int fd [[maybe_unused]]) { return -1; } int ur_duplicate_fd(int pid, int fd_in) { // TODO: find another way to obtain a duplicate of another process's file descriptor diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp index 
0ede3c93dc..878123b6f0 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -98,8 +98,9 @@ std::optional ur_getenv(const char *name); inline bool getenv_tobool(const char *name, bool def = false) { if (auto env = ur_getenv(name); env) { - std::transform(env->begin(), env->end(), env->begin(), - [](unsigned char c) { return std::tolower(c); }); + std::transform(env->begin(), env->end(), env->begin(), [](char c) { + return static_cast(std::tolower(c)); + }); auto true_str = {"y", "yes", "t", "true", "1"}; return std::find(true_str.begin(), true_str.end(), *env) != true_str.end(); diff --git a/source/loader/layers/tracing/ur_tracing_layer.cpp b/source/loader/layers/tracing/ur_tracing_layer.cpp index 614f649a3c..3a1cada334 100644 --- a/source/loader/layers/tracing/ur_tracing_layer.cpp +++ b/source/loader/layers/tracing/ur_tracing_layer.cpp @@ -12,8 +12,16 @@ #include "ur_tracing_layer.hpp" #include "ur_api.h" #include "ur_util.hpp" + +#ifdef _MSC_VER +#pragma warning(disable : 4245) +#endif #include "xpti/xpti_data_types.h" #include "xpti/xpti_trace_framework.h" +#ifdef _MSC_VER +#pragma warning(default : 4245) +#endif + #include #include #include diff --git a/source/loader/ur_adapter_registry.hpp b/source/loader/ur_adapter_registry.hpp index 7df799ab1e..201b57c6f0 100644 --- a/source/loader/ur_adapter_registry.hpp +++ b/source/loader/ur_adapter_registry.hpp @@ -225,12 +225,13 @@ class AdapterRegistry { } // case-insensitive comparison by converting both tolower - std::transform(platformBackendName.begin(), - platformBackendName.end(), - platformBackendName.begin(), - [](unsigned char c) { return std::tolower(c); }); - std::transform(backend.begin(), backend.end(), backend.begin(), - [](unsigned char c) { return std::tolower(c); }); + std::transform( + platformBackendName.begin(), platformBackendName.end(), + platformBackendName.begin(), + [](char c) { return static_cast(std::tolower(c)); }); + std::transform( + backend.begin(), backend.end(), 
backend.begin(), + [](char c) { return static_cast(std::tolower(c)); }); std::size_t nameFound = platformBackendName.find(backend); bool backendFound = nameFound != std::string::npos; diff --git a/source/loader/ur_lib.cpp b/source/loader/ur_lib.cpp index e1de6d6237..fb8035b428 100644 --- a/source/loader/ur_lib.cpp +++ b/source/loader/ur_lib.cpp @@ -17,6 +17,7 @@ #define NOMINMAX #include "ur_api.h" #include "ur_ldrddi.hpp" +#include #endif // !NOMINMAX #include "logger/ur_logger.hpp" @@ -412,7 +413,7 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, using DeviceIdType = unsigned long; constexpr DeviceIdType DeviceIdTypeALL = - -1; // ULONG_MAX but without #include + std::numeric_limits::max(); struct DeviceSpec { DevicePartLevel level; @@ -426,8 +427,9 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, auto getRootHardwareType = [](const std::string &input) -> DeviceHardwareType { std::string lowerInput(input); - std::transform(lowerInput.cbegin(), lowerInput.cend(), - lowerInput.begin(), ::tolower); + std::transform( + lowerInput.cbegin(), lowerInput.cend(), lowerInput.begin(), + [](char c) { return static_cast(std::tolower(c)); }); if (lowerInput == "cpu") { return ::UR_DEVICE_TYPE_CPU; } @@ -482,9 +484,8 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, platformBackendName.cend(), backend.cbegin(), backend.cend(), [](const auto &a, const auto &b) { // case-insensitive comparison by converting both tolower - return std::tolower( - static_cast(a)) == - std::tolower(static_cast(b)); + return std::tolower(static_cast(a)) == + std::tolower(static_cast(b)); })) { // irrelevant term for current request: different backend -- silently ignore logger::error("unrecognised backend '{}'", backend); diff --git a/source/loader/windows/adapter_search.cpp b/source/loader/windows/adapter_search.cpp index b514897d91..f850ec5de7 100644 --- a/source/loader/windows/adapter_search.cpp +++ b/source/loader/windows/adapter_search.cpp @@ 
-40,7 +40,8 @@ std::optional getLoaderLibPath() { return std::nullopt; } -std::optional getAdapterNameAsPath(std::string adapterName) { +std::optional getAdapterNameAsPath(std::string adapterName + [[maybe_unused]]) { return std::nullopt; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e7514cefd8..b9a7f5a0d0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -32,6 +32,7 @@ add_subdirectory(mock) if(UR_BUILD_TOOLS) add_subdirectory(tools) endif() -if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND UR_DPCXX AND UR_TEST_FUZZTESTS) +if(CMAKE_CXX_COMPILER_ID STREQUAL Clang AND UR_DPCXX AND UR_TEST_FUZZTESTS AND + CMAKE_SYSTEM_NAME STREQUAL Linux) add_subdirectory(fuzz) endif() diff --git a/test/conformance/device/urDevicePartition.cpp b/test/conformance/device/urDevicePartition.cpp index 2b2939066d..1241900a1a 100644 --- a/test/conformance/device/urDevicePartition.cpp +++ b/test/conformance/device/urDevicePartition.cpp @@ -138,7 +138,7 @@ TEST_F(urDevicePartitionTest, PartitionByCounts) { uint32_t sum = 0; for (auto sub_device : sub_devices) { ASSERT_NE(sub_device, nullptr); - uint32_t n_cu_in_sub_device; + uint32_t n_cu_in_sub_device = 0; ASSERT_NO_FATAL_FAILURE( getNumberComputeUnits(sub_device, n_cu_in_sub_device)); sum += n_cu_in_sub_device; diff --git a/test/conformance/enqueue/urEnqueueDeviceGlobalVariableRead.cpp b/test/conformance/enqueue/urEnqueueDeviceGlobalVariableRead.cpp index 799ce7a67d..d247ee6e96 100644 --- a/test/conformance/enqueue/urEnqueueDeviceGlobalVariableRead.cpp +++ b/test/conformance/enqueue/urEnqueueDeviceGlobalVariableRead.cpp @@ -14,7 +14,7 @@ TEST_P(urEnqueueDeviceGetGlobalVariableReadTest, Success) { 0, &global_var.value, 0, nullptr, nullptr)); size_t global_offset = 0; - size_t n_dimensions = 1; + uint32_t n_dimensions = 1; size_t global_size = 1; // execute the kernel diff --git a/test/conformance/enqueue/urEnqueueEventsWaitMultiDevice.cpp b/test/conformance/enqueue/urEnqueueEventsWaitMultiDevice.cpp index 
1e281b0632..8ee6dc5365 100644 --- a/test/conformance/enqueue/urEnqueueEventsWaitMultiDevice.cpp +++ b/test/conformance/enqueue/urEnqueueEventsWaitMultiDevice.cpp @@ -175,8 +175,9 @@ TEST_P(urEnqueueEventsWaitMultiDeviceMTTest, EnqueueWaitOnAllQueues) { doComputation(work); uur::raii::Event gatherEvent; - ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], devices.size(), events.data(), - gatherEvent.ptr())); + ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], + static_cast(devices.size()), + events.data(), gatherEvent.ptr())); ASSERT_SUCCESS(urEventWait(1, gatherEvent.ptr())); for (size_t i = 0; i < devices.size(); i++) { @@ -201,9 +202,9 @@ TEST_P(urEnqueueEventsWaitMultiDeviceMTTest, doComputation(work); uur::raii::Event hGatherEvent; - ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], eventHandles.size(), - eventHandles.data(), - hGatherEvent.ptr())); + ASSERT_SUCCESS(urEnqueueEventsWait( + queues[0], static_cast(eventHandles.size()), + eventHandles.data(), hGatherEvent.ptr())); ASSERT_SUCCESS(urEventWait(1, hGatherEvent.ptr())); for (auto &event : eventHandles) { diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp index 7ffa072466..eefd204cc3 100644 --- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp +++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp @@ -15,7 +15,7 @@ struct urEnqueueKernelLaunchTest : uur::urKernelExecutionTest { uint32_t val = 42; size_t global_size = 32; size_t global_offset = 0; - size_t n_dimensions = 1; + uint32_t n_dimensions = 1; }; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchTest); @@ -29,7 +29,7 @@ struct urEnqueueKernelLaunchKernelWgSizeTest : uur::urKernelExecutionTest { std::array global_offset{0, 0, 0}; // This must match the size in fixed_wg_size.cpp std::array wg_size{4, 4, 4}; - size_t n_dimensions = 3; + uint32_t n_dimensions = 3; }; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchKernelWgSizeTest); @@ -42,7 +42,7 @@ struct 
urEnqueueKernelLaunchKernelSubGroupTest : uur::urKernelExecutionTest { std::array global_size{32, 32, 32}; std::array global_offset{0, 0, 0}; - size_t n_dimensions = 3; + uint32_t n_dimensions = 3; }; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchKernelSubGroupTest); @@ -52,7 +52,7 @@ struct urEnqueueKernelLaunchKernelStandardTest : uur::urKernelExecutionTest { UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); } - size_t n_dimensions = 1; + uint32_t n_dimensions = 1; size_t global_size = 1; size_t offset = 0; }; @@ -210,7 +210,7 @@ TEST_P(urEnqueueKernelLaunchKernelStandardTest, Success) { struct testParametersEnqueueKernel { size_t X, Y, Z; - size_t Dims; + uint32_t Dims; }; template @@ -261,7 +261,7 @@ struct urEnqueueKernelLaunchTestWithParam uint32_t val = 42; size_t global_range[3]; size_t global_offset[3] = {0, 0, 0}; - size_t n_dimensions; + uint32_t n_dimensions; size_t buffer_size; }; @@ -333,7 +333,7 @@ struct urEnqueueKernelLaunchWithUSM : uur::urKernelExecutionTest { UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchWithUSM); TEST_P(urEnqueueKernelLaunchWithUSM, Success) { - size_t work_dim = 1; + uint32_t work_dim = 1; size_t global_offset = 0; size_t global_size = alloc_size / sizeof(uint32_t); uint32_t fill_val = 42; @@ -424,7 +424,7 @@ struct urEnqueueKernelLaunchWithVirtualMemory : uur::urKernelExecutionTest { UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchWithVirtualMemory); TEST_P(urEnqueueKernelLaunchWithVirtualMemory, Success) { - size_t work_dim = 1; + uint32_t work_dim = 1; size_t global_offset = 0; size_t global_size = alloc_size / sizeof(uint32_t); uint32_t fill_val = 42; diff --git a/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp b/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp index b6306f1693..f20e4f1873 100644 --- a/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp +++ b/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp @@ 
-354,7 +354,7 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) { for (size_t i = 0; i < numThreads; i++) { threads.emplace_back([this, i, queuePerThread, useEvents]() { constexpr size_t global_offset = 0; - constexpr size_t n_dimensions = 1; + constexpr uint32_t n_dimensions = 1; auto queue = queuePerThread ? queues[i] : queues.back(); auto kernel = kernels[i]; @@ -362,7 +362,7 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) { std::vector Events(numOpsPerThread + 1); for (size_t j = 0; j < numOpsPerThread; j++) { - size_t waitNum = 0; + uint32_t waitNum = 0; ur_event_handle_t *lastEvent = nullptr; ur_event_handle_t *signalEvent = nullptr; diff --git a/test/conformance/exp_command_buffer/commands.cpp b/test/conformance/exp_command_buffer/commands.cpp index 49b2444176..4ca2d263cb 100644 --- a/test/conformance/exp_command_buffer/commands.cpp +++ b/test/conformance/exp_command_buffer/commands.cpp @@ -148,8 +148,8 @@ struct urCommandBufferAppendKernelLaunchExpTest int32_t *ptrX = static_cast(shared_ptrs[1]); int32_t *ptrY = static_cast(shared_ptrs[2]); for (size_t i = 0; i < global_size; i++) { - ptrX[i] = i; - ptrY[i] = i * 2; + ptrX[i] = static_cast(i); + ptrY[i] = static_cast(i * 2); } // Index 0 is output @@ -200,7 +200,7 @@ TEST_P(urCommandBufferAppendKernelLaunchExpTest, Basic) { int32_t *ptrZ = static_cast(shared_ptrs[0]); for (size_t i = 0; i < global_size; i++) { - uint32_t result = (A * i) + (i * 2); + int32_t result = static_cast((A * i) + (i * 2)); ASSERT_EQ(result, ptrZ[i]); } } diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index 42bee05b5a..9f9455ce98 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -8,6 +8,8 @@ #include +#include + namespace uur { namespace command_buffer { diff --git a/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp 
b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp index 3e13a895ff..1a56e356be 100644 --- a/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp @@ -336,7 +336,7 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, &first_update_desc)); - uint32_t second_val = -99; + uint32_t second_val = 99; ur_exp_command_buffer_update_value_arg_desc_t second_input_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype nullptr, // pNext @@ -393,7 +393,7 @@ TEST_P(BufferFillCommandTest, OverrideArgList) { &first_val, // hArgValue }; - uint32_t second_val = -99; + uint32_t second_val = 99; input_descs[1] = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype nullptr, // pNext diff --git a/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp index 858b6b5680..5484289cec 100644 --- a/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp @@ -31,7 +31,7 @@ struct BufferSaxpyKernelTest } // Variable that is incremented as arguments are added to the kernel - size_t current_arg_index = 0; + uint32_t current_arg_index = 0; // Index 0 is output buffer for HIP/Non-HIP ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, current_arg_index++, nullptr, buffers[0])); diff --git a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp index 9fb408fb42..15ea4787c0 100644 --- a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp +++ b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp @@ -269,7 +269,8 @@ TEST_P(urCommandBufferKernelHandleUpdateTest, Success) { 
ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), - &(SaxpyKernel->LocalSize), KernelAlternatives.size(), + &(SaxpyKernel->LocalSize), + static_cast(KernelAlternatives.size()), KernelAlternatives.data(), 0, nullptr, 0, nullptr, nullptr, nullptr, CommandHandle.ptr())); ASSERT_NE(CommandHandle, nullptr); @@ -298,7 +299,8 @@ TEST_P(urCommandBufferKernelHandleUpdateTest, UpdateAgain) { ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), - &(SaxpyKernel->LocalSize), KernelAlternatives.size(), + &(SaxpyKernel->LocalSize), + static_cast(KernelAlternatives.size()), KernelAlternatives.data(), 0, nullptr, 0, nullptr, nullptr, nullptr, CommandHandle.ptr())); ASSERT_NE(CommandHandle, nullptr); @@ -336,7 +338,8 @@ TEST_P(urCommandBufferKernelHandleUpdateTest, RestoreOriginalKernel) { ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), - &(SaxpyKernel->LocalSize), KernelAlternatives.size(), + &(SaxpyKernel->LocalSize), + static_cast(KernelAlternatives.size()), KernelAlternatives.data(), 0, nullptr, 0, nullptr, nullptr, nullptr, CommandHandle.ptr())); ASSERT_NE(CommandHandle, nullptr); @@ -394,9 +397,9 @@ TEST_P(urCommandBufferKernelHandleUpdateTest, updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), &(SaxpyKernel->LocalSize), - KernelAlternatives.size(), KernelAlternatives.data(), - 0, nullptr, 0, nullptr, nullptr, nullptr, - &CommandHandle)); + static_cast(KernelAlternatives.size()), + KernelAlternatives.data(), 0, nullptr, 0, nullptr, + nullptr, nullptr, &CommandHandle)); } using 
urCommandBufferValidUpdateParametersTest = @@ -451,7 +454,8 @@ TEST_P(urCommandBufferValidUpdateParametersTest, UpdateOnlyLocalWorkSize) { ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), - &(SaxpyKernel->LocalSize), KernelAlternatives.size(), + &(SaxpyKernel->LocalSize), + static_cast(KernelAlternatives.size()), KernelAlternatives.data(), 0, nullptr, 0, nullptr, nullptr, nullptr, CommandHandle.ptr())); ASSERT_NE(CommandHandle, nullptr); diff --git a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp index 85e6beccf9..a28414c94d 100644 --- a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp @@ -222,7 +222,7 @@ struct USMMultipleFillCommandTest std::memset(shared_ptr, 0, allocation_size); // Append multiple kernel commands to command-buffer - for (size_t k = 0; k < num_kernels; k++) { + for (uint32_t k = 0; k < num_kernels; k++) { // Calculate offset into output allocation, and set as // kernel output. 
void *offset_ptr = (uint32_t *)shared_ptr + (k * elements); @@ -270,7 +270,7 @@ struct USMMultipleFillCommandTest static constexpr size_t global_offset = 0; static constexpr size_t n_dimensions = 1; static constexpr size_t allocation_size = sizeof(val) * global_size; - static constexpr size_t num_kernels = 8; + static constexpr uint32_t num_kernels = 8; static constexpr size_t elements = global_size / num_kernels; void *shared_ptr = nullptr; @@ -290,7 +290,7 @@ TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { uint32_t *output = (uint32_t *)shared_ptr; for (size_t i = 0; i < global_size; i++) { - const uint32_t expected = val + (i / elements); + const uint32_t expected = val + (static_cast(i) / elements); ASSERT_EQ(expected, output[i]); } @@ -314,7 +314,7 @@ TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { }; // Update fill value - uint32_t new_fill_val = new_val + k; + uint32_t new_fill_val = new_val + static_cast(k); ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype nullptr, // pNext @@ -352,7 +352,7 @@ TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { // Verify that update occurred correctly uint32_t *updated_output = (uint32_t *)new_shared_ptr; for (size_t i = 0; i < global_size; i++) { - uint32_t expected = new_val + (i / elements); + uint32_t expected = new_val + (static_cast(i) / elements); ASSERT_EQ(expected, updated_output[i]) << i; } } diff --git a/test/conformance/exp_launch_properties/launch_properties.cpp b/test/conformance/exp_launch_properties/launch_properties.cpp index a54a44ecaf..fcb1bdc78c 100644 --- a/test/conformance/exp_launch_properties/launch_properties.cpp +++ b/test/conformance/exp_launch_properties/launch_properties.cpp @@ -14,7 +14,7 @@ struct urEnqueueKernelLaunchCustomTest : uur::urKernelExecutionTest { uint32_t val = 42; size_t global_size = 32; size_t global_offset = 0; - size_t n_dimensions = 1; + uint32_t n_dimensions = 1; }; 
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchCustomTest); diff --git a/test/conformance/integration/QueueBuffer.cpp b/test/conformance/integration/QueueBuffer.cpp index d801ebf684..02d5b7b1b9 100644 --- a/test/conformance/integration/QueueBuffer.cpp +++ b/test/conformance/integration/QueueBuffer.cpp @@ -4,8 +4,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "fixtures.h" -#include -#include +#include struct QueueBufferTestWithParam : uur::IntegrationQueueTestWithParam { void SetUp() override { @@ -47,8 +46,8 @@ TEST_P(QueueBufferTestWithParam, QueueBufferTest) { std::vector EventsFill; ur_event_handle_t Event; - size_t Buffer1Index; - size_t Buffer2Index; + size_t Buffer1Index = 0; + size_t Buffer2Index = 0; ASSERT_NO_FATAL_FAILURE( AddBuffer1DArg(ArraySize * sizeof(uint32_t), &Buffer1, &Buffer1Index)); ASSERT_NO_FATAL_FAILURE( @@ -75,10 +74,10 @@ TEST_P(QueueBufferTestWithParam, QueueBufferTest) { for (uint32_t i = 0; i < NumIterations; ++i) { /* Copy from DeviceMem1 to DeviceMem2 and multiply by 2 */ - ASSERT_SUCCESS( - urKernelSetArgMemObj(kernel, Buffer2Index, nullptr, Buffer2)); - ASSERT_SUCCESS( - urKernelSetArgMemObj(kernel, Buffer1Index, nullptr, Buffer1)); + ASSERT_SUCCESS(urKernelSetArgMemObj( + kernel, static_cast(Buffer2Index), nullptr, Buffer2)); + ASSERT_SUCCESS(urKernelSetArgMemObj( + kernel, static_cast(Buffer1Index), nullptr, Buffer1)); ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, NDimensions, &GlobalOffset, &ArraySize, nullptr, @@ -88,10 +87,10 @@ TEST_P(QueueBufferTestWithParam, QueueBufferTest) { CurValueMem2 = CurValueMem1 * 2; /* Copy from DeviceMem1 to DeviceMem2 and multiply by 2 */ - ASSERT_SUCCESS( - urKernelSetArgMemObj(kernel, Buffer1Index, nullptr, Buffer2)); - ASSERT_SUCCESS( - urKernelSetArgMemObj(kernel, Buffer2Index, nullptr, Buffer1)); + ASSERT_SUCCESS(urKernelSetArgMemObj( + kernel, static_cast(Buffer1Index), nullptr, Buffer2)); + ASSERT_SUCCESS(urKernelSetArgMemObj( + kernel, 
static_cast(Buffer2Index), nullptr, Buffer1)); ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, NDimensions, &GlobalOffset, &ArraySize, nullptr, diff --git a/test/conformance/integration/fixtures.h b/test/conformance/integration/fixtures.h index aca70a5245..d4d71fa9fa 100644 --- a/test/conformance/integration/fixtures.h +++ b/test/conformance/integration/fixtures.h @@ -35,7 +35,8 @@ struct IntegrationQueueTestWithParam void submitBarrierIfNeeded(std::vector &(Events)) { if (QueueFlags == UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier( - Queue, Events.size(), Events.data(), nullptr)); + Queue, static_cast(Events.size()), Events.data(), + nullptr)); AllEvents.insert(AllEvents.end(), Events.begin(), Events.end()); } } diff --git a/test/conformance/kernel/urKernelGetSuggestedLocalWorkSize.cpp b/test/conformance/kernel/urKernelGetSuggestedLocalWorkSize.cpp index 4eeabf5573..4b39755400 100644 --- a/test/conformance/kernel/urKernelGetSuggestedLocalWorkSize.cpp +++ b/test/conformance/kernel/urKernelGetSuggestedLocalWorkSize.cpp @@ -12,7 +12,7 @@ struct urKernelGetSuggestedLocalWorkSizeTest : uur::urKernelExecutionTest { } size_t global_size = 32; size_t global_offset = 0; - size_t n_dimensions = 1; + uint32_t n_dimensions = 1; size_t suggested_local_work_size; }; diff --git a/test/conformance/kernel/urKernelSetArgSampler.cpp b/test/conformance/kernel/urKernelSetArgSampler.cpp index 83a65613e5..9a8a16599e 100644 --- a/test/conformance/kernel/urKernelSetArgSampler.cpp +++ b/test/conformance/kernel/urKernelSetArgSampler.cpp @@ -105,7 +105,7 @@ UUR_INSTANTIATE_KERNEL_TEST_SUITE_P(urKernelSetArgSamplerTest); TEST_P(urKernelSetArgSamplerTest, SuccessWithProps) { ur_kernel_arg_sampler_properties_t props{ UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES, nullptr}; - size_t arg_index = 2; + uint32_t arg_index = 2; ASSERT_SUCCESS(urKernelSetArgSampler(kernel, arg_index, &props, sampler)); } diff --git 
a/test/conformance/memory-migrate/urMemBufferMigrateAcrossDevices.cpp b/test/conformance/memory-migrate/urMemBufferMigrateAcrossDevices.cpp index f7617a2940..bf2a44300a 100644 --- a/test/conformance/memory-migrate/urMemBufferMigrateAcrossDevices.cpp +++ b/test/conformance/memory-migrate/urMemBufferMigrateAcrossDevices.cpp @@ -88,7 +88,7 @@ struct urMultiDeviceContextMemBufferTest : urMultiDeviceContextTest { } // Adds a kernel arg representing a sycl buffer constructed with a 1D range. - void AddBuffer1DArg(ur_kernel_handle_t kernel, size_t current_arg_index, + void AddBuffer1DArg(ur_kernel_handle_t kernel, uint32_t current_arg_index, ur_mem_handle_t buffer) { ASSERT_SUCCESS( urKernelSetArgMemObj(kernel, current_arg_index, nullptr, buffer)); diff --git a/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp index 95a135af1c..1b8e380d27 100644 --- a/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp +++ b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp @@ -39,8 +39,8 @@ struct urMultiDeviceProgramCreateWithBinaryTest // Now create a program with multiple device binaries. 
ASSERT_SUCCESS(urProgramCreateWithBinary( - context, devices.size(), devices.data(), binary_sizes.data(), - pointers.data(), nullptr, &binary_program)); + context, static_cast(devices.size()), devices.data(), + binary_sizes.data(), pointers.data(), nullptr, &binary_program)); } void TearDown() override { @@ -61,7 +61,7 @@ struct urMultiDeviceProgramCreateWithBinaryTest TEST_F(urMultiDeviceProgramCreateWithBinaryTest, CreateAndRunKernelOnAllDevices) { constexpr size_t global_offset = 0; - constexpr size_t n_dimensions = 1; + constexpr uint32_t n_dimensions = 1; constexpr size_t global_size = 100; constexpr size_t local_size = 100; @@ -112,8 +112,9 @@ TEST_F(urMultiDeviceProgramCreateWithBinaryTest, pointers_with_invalid_binary.push_back(nullptr); } uur::raii::Program invalid_bin_program; - ASSERT_EQ(urProgramCreateWithBinary(context, devices.size(), devices.data(), - binary_sizes.data(), + ASSERT_EQ(urProgramCreateWithBinary(context, + static_cast(devices.size()), + devices.data(), binary_sizes.data(), pointers_with_invalid_binary.data(), nullptr, invalid_bin_program.ptr()), UR_RESULT_ERROR_INVALID_VALUE); @@ -132,20 +133,23 @@ TEST_F(urMultiDeviceProgramCreateWithBinaryTest, MultipleBuildCalls) { devices.begin(), devices.begin() + devices.size() / 2); auto second_subset = std::vector( devices.begin() + devices.size() / 2, devices.end()); - ASSERT_SUCCESS(urProgramBuildExp(binary_program, first_subset.size(), + ASSERT_SUCCESS(urProgramBuildExp(binary_program, + static_cast(first_subset.size()), first_subset.data(), nullptr)); auto kernelName = uur::KernelsEnvironment::instance->GetEntryPointNames("foo")[0]; uur::raii::Kernel kernel; ASSERT_SUCCESS( urKernelCreate(binary_program, kernelName.data(), kernel.ptr())); - ASSERT_SUCCESS(urProgramBuildExp(binary_program, second_subset.size(), - second_subset.data(), nullptr)); + ASSERT_SUCCESS(urProgramBuildExp( + binary_program, static_cast(second_subset.size()), + second_subset.data(), nullptr)); ASSERT_SUCCESS( 
urKernelCreate(binary_program, kernelName.data(), kernel.ptr())); // Building for the same subset of devices should not fail. - ASSERT_SUCCESS(urProgramBuildExp(binary_program, first_subset.size(), + ASSERT_SUCCESS(urProgramBuildExp(binary_program, + static_cast(first_subset.size()), first_subset.data(), nullptr)); } diff --git a/test/conformance/testing/include/uur/utils.h b/test/conformance/testing/include/uur/utils.h index 8548b12d11..dc2ade0310 100644 --- a/test/conformance/testing/include/uur/utils.h +++ b/test/conformance/testing/include/uur/utils.h @@ -176,8 +176,6 @@ ur_result_t GetObjectReferenceCount(T object, uint32_t &out_ref_count) { object, UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT, out_ref_count); } - - return UR_RESULT_ERROR_INVALID_VALUE; } inline std::string GetPlatformName(ur_platform_handle_t hPlatform) { diff --git a/test/conformance/usm/urUSMDeviceAlloc.cpp b/test/conformance/usm/urUSMDeviceAlloc.cpp index bfc48c2aa6..8f3bc680ce 100644 --- a/test/conformance/usm/urUSMDeviceAlloc.cpp +++ b/test/conformance/usm/urUSMDeviceAlloc.cpp @@ -116,9 +116,10 @@ TEST_P(urUSMDeviceAllocTest, InvalidNullPtrResult) { TEST_P(urUSMDeviceAllocTest, InvalidUSMSize) { void *ptr = nullptr; - ASSERT_EQ_RESULT( - UR_RESULT_ERROR_INVALID_USM_SIZE, - urUSMDeviceAlloc(context, device, nullptr, pool, -1, &ptr)); + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_USM_SIZE, + urUSMDeviceAlloc(context, device, nullptr, pool, + std::numeric_limits::max(), + &ptr)); } TEST_P(urUSMDeviceAllocTest, InvalidValueAlignPowerOfTwo) { diff --git a/test/conformance/usm/urUSMHostAlloc.cpp b/test/conformance/usm/urUSMHostAlloc.cpp index f59af21897..8e24c2d6b5 100644 --- a/test/conformance/usm/urUSMHostAlloc.cpp +++ b/test/conformance/usm/urUSMHostAlloc.cpp @@ -5,6 +5,7 @@ #include "helpers.h" #include +#include #include struct urUSMHostAllocTest @@ -128,7 +129,8 @@ TEST_P(urUSMHostAllocTest, InvalidNullPtrMem) { TEST_P(urUSMHostAllocTest, InvalidUSMSize) { void *ptr = nullptr; 
ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_USM_SIZE, - urUSMHostAlloc(context, nullptr, pool, -1, &ptr)); + urUSMHostAlloc(context, nullptr, pool, + std::numeric_limits::max(), &ptr)); } TEST_P(urUSMHostAllocTest, InvalidValueAlignPowerOfTwo) { diff --git a/test/conformance/usm/urUSMSharedAlloc.cpp b/test/conformance/usm/urUSMSharedAlloc.cpp index e543602fbc..f42e714fd5 100644 --- a/test/conformance/usm/urUSMSharedAlloc.cpp +++ b/test/conformance/usm/urUSMSharedAlloc.cpp @@ -4,6 +4,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "helpers.h" +#include #include struct urUSMSharedAllocTest @@ -145,9 +146,10 @@ TEST_P(urUSMSharedAllocTest, InvalidNullPtrMem) { TEST_P(urUSMSharedAllocTest, InvalidUSMSize) { void *ptr = nullptr; - ASSERT_EQ_RESULT( - UR_RESULT_ERROR_INVALID_USM_SIZE, - urUSMSharedAlloc(context, device, nullptr, pool, -1, &ptr)); + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_USM_SIZE, + urUSMSharedAlloc(context, device, nullptr, pool, + std::numeric_limits::max(), + &ptr)); } TEST_P(urUSMSharedAllocTest, InvalidValueAlignPowerOfTwo) { diff --git a/test/layers/tracing/test_collector.cpp b/test/layers/tracing/test_collector.cpp index db0940ad14..fa5a8bee4e 100644 --- a/test/layers/tracing/test_collector.cpp +++ b/test/layers/tracing/test_collector.cpp @@ -19,7 +19,14 @@ #include #include "ur_api.h" + +#ifdef _MSC_VER +#pragma warning(disable : 4245) +#endif #include "xpti/xpti_trace_framework.h" +#ifdef _MSC_VER +#pragma warning(default : 4245) +#endif constexpr uint16_t TRACE_FN_BEGIN = static_cast(xpti::trace_point_type_t::function_with_args_begin); diff --git a/tools/urinfo/utils.hpp b/tools/urinfo/utils.hpp index d7819b2947..c879d7fb56 100644 --- a/tools/urinfo/utils.hpp +++ b/tools/urinfo/utils.hpp @@ -51,7 +51,7 @@ inline std::string getAdapterBackend(ur_adapter_handle_t adapter) { stripPrefix(adapterBackendStream.str(), "UR_ADAPTER_BACKEND_"); std::transform(adapterBackendStr.begin(), adapterBackendStr.end(), 
adapterBackendStr.begin(), - [](unsigned char c) { return std::tolower(c); }); + [](char c) { return static_cast(std::tolower(c)); }); return adapterBackendStr; } @@ -65,7 +65,7 @@ inline std::string getDeviceType(ur_device_handle_t device) { stripPrefix(deviceTypeStream.str(), "UR_DEVICE_TYPE_"); std::transform(deviceTypeStr.begin(), deviceTypeStr.end(), deviceTypeStr.begin(), - [](unsigned char c) { return std::tolower(c); }); + [](char c) { return static_cast(std::tolower(c)); }); return deviceTypeStr; } diff --git a/tools/urtrace/collector.cpp b/tools/urtrace/collector.cpp index eb8c18d164..ea40d63e06 100644 --- a/tools/urtrace/collector.cpp +++ b/tools/urtrace/collector.cpp @@ -30,7 +30,14 @@ #include "ur_api.h" #include "ur_print.hpp" #include "ur_util.hpp" + +#ifdef _MSC_VER +#pragma warning(disable : 4245) +#endif #include "xpti/xpti_trace_framework.h" +#ifdef _MSC_VER +#pragma warning(default : 4245) +#endif constexpr uint16_t TRACE_FN_BEGIN = static_cast(xpti::trace_point_type_t::function_with_args_begin); @@ -279,7 +286,6 @@ std::unique_ptr create_writer() { default: ur::unreachable(); } - return nullptr; } static std::unique_ptr &writer() { From dc5246dd3339f374851130e0a02c7191b0877a90 Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Fri, 1 Nov 2024 14:12:51 -0700 Subject: [PATCH 21/37] [L0] Update Level Zero Loader version to v1.18.5 Signed-off-by: Neil R. 
Spruit --- cmake/FetchLevelZero.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/FetchLevelZero.cmake b/cmake/FetchLevelZero.cmake index 75bd5db0e7..564cff22fe 100644 --- a/cmake/FetchLevelZero.cmake +++ b/cmake/FetchLevelZero.cmake @@ -40,7 +40,7 @@ if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) set(UR_LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") endif() if (UR_LEVEL_ZERO_LOADER_TAG STREQUAL "") - set(UR_LEVEL_ZERO_LOADER_TAG v1.17.39) + set(UR_LEVEL_ZERO_LOADER_TAG v1.18.5) endif() # Disable due to a bug https://github.com/oneapi-src/level-zero/issues/104 From acecc34808b0fbb04927eef86746b5a366b65dec Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Wed, 30 Oct 2024 22:12:36 +0100 Subject: [PATCH 22/37] [L0 v2] Implement deferred event deallocation and fix timestamp checking in events. Until now, recordEventEndTimestamp was being checked directly to see if the WriteGlobalTimestamp completed. This is not safe, as we didn't use any atomics. To avoid any races, only read recordEventEndTimestamp when the associated zeEvent completed. 
--- scripts/templates/queue_api.hpp.mako | 3 ++ source/adapters/level_zero/v2/event.cpp | 40 ++++++++++++++----- source/adapters/level_zero/v2/event.hpp | 13 +++++- source/adapters/level_zero/v2/event_pool.cpp | 4 +- source/adapters/level_zero/v2/event_pool.hpp | 2 +- source/adapters/level_zero/v2/queue_api.hpp | 3 ++ .../v2/queue_immediate_in_order.cpp | 20 ++++++++-- .../v2/queue_immediate_in_order.hpp | 4 ++ .../level_zero/v2/event_pool_test.cpp | 12 +++--- .../event/event_adapter_level_zero_v2.match | 4 +- 10 files changed, 82 insertions(+), 23 deletions(-) diff --git a/scripts/templates/queue_api.hpp.mako b/scripts/templates/queue_api.hpp.mako index dcc86265f7..69a9af328b 100644 --- a/scripts/templates/queue_api.hpp.mako +++ b/scripts/templates/queue_api.hpp.mako @@ -25,6 +25,9 @@ from templates import helper as th struct ur_queue_handle_t_ { virtual ~ur_queue_handle_t_(); + + virtual void deferEventFree(ur_event_handle_t hEvent) = 0; + %for obj in th.get_queue_related_functions(specs, n, tags): virtual ${x}_result_t ${th.transform_queue_related_function_name(n, tags, obj, format=["type"])} = 0; %endfor diff --git a/source/adapters/level_zero/v2/event.cpp b/source/adapters/level_zero/v2/event.cpp index 1197ca7d82..fc479b1c4a 100644 --- a/source/adapters/level_zero/v2/event.cpp +++ b/source/adapters/level_zero/v2/event.cpp @@ -13,6 +13,7 @@ #include "event.hpp" #include "event_pool.hpp" #include "event_provider.hpp" +#include "queue_api.hpp" #include "../ur_interface_loader.hpp" @@ -24,6 +25,10 @@ ur_event_handle_t_::ur_event_handle_t_( zeTimerResolution(getDevice()->ZeDeviceProperties->timerResolution), timestampMaxValue(getDevice()->getTimestampMask()) {} +void ur_event_handle_t_::resetQueue(ur_queue_handle_t hQueue) { + this->hQueue = hQueue; +} + void ur_event_handle_t_::reset() { // consider make an abstraction for regular/counter based // events if there's more of this type of conditions @@ -33,6 +38,7 @@ void ur_event_handle_t_::reset() { } 
ze_event_handle_t ur_event_handle_t_::getZeEvent() const { + assert(hQueue); return zeEvent.get(); } @@ -41,14 +47,27 @@ ur_result_t ur_event_handle_t_::retain() { return UR_RESULT_SUCCESS; } +ur_result_t ur_event_handle_t_::releaseDeferred() { + assert(zeEventQueryStatus(zeEvent.get()) == ZE_RESULT_SUCCESS); + assert(RefCount.load() == 0); + + pool->free(this); + return UR_RESULT_SUCCESS; +} + ur_result_t ur_event_handle_t_::release() { if (!RefCount.decrementAndTest()) return UR_RESULT_SUCCESS; + // Need to take a lock before checking if the event is timestamped. + std::unique_lock lock(Mutex); + if (isTimestamped() && adjustedEventEndTimestamp == 0) { // L0 will write end timestamp to this event some time in the future, // so we can't release it yet. - // TODO: delay releasing until the end timestamp is written. + + assert(hQueue); + hQueue->deferEventFree(this); return UR_RESULT_SUCCESS; } @@ -99,17 +118,16 @@ uint64_t ur_event_handle_t_::getEventEndTimestamp() { if (adjustedEventEndTimestamp) return adjustedEventEndTimestamp; - // If the result is 0, we have not yet gotten results back and so we just - // return it. - if (recordEventEndTimestamp == 0) - return recordEventEndTimestamp; + auto status = zeEventQueryStatus(zeEvent.get()); + if (status != ZE_RESULT_SUCCESS) { + // profiling info not ready + return 0; + } - // Now that we have the result, there is no need to keep it in the queue - // anymore, so we cache it on the event and evict the record from the - // queue. 
adjustedEventEndTimestamp = adjustEndEventTimestamp(getEventStartTimestmap(), recordEventEndTimestamp, timestampMaxValue, zeTimerResolution); + return adjustedEventEndTimestamp; } @@ -118,11 +136,13 @@ void ur_event_handle_t_::recordStartTimestamp() { UR_CALL_THROWS(ur::level_zero::urDeviceGetGlobalTimestamps( getDevice(), &deviceStartTimestamp, nullptr)); + assert(adjustedEventStartTimestamp == 0); adjustedEventStartTimestamp = deviceStartTimestamp; } -uint64_t *ur_event_handle_t_::getEventEndTimestampPtr() { - return &recordEventEndTimestamp; +std::pair +ur_event_handle_t_::getEventEndTimestampAndHandle() { + return {&recordEventEndTimestamp, zeEvent.get()}; } namespace ur::level_zero { diff --git a/source/adapters/level_zero/v2/event.hpp b/source/adapters/level_zero/v2/event.hpp index 394f139b30..99ddd894ca 100644 --- a/source/adapters/level_zero/v2/event.hpp +++ b/source/adapters/level_zero/v2/event.hpp @@ -27,12 +27,19 @@ struct ur_event_handle_t_ : _ur_object { ur_event_handle_t_(v2::raii::cache_borrowed_event eventAllocation, v2::event_pool *pool); + // Set the queue that this event is associated with + void resetQueue(ur_queue_handle_t hQueue); + void reset(); ze_event_handle_t getZeEvent() const; ur_result_t retain(); ur_result_t release(); + // releases a signaled and no longer in-use event, that's on the + // deffered events list in the queue + ur_result_t releaseDeferred(); + // Tells if this event was created as a timestamp event, allowing profiling // info even if profiling is not enabled. bool isTimestamped() const; @@ -44,12 +51,16 @@ struct ur_event_handle_t_ : _ur_object { ur_device_handle_t getDevice() const; void recordStartTimestamp(); - uint64_t *getEventEndTimestampPtr(); + + // Get pointer to the end timestamp, and ze event handle. + // Caller is responsible for signaling the event once the timestamp is ready. 
+ std::pair getEventEndTimestampAndHandle(); uint64_t getEventStartTimestmap() const; uint64_t getEventEndTimestamp(); private: + ur_queue_handle_t hQueue = nullptr; v2::raii::cache_borrowed_event zeEvent; v2::event_pool *pool; diff --git a/source/adapters/level_zero/v2/event_pool.cpp b/source/adapters/level_zero/v2/event_pool.cpp index fe63681764..c97e733643 100644 --- a/source/adapters/level_zero/v2/event_pool.cpp +++ b/source/adapters/level_zero/v2/event_pool.cpp @@ -15,7 +15,7 @@ namespace v2 { static constexpr size_t EVENTS_BURST = 64; -ur_event_handle_t_ *event_pool::allocate() { +ur_event_handle_t_ *event_pool::allocate(ur_queue_handle_t hQueue) { TRACK_SCOPE_LATENCY("event_pool::allocate"); std::unique_lock lock(*mutex); @@ -32,6 +32,8 @@ ur_event_handle_t_ *event_pool::allocate() { auto event = freelist.back(); freelist.pop_back(); + event->resetQueue(hQueue); + return event; } diff --git a/source/adapters/level_zero/v2/event_pool.hpp b/source/adapters/level_zero/v2/event_pool.hpp index 924d29b907..7cd0d94513 100644 --- a/source/adapters/level_zero/v2/event_pool.hpp +++ b/source/adapters/level_zero/v2/event_pool.hpp @@ -41,7 +41,7 @@ class event_pool { DeviceId Id() { return provider->device()->Id.value(); }; // Allocate an event from the pool. Thread safe. - ur_event_handle_t_ *allocate(); + ur_event_handle_t_ *allocate(ur_queue_handle_t hQueue); // Free an event back to the pool. Thread safe. 
void free(ur_event_handle_t_ *event); diff --git a/source/adapters/level_zero/v2/queue_api.hpp b/source/adapters/level_zero/v2/queue_api.hpp index bc01596d2b..577f6c5aba 100644 --- a/source/adapters/level_zero/v2/queue_api.hpp +++ b/source/adapters/level_zero/v2/queue_api.hpp @@ -16,6 +16,9 @@ struct ur_queue_handle_t_ { virtual ~ur_queue_handle_t_(); + + virtual void deferEventFree(ur_event_handle_t hEvent) = 0; + virtual ur_result_t queueGetInfo(ur_queue_info_t, size_t, void *, size_t *) = 0; virtual ur_result_t queueRetain() = 0; diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index 8e31dd1bc6..717cadaf95 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -104,7 +104,7 @@ ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t( ur_event_handle_t ur_queue_immediate_in_order_t::getSignalEvent(ur_event_handle_t *hUserEvent) { if (hUserEvent) { - *hUserEvent = eventPool->allocate(); + *hUserEvent = eventPool->allocate(this); return *hUserEvent; } else { return nullptr; @@ -156,6 +156,11 @@ ur_result_t ur_queue_immediate_in_order_t::queueRelease() { return UR_RESULT_SUCCESS; } +void ur_queue_immediate_in_order_t::deferEventFree(ur_event_handle_t hEvent) { + std::unique_lock lock(this->Mutex); + deferredEvents.push_back(hEvent); +} + ur_result_t ur_queue_immediate_in_order_t::queueGetNativeHandle( ur_queue_native_desc_t *pDesc, ur_native_handle_t *phNativeQueue) { std::ignore = pDesc; @@ -175,6 +180,12 @@ ur_result_t ur_queue_immediate_in_order_t::queueFinish() { ZE2UR_CALL(zeCommandListHostSynchronize, (handler.commandList.get(), UINT64_MAX)); + // Free deferred events + for (auto &hEvent : deferredEvents) { + hEvent->releaseDeferred(); + } + deferredEvents.clear(); + return UR_RESULT_SUCCESS; } @@ -983,9 +994,12 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueTimestampRecordingExp( 
signalEvent->recordStartTimestamp(); + auto [timestampPtr, zeSignalEvent] = + signalEvent->getEventEndTimestampAndHandle(); + ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp, - (handler.commandList.get(), signalEvent->getEventEndTimestampPtr(), - signalEvent->getZeEvent(), numWaitEvents, pWaitEvents)); + (handler.commandList.get(), timestampPtr, zeSignalEvent, + numWaitEvents, pWaitEvents)); if (blocking) { ZE2UR_CALL(zeCommandListHostSynchronize, diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp index d8769d3b97..7800a08bad 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -46,12 +46,16 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { std::vector waitList; + std::vector deferredEvents; + std::pair getWaitListView(const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents); ur_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent); + void deferEventFree(ur_event_handle_t hEvent) override; + ur_result_t enqueueRegionCopyUnlocked( ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, diff --git a/test/adapters/level_zero/v2/event_pool_test.cpp b/test/adapters/level_zero/v2/event_pool_test.cpp index 9443e8fa7a..a4842425ca 100644 --- a/test/adapters/level_zero/v2/event_pool_test.cpp +++ b/test/adapters/level_zero/v2/event_pool_test.cpp @@ -150,7 +150,7 @@ TEST_P(EventPoolTest, Basic) { { auto pool = cache->borrow(device->Id.value(), getParam().flags); - first = pool->allocate(); + first = pool->allocate(reinterpret_cast(0x1)); zeFirst = first->getZeEvent(); urEventRelease(first); @@ -160,7 +160,7 @@ TEST_P(EventPoolTest, Basic) { { auto pool = cache->borrow(device->Id.value(), getParam().flags); - second = pool->allocate(); + second = pool->allocate(reinterpret_cast(0x1)); zeSecond = 
second->getZeEvent(); urEventRelease(second); @@ -179,7 +179,8 @@ TEST_P(EventPoolTest, Threaded) { auto pool = cache->borrow(device->Id.value(), getParam().flags); std::vector events; for (int i = 0; i < 100; ++i) { - events.push_back(pool->allocate()); + events.push_back(pool->allocate( + reinterpret_cast(0x1))); } for (int i = 0; i < 100; ++i) { urEventRelease(events[i]); @@ -197,7 +198,8 @@ TEST_P(EventPoolTest, ProviderNormalUseMostFreePool) { auto pool = cache->borrow(device->Id.value(), getParam().flags); std::list events; for (int i = 0; i < 128; ++i) { - events.push_back(pool->allocate()); + events.push_back( + pool->allocate(reinterpret_cast(0x1))); } auto frontZeHandle = events.front()->getZeEvent(); for (int i = 0; i < 8; ++i) { @@ -205,7 +207,7 @@ TEST_P(EventPoolTest, ProviderNormalUseMostFreePool) { events.pop_front(); } for (int i = 0; i < 8; ++i) { - auto e = pool->allocate(); + auto e = pool->allocate(reinterpret_cast(0x1)); events.push_back(e); } diff --git a/test/conformance/event/event_adapter_level_zero_v2.match b/test/conformance/event/event_adapter_level_zero_v2.match index 911e7b6783..8dd12f1830 100644 --- a/test/conformance/event/event_adapter_level_zero_v2.match +++ b/test/conformance/event/event_adapter_level_zero_v2.match @@ -5,8 +5,8 @@ urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{. 
urEventGetInfoNegativeTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventGetInfoNegativeTest.InvalidSizePropSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventGetInfoNegativeTest.InvalidSizePropSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_QUEUED -{{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_SUBMIT +urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_QUEUED +urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_SUBMIT urEventGetProfilingInfoWithTimingComparisonTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventSetCallbackTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventSetCallbackTest.ValidateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ From f48ae3d72ad12b9f0c289229212809d7d2b6047d Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Wed, 30 Oct 2024 22:53:00 +0100 Subject: [PATCH 23/37] [L0 v2] implement missing urEventGetInfo support --- source/adapters/level_zero/v2/event.cpp | 22 ++++++- source/adapters/level_zero/v2/event.hpp | 11 +++- source/adapters/level_zero/v2/event_pool.cpp | 5 +- source/adapters/level_zero/v2/event_pool.hpp | 3 +- .../v2/queue_immediate_in_order.cpp | 64 +++++++++++-------- .../v2/queue_immediate_in_order.hpp | 12 ++-- test/adapters/level_zero/v2/CMakeLists.txt | 1 + .../level_zero/v2/event_pool_test.cpp | 17 +++-- .../event/event_adapter_level_zero_v2.match | 7 -- 9 files changed, 91 insertions(+), 51 deletions(-) diff --git a/source/adapters/level_zero/v2/event.cpp b/source/adapters/level_zero/v2/event.cpp index fc479b1c4a..a0efad0692 
100644 --- a/source/adapters/level_zero/v2/event.cpp +++ b/source/adapters/level_zero/v2/event.cpp @@ -25,8 +25,10 @@ ur_event_handle_t_::ur_event_handle_t_( zeTimerResolution(getDevice()->ZeDeviceProperties->timerResolution), timestampMaxValue(getDevice()->getTimestampMask()) {} -void ur_event_handle_t_::resetQueue(ur_queue_handle_t hQueue) { +void ur_event_handle_t_::resetQueueAndCommand(ur_queue_handle_t hQueue, + ur_command_t commandType) { this->hQueue = hQueue; + this->commandType = commandType; } void ur_event_handle_t_::reset() { @@ -39,6 +41,7 @@ void ur_event_handle_t_::reset() { ze_event_handle_t ur_event_handle_t_::getZeEvent() const { assert(hQueue); + assert(commandType != UR_COMMAND_FORCE_UINT32); return zeEvent.get(); } @@ -145,6 +148,10 @@ ur_event_handle_t_::getEventEndTimestampAndHandle() { return {&recordEventEndTimestamp, zeEvent.get()}; } +ur_queue_handle_t ur_event_handle_t_::getQueue() const { return hQueue; } + +ur_command_t ur_event_handle_t_::getCommandType() const { return commandType; } + namespace ur::level_zero { ur_result_t urEventRetain(ur_event_handle_t hEvent) { return hEvent->retain(); } @@ -179,6 +186,19 @@ ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, case UR_EVENT_INFO_REFERENCE_COUNT: { return returnValue(hEvent->RefCount.load()); } + case UR_EVENT_INFO_COMMAND_QUEUE: { + return returnValue(ur_queue_handle_t{hEvent->getQueue()}); + } + case UR_EVENT_INFO_CONTEXT: { + ur_context_handle_t hContext; + UR_CALL(::ur::level_zero::urQueueGetInfo( + hEvent->getQueue(), UR_QUEUE_INFO_CONTEXT, sizeof(hContext), + reinterpret_cast(&hContext), nullptr)); + return returnValue(hContext); + } + case UR_EVENT_INFO_COMMAND_TYPE: { + return returnValue(hEvent->getCommandType()); + } default: logger::error( "Unsupported ParamName in urEventGetInfo: ParamName=ParamName={}(0x{})", diff --git a/source/adapters/level_zero/v2/event.hpp b/source/adapters/level_zero/v2/event.hpp index 99ddd894ca..3c22ef1337 100644 --- 
a/source/adapters/level_zero/v2/event.hpp +++ b/source/adapters/level_zero/v2/event.hpp @@ -27,8 +27,8 @@ struct ur_event_handle_t_ : _ur_object { ur_event_handle_t_(v2::raii::cache_borrowed_event eventAllocation, v2::event_pool *pool); - // Set the queue that this event is associated with - void resetQueue(ur_queue_handle_t hQueue); + // Set the queue and command that this event is associated with + void resetQueueAndCommand(ur_queue_handle_t hQueue, ur_command_t commandType); void reset(); ze_event_handle_t getZeEvent() const; @@ -50,6 +50,12 @@ struct ur_event_handle_t_ : _ur_object { // Device associated with this event ur_device_handle_t getDevice() const; + // Queue associated with this event + ur_queue_handle_t getQueue() const; + + // Get the type of the command that this event is associated with + ur_command_t getCommandType() const; + void recordStartTimestamp(); // Get pointer to the end timestamp, and ze event handle. @@ -61,6 +67,7 @@ struct ur_event_handle_t_ : _ur_object { private: ur_queue_handle_t hQueue = nullptr; + ur_command_t commandType = UR_COMMAND_FORCE_UINT32; v2::raii::cache_borrowed_event zeEvent; v2::event_pool *pool; diff --git a/source/adapters/level_zero/v2/event_pool.cpp b/source/adapters/level_zero/v2/event_pool.cpp index c97e733643..523aaf7fb9 100644 --- a/source/adapters/level_zero/v2/event_pool.cpp +++ b/source/adapters/level_zero/v2/event_pool.cpp @@ -15,7 +15,8 @@ namespace v2 { static constexpr size_t EVENTS_BURST = 64; -ur_event_handle_t_ *event_pool::allocate(ur_queue_handle_t hQueue) { +ur_event_handle_t_ *event_pool::allocate(ur_queue_handle_t hQueue, + ur_command_t commandType) { TRACK_SCOPE_LATENCY("event_pool::allocate"); std::unique_lock lock(*mutex); @@ -32,7 +33,7 @@ ur_event_handle_t_ *event_pool::allocate(ur_queue_handle_t hQueue) { auto event = freelist.back(); freelist.pop_back(); - event->resetQueue(hQueue); + event->resetQueueAndCommand(hQueue, commandType); return event; } diff --git 
a/source/adapters/level_zero/v2/event_pool.hpp b/source/adapters/level_zero/v2/event_pool.hpp index 7cd0d94513..e9ad4051e6 100644 --- a/source/adapters/level_zero/v2/event_pool.hpp +++ b/source/adapters/level_zero/v2/event_pool.hpp @@ -41,7 +41,8 @@ class event_pool { DeviceId Id() { return provider->device()->Id.value(); }; // Allocate an event from the pool. Thread safe. - ur_event_handle_t_ *allocate(ur_queue_handle_t hQueue); + ur_event_handle_t_ *allocate(ur_queue_handle_t hQueue, + ur_command_t commandType); // Free an event back to the pool. Thread safe. void free(ur_event_handle_t_ *event); diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index 717cadaf95..08fae0719f 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -102,9 +102,10 @@ ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t( ownZeQueue) {} ur_event_handle_t -ur_queue_immediate_in_order_t::getSignalEvent(ur_event_handle_t *hUserEvent) { +ur_queue_immediate_in_order_t::getSignalEvent(ur_event_handle_t *hUserEvent, + ur_command_t commandType) { if (hUserEvent) { - *hUserEvent = eventPool->allocate(this); + *hUserEvent = eventPool->allocate(this, commandType); return *hUserEvent; } else { return nullptr; @@ -217,7 +218,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunch( zeThreadGroupDimensions, WG, workDim, pGlobalWorkSize, pLocalWorkSize)); - auto signalEvent = getSignalEvent(phEvent); + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_KERNEL_LAUNCH); auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); @@ -262,7 +263,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWait( return UR_RESULT_SUCCESS; } - auto signalEvent = getSignalEvent(phEvent); + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_EVENTS_WAIT); auto [pWaitEvents, numWaitEvents] = 
getWaitListView(phEventWaitList, numEventsInWaitList); @@ -292,8 +293,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier( ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked( ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - auto signalEvent = getSignalEvent(phEvent); + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType) { + auto signalEvent = getSignalEvent(phEvent, commandType); auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); @@ -349,9 +351,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferRead( std::scoped_lock lock(this->Mutex, hBuffer->getMutex()); - return enqueueGenericCopyUnlocked(hBuffer, &dstHandle, blockingRead, offset, - 0, size, numEventsInWaitList, - phEventWaitList, phEvent); + return enqueueGenericCopyUnlocked( + hBuffer, &dstHandle, blockingRead, offset, 0, size, numEventsInWaitList, + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_READ); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWrite( @@ -367,9 +369,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWrite( std::scoped_lock lock(this->Mutex, hBuffer->getMutex()); - return enqueueGenericCopyUnlocked(&srcHandle, hBuffer, blockingWrite, 0, - offset, size, numEventsInWaitList, - phEventWaitList, phEvent); + return enqueueGenericCopyUnlocked( + &srcHandle, hBuffer, blockingWrite, 0, offset, size, numEventsInWaitList, + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_WRITE); } ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked( @@ -377,11 +379,12 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked( ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t 
dstSlicePitch, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType) { auto zeParams = ur2zeRegionParams(srcOrigin, dstOrigin, region, srcRowPitch, dstRowPitch, srcSlicePitch, dstSlicePitch); - auto signalEvent = getSignalEvent(phEvent); + auto signalEvent = getSignalEvent(phEvent, commandType); auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); @@ -442,7 +445,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferReadRect( return enqueueRegionCopyUnlocked( hBuffer, &dstHandle, blockingRead, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, - numEventsInWaitList, phEventWaitList, phEvent); + numEventsInWaitList, phEventWaitList, phEvent, + UR_COMMAND_MEM_BUFFER_READ_RECT); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWriteRect( @@ -462,7 +466,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferWriteRect( return enqueueRegionCopyUnlocked( &srcHandle, hBuffer, blockingWrite, hostOrigin, bufferOrigin, region, hostRowPitch, hostSlicePitch, bufferRowPitch, bufferSlicePitch, - numEventsInWaitList, phEventWaitList, phEvent); + numEventsInWaitList, phEventWaitList, phEvent, + UR_COMMAND_MEM_BUFFER_WRITE_RECT); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopy( @@ -481,7 +486,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopy( return enqueueGenericCopyUnlocked(hBufferSrc, hBufferDst, false, srcOffset, dstOffset, size, numEventsInWaitList, - phEventWaitList, phEvent); + phEventWaitList, phEvent, + UR_COMMAND_MEM_BUFFER_COPY); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopyRect( @@ -499,7 +505,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferCopyRect( return enqueueRegionCopyUnlocked( hBufferSrc, hBufferDst, false, srcOrigin, dstOrigin, region, srcRowPitch, 
srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, - phEventWaitList, phEvent); + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_COPY_RECT); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferFill( @@ -515,7 +521,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferFill( return enqueueGenericFillUnlocked(hBuffer, offset, patternSize, pPattern, size, numEventsInWaitList, phEventWaitList, - phEvent); + phEvent, UR_COMMAND_MEM_BUFFER_FILL); } ur_result_t ur_queue_immediate_in_order_t::enqueueMemImageRead( @@ -580,7 +586,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap( std::scoped_lock lock(this->Mutex, hBuffer->getMutex()); - auto signalEvent = getSignalEvent(phEvent); + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_BUFFER_MAP); auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); @@ -619,7 +625,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemUnmap( std::scoped_lock lock(this->Mutex); - auto signalEvent = getSignalEvent(phEvent); + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_UNMAP); auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); @@ -648,9 +654,10 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemUnmap( ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked( ur_mem_handle_t dst, size_t offset, size_t patternSize, const void *pPattern, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType) { - auto signalEvent = getSignalEvent(phEvent); + auto signalEvent = getSignalEvent(phEvent, commandType); auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); @@ -694,7 +701,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMFill( ur_usm_handle_t_ dstHandle(hContext, size, pMem); return enqueueGenericFillUnlocked(&dstHandle, 0, patternSize, 
pPattern, size, numEventsInWaitList, phEventWaitList, - phEvent); + phEvent, UR_COMMAND_USM_FILL); } ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy( @@ -706,7 +713,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy( std::scoped_lock lock(this->Mutex); - auto signalEvent = getSignalEvent(phEvent); + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY); auto [pWaitEvents, numWaitEvents] = getWaitListView(phEventWaitList, numEventsInWaitList); @@ -734,7 +741,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMPrefetch( std::scoped_lock lock(this->Mutex); - auto signalEvent = getSignalEvent(phEvent); + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_PREFETCH); auto [pWaitEvents, numWaitEvents] = getWaitListView(phEventWaitList, numEventsInWaitList); @@ -767,7 +774,7 @@ ur_queue_immediate_in_order_t::enqueueUSMAdvise(const void *pMem, size_t size, auto zeAdvice = ur_cast(advice); - auto signalEvent = getSignalEvent(phEvent); + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_ADVISE); auto [pWaitEvents, numWaitEvents] = getWaitListView(nullptr, 0); @@ -983,7 +990,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueTimestampRecordingExp( std::scoped_lock lock(this->Mutex); - auto signalEvent = getSignalEvent(phEvent); + auto signalEvent = + getSignalEvent(phEvent, UR_COMMAND_TIMESTAMP_RECORDING_EXP); if (!signalEvent) { return UR_RESULT_ERROR_INVALID_NULL_HANDLE; diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp index 7800a08bad..3fac90110a 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -52,7 +52,8 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { getWaitListView(const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents); - ur_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent); + 
ur_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent, + ur_command_t commandType); void deferEventFree(ur_event_handle_t hEvent) override; @@ -61,17 +62,20 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { ur_rect_offset_t srcOrigin, ur_rect_offset_t dstOrigin, ur_rect_region_t region, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType); ur_result_t enqueueGenericCopyUnlocked( ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType); ur_result_t enqueueGenericFillUnlocked( ur_mem_handle_t hBuffer, size_t offset, size_t patternSize, const void *pPattern, size_t size, uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, + ur_command_t commandType); public: ur_queue_immediate_in_order_t(ur_context_handle_t, ur_device_handle_t, diff --git a/test/adapters/level_zero/v2/CMakeLists.txt b/test/adapters/level_zero/v2/CMakeLists.txt index f6fa03bd6a..4e4a955f49 100644 --- a/test/adapters/level_zero/v2/CMakeLists.txt +++ b/test/adapters/level_zero/v2/CMakeLists.txt @@ -44,6 +44,7 @@ add_unittest(level_zero_event_pool ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_provider_normal.cpp ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event_provider_counter.cpp ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/event.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/v2/queue_api.cpp ) 
add_adapter_test(level_zero_memory_residency diff --git a/test/adapters/level_zero/v2/event_pool_test.cpp b/test/adapters/level_zero/v2/event_pool_test.cpp index a4842425ca..1029d471df 100644 --- a/test/adapters/level_zero/v2/event_pool_test.cpp +++ b/test/adapters/level_zero/v2/event_pool_test.cpp @@ -150,7 +150,8 @@ TEST_P(EventPoolTest, Basic) { { auto pool = cache->borrow(device->Id.value(), getParam().flags); - first = pool->allocate(reinterpret_cast(0x1)); + first = pool->allocate(reinterpret_cast(0x1), + UR_COMMAND_KERNEL_LAUNCH); zeFirst = first->getZeEvent(); urEventRelease(first); @@ -160,7 +161,8 @@ TEST_P(EventPoolTest, Basic) { { auto pool = cache->borrow(device->Id.value(), getParam().flags); - second = pool->allocate(reinterpret_cast(0x1)); + second = pool->allocate(reinterpret_cast(0x1), + UR_COMMAND_KERNEL_LAUNCH); zeSecond = second->getZeEvent(); urEventRelease(second); @@ -179,8 +181,9 @@ TEST_P(EventPoolTest, Threaded) { auto pool = cache->borrow(device->Id.value(), getParam().flags); std::vector events; for (int i = 0; i < 100; ++i) { - events.push_back(pool->allocate( - reinterpret_cast(0x1))); + events.push_back( + pool->allocate(reinterpret_cast(0x1), + UR_COMMAND_KERNEL_LAUNCH)); } for (int i = 0; i < 100; ++i) { urEventRelease(events[i]); @@ -199,7 +202,8 @@ TEST_P(EventPoolTest, ProviderNormalUseMostFreePool) { std::list events; for (int i = 0; i < 128; ++i) { events.push_back( - pool->allocate(reinterpret_cast(0x1))); + pool->allocate(reinterpret_cast(0x1), + UR_COMMAND_KERNEL_LAUNCH)); } auto frontZeHandle = events.front()->getZeEvent(); for (int i = 0; i < 8; ++i) { @@ -207,7 +211,8 @@ TEST_P(EventPoolTest, ProviderNormalUseMostFreePool) { events.pop_front(); } for (int i = 0; i < 8; ++i) { - auto e = pool->allocate(reinterpret_cast(0x1)); + auto e = pool->allocate(reinterpret_cast(0x1), + UR_COMMAND_KERNEL_LAUNCH); events.push_back(e); } diff --git a/test/conformance/event/event_adapter_level_zero_v2.match 
b/test/conformance/event/event_adapter_level_zero_v2.match index 8dd12f1830..767bb53c6e 100644 --- a/test/conformance/event/event_adapter_level_zero_v2.match +++ b/test/conformance/event/event_adapter_level_zero_v2.match @@ -1,10 +1,3 @@ -{{NONDETERMINISTIC}} -urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_COMMAND_QUEUE -urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_CONTEXT -urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_COMMAND_TYPE -urEventGetInfoNegativeTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventGetInfoNegativeTest.InvalidSizePropSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventGetInfoNegativeTest.InvalidSizePropSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_QUEUED urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_SUBMIT urEventGetProfilingInfoWithTimingComparisonTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ From 1a110613094a2335d71b85b34b6aef4d21607238 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Fri, 8 Nov 2024 09:46:47 +0100 Subject: [PATCH 24/37] fix path to markdown results in benchmarks workflow The benchmarks script is now run with its working directory set to ur-repo so that the scripts can figure out the commit hash. This means that the output markdown is now stored in a different location.
--- .github/workflows/benchmarks-reusable.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmarks-reusable.yml b/.github/workflows/benchmarks-reusable.yml index bc59d09cdf..79cb35748e 100644 --- a/.github/workflows/benchmarks-reusable.yml +++ b/.github/workflows/benchmarks-reusable.yml @@ -176,7 +176,7 @@ jobs: let markdown = "" try { const fs = require('fs'); - markdown = fs.readFileSync('benchmark_results.md', 'utf8'); + markdown = fs.readFileSync('ur-repo/benchmark_results.md', 'utf8'); } catch(err) { } From 9f60cb4d93257fdd36703a7e7a2e1b641cd456ec Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Fri, 8 Nov 2024 12:45:01 +0100 Subject: [PATCH 25/37] [benchmarks] HTML output improvements This patch is mostly a few minor changes to the bar charts to make them more legible. --- scripts/benchmarks/main.py | 6 ++-- scripts/benchmarks/output_html.py | 60 +++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/scripts/benchmarks/main.py b/scripts/benchmarks/main.py index 0756554e77..a31268a240 100755 --- a/scripts/benchmarks/main.py +++ b/scripts/benchmarks/main.py @@ -114,7 +114,6 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): history.load(1000) for name in compare_names: - print(f"compare name: {name}") compare_result = history.get_compare(name) if compare_result: chart_data[name] = compare_result.results @@ -125,6 +124,8 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): with open('benchmark_results.md', 'w') as file: file.write(markdown_content) + print(f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md") + saved_name = save_name if save_name is not None else this_name # It's important we don't save the current results into history before @@ -132,7 +133,6 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): # Otherwise we might be comparing the results to 
themselves. if not options.dry_run: history.save(saved_name, results, save_name is not None) - print(f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md") compare_names.append(saved_name) if options.output_html: @@ -141,7 +141,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): with open('benchmark_results.html', 'w') as file: file.write(html_content) - print(f"HTML with benchmark results has been written to {os.getcwd()}/benchmark_results.html") + print(f"HTML with benchmark results has been written to {os.getcwd()}/benchmark_results.html") def validate_and_parse_env_args(env_args): env_vars = {} diff --git a/scripts/benchmarks/output_html.py b/scripts/benchmarks/output_html.py index 8249bc75c9..4a04252797 100644 --- a/scripts/benchmarks/output_html.py +++ b/scripts/benchmarks/output_html.py @@ -3,6 +3,7 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +import re import matplotlib.pyplot as plt import mpld3 from collections import defaultdict @@ -67,12 +68,22 @@ def prepare_normalized_data(latest_results: dict[str, LatestResults], return normalized_data def format_benchmark_label(label: str) -> list[str]: - words = label.split() - if len(words) <= 2: - return [label] + words = re.split(' |_', label) + lines = [] + current_line = [] - mid = len(words) // 2 - return [' '.join(words[:mid]), ' '.join(words[mid:])] + # max line length 30 + for word in words: + if len(' '.join(current_line + [word])) > 30: + lines.append(' '.join(current_line)) + current_line = [word] + else: + current_line.append(word) + + if current_line: + lines.append(' '.join(current_line)) + + return lines def create_bar_plot(ax: plt.Axes, normalized_data: list[list[float]], @@ -109,9 +120,8 @@ def create_bar_plot(ax: plt.Axes, tooltip_labels = [ f"Run: {run_name}\n" - f"Benchmark: {benchmark_label}\n" f"Value: {current_value:.2f} {unit}\n" - f"Baseline ({baseline_name}): {baseline_value:.2f} {unit}\n" 
+ f"Normalized to ({baseline_name}): {baseline_value:.2f} {unit}\n" f"Normalized: {value:.1f}%" ] tooltip = mpld3.plugins.LineHTMLTooltip(rect, tooltip_labels, css='.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}') @@ -141,6 +151,37 @@ def add_chart_elements(ax: plt.Axes, ax.grid(True, axis='y', alpha=0.2) ax.legend(bbox_to_anchor=(1, 1), loc='upper left') +def split_large_groups(benchmark_groups): + miscellaneous = [] + new_groups = defaultdict(list) + + split_happened = False + for group, labels in benchmark_groups.items(): + if len(labels) == 1: + miscellaneous.extend(labels) + elif len(labels) > 5: + split_happened = True + mid = len(labels) // 2 + new_groups[group] = labels[:mid] + new_groups[group + '_'] = labels[mid:] + else: + new_groups[group] = labels + + if miscellaneous: + new_groups['Miscellaneous'] = miscellaneous + + if split_happened: + return split_large_groups(new_groups) + else: + return new_groups + +def group_benchmark_labels(benchmark_labels): + benchmark_groups = defaultdict(list) + for label in benchmark_labels: + group = re.match(r'^[^_\s]+', label)[0] + benchmark_groups[group].append(label) + return split_large_groups(benchmark_groups) + def create_normalized_bar_chart(benchmarks: list[BenchmarkSeries], baseline_name: str) -> list[str]: latest_results = get_latest_results(benchmarks) @@ -154,10 +195,7 @@ def create_normalized_bar_chart(benchmarks: list[BenchmarkSeries], baseline_name benchmark_labels = [b.label for b in benchmarks] - benchmark_groups = defaultdict(list) - for label in benchmark_labels: - group_name = label.split()[0] - benchmark_groups[group_name].append(label) + benchmark_groups = group_benchmark_labels(benchmark_labels) html_charts = [] From 5bc8c9250f6f21cd50e9e58fa8ac54e5bdf0007b Mon Sep 17 00:00:00 2001 From: Callum Fare Date: Fri, 8 Nov 2024 16:05:56 +0000 Subject: [PATCH 26/37] Revert "Raise MSVC warning level from /W3 to /W4" This reverts 
commit 8e5634775bcf6e8bb9067a1aa5564605fb5bd043. --- cmake/helpers.cmake | 15 ++---- examples/collector/collector.cpp | 7 --- include/ur_api.h | 2 +- scripts/core/common.yml | 2 +- source/adapters/cuda/CMakeLists.txt | 7 ++- source/adapters/cuda/command_buffer.cpp | 26 +++++----- source/adapters/cuda/device.cpp | 4 +- source/adapters/cuda/enqueue.cpp | 50 ++++++++----------- source/adapters/cuda/image.cpp | 5 +- source/adapters/cuda/kernel.cpp | 4 +- source/adapters/cuda/kernel.hpp | 8 +-- source/adapters/cuda/program.cpp | 4 +- source/adapters/cuda/usm.cpp | 2 +- source/adapters/level_zero/CMakeLists.txt | 18 +++---- source/adapters/level_zero/adapter.cpp | 2 + source/adapters/level_zero/context.cpp | 2 +- source/adapters/level_zero/device.cpp | 16 +++--- source/adapters/level_zero/event.cpp | 2 + source/adapters/level_zero/platform.cpp | 2 + source/adapters/level_zero/program.hpp | 3 +- source/adapters/level_zero/queue.cpp | 2 +- source/adapters/level_zero/usm.cpp | 1 + source/adapters/level_zero/v2/event.cpp | 2 + source/adapters/level_zero/v2/kernel.cpp | 2 + source/adapters/level_zero/v2/memory.cpp | 2 + .../v2/queue_immediate_in_order.cpp | 2 + source/adapters/level_zero/v2/usm.cpp | 1 + source/adapters/level_zero/virtual_mem.cpp | 3 ++ source/adapters/opencl/adapter.cpp | 2 + source/adapters/opencl/event.cpp | 6 +-- source/adapters/opencl/memory.cpp | 9 ++-- source/adapters/opencl/program.cpp | 15 +++--- source/adapters/opencl/queue.cpp | 4 +- source/adapters/opencl/usm.cpp | 8 ++- source/adapters/opencl/usm_p2p.cpp | 11 ++-- source/common/logger/ur_logger.hpp | 5 +- source/common/ur_util.cpp | 2 +- source/common/ur_util.hpp | 5 +- .../layers/tracing/ur_tracing_layer.cpp | 8 --- source/loader/ur_adapter_registry.hpp | 13 +++-- source/loader/ur_lib.cpp | 13 +++-- source/loader/windows/adapter_search.cpp | 3 +- test/CMakeLists.txt | 3 +- test/conformance/device/urDevicePartition.cpp | 2 +- .../urEnqueueDeviceGlobalVariableRead.cpp | 2 +- 
.../urEnqueueEventsWaitMultiDevice.cpp | 11 ++-- .../enqueue/urEnqueueKernelLaunch.cpp | 16 +++--- .../urEnqueueKernelLaunchAndMemcpyInOrder.cpp | 4 +- .../exp_command_buffer/commands.cpp | 6 +-- .../conformance/exp_command_buffer/fixtures.h | 2 - .../update/buffer_fill_kernel_update.cpp | 4 +- .../update/buffer_saxpy_kernel_update.cpp | 2 +- .../update/kernel_handle_update.cpp | 18 +++---- .../update/usm_fill_kernel_update.cpp | 10 ++-- .../launch_properties.cpp | 2 +- test/conformance/integration/QueueBuffer.cpp | 23 +++++---- test/conformance/integration/fixtures.h | 3 +- .../urKernelGetSuggestedLocalWorkSize.cpp | 2 +- .../kernel/urKernelSetArgSampler.cpp | 2 +- .../urMemBufferMigrateAcrossDevices.cpp | 2 +- .../urMultiDeviceProgramCreateWithBinary.cpp | 22 ++++---- test/conformance/testing/include/uur/utils.h | 2 + test/conformance/usm/urUSMDeviceAlloc.cpp | 7 ++- test/conformance/usm/urUSMHostAlloc.cpp | 4 +- test/conformance/usm/urUSMSharedAlloc.cpp | 8 ++- test/layers/tracing/test_collector.cpp | 7 --- tools/urinfo/utils.hpp | 4 +- tools/urtrace/collector.cpp | 8 +-- 68 files changed, 214 insertions(+), 262 deletions(-) diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index e45020bdea..a6e3a344a4 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -99,25 +99,18 @@ function(add_ur_target_compile_options name) elseif(MSVC) target_compile_options(${name} PRIVATE $<$:/MP> # clang-cl.exe does not support /MP - /W4 - /wd4456 # Disable: declaration of 'identifier' hides previous local declaration - /wd4457 # Disable: declaration of 'identifier' hides function parameter - /wd4458 # Disable: declaration of 'identifier' hides class member - /wd4459 # Disable: declaration of 'identifier' hides global declaration + /W3 /MD$<$:d> /GS /DWIN32_LEAN_AND_MEAN /DNOMINMAX ) - target_compile_definitions(${name} PRIVATE - # _CRT_SECURE_NO_WARNINGS used mainly because of getenv - _CRT_SECURE_NO_WARNINGS - ) - if(UR_DEVELOPER_MODE) + # _CRT_SECURE_NO_WARNINGS used 
mainly because of getenv + # C4267: The compiler detected a conversion from size_t to a smaller type. target_compile_options(${name} PRIVATE - /WX /GS + /WX /GS /D_CRT_SECURE_NO_WARNINGS /wd4267 ) endif() endif() diff --git a/examples/collector/collector.cpp b/examples/collector/collector.cpp index ddcd32e775..6312dba549 100644 --- a/examples/collector/collector.cpp +++ b/examples/collector/collector.cpp @@ -25,14 +25,7 @@ #include #include "ur_api.h" - -#ifdef _MSC_VER -#pragma warning(disable : 4245) -#endif #include "xpti/xpti_trace_framework.h" -#ifdef _MSC_VER -#pragma warning(default : 4245) -#endif constexpr uint16_t TRACE_FN_BEGIN = static_cast(xpti::trace_point_type_t::function_with_args_begin); diff --git a/include/ur_api.h b/include/ur_api.h index 59747a72ea..60d6fc2f70 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -424,7 +424,7 @@ typedef struct ur_physical_mem_handle_t_ *ur_physical_mem_handle_t; /////////////////////////////////////////////////////////////////////////////// #ifndef UR_BIT /// @brief Generic macro for enumerator bit masks -#define UR_BIT(_i) (1U << _i) +#define UR_BIT(_i) (1 << _i) #endif // UR_BIT /////////////////////////////////////////////////////////////////////////////// diff --git a/scripts/core/common.yml b/scripts/core/common.yml index d1f5b769fa..73501ac39d 100644 --- a/scripts/core/common.yml +++ b/scripts/core/common.yml @@ -134,7 +134,7 @@ name: "$x_physical_mem_handle_t" type: macro desc: "Generic macro for enumerator bit masks" name: "$X_BIT( _i )" -value: "( 1U << _i )" +value: "( 1 << _i )" --- #-------------------------------------------------------------------------- type: enum desc: "Defines Return/Error codes" diff --git a/source/adapters/cuda/CMakeLists.txt b/source/adapters/cuda/CMakeLists.txt index a73b7ee886..b6b153a5d8 100644 --- a/source/adapters/cuda/CMakeLists.txt +++ b/source/adapters/cuda/CMakeLists.txt @@ -97,16 +97,15 @@ if (UR_ENABLE_TRACING) get_target_property(XPTI_SRC_DIR xpti SOURCE_DIR) 
set(XPTI_PROXY_SRC "${XPTI_SRC_DIR}/xpti_proxy.cpp") endif() - add_library(cuda-xpti-proxy STATIC ${XPTI_PROXY_SRC}) - target_compile_definitions(cuda-xpti-proxy PRIVATE + target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_ENABLE_INSTRUMENTATION XPTI_STATIC_LIBRARY ) - target_include_directories(cuda-xpti-proxy PRIVATE + target_include_directories(${TARGET_NAME} PRIVATE ${XPTI_INCLUDES} ${CUDA_CUPTI_INCLUDE_DIR} ) - target_link_libraries(${TARGET_NAME} PRIVATE cuda-xpti-proxy) + target_sources(${TARGET_NAME} PRIVATE ${XPTI_PROXY_SRC}) endif() if (CUDA_cupti_LIBRARY) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 65253da739..527c339783 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -242,7 +242,7 @@ static ur_result_t enqueueCommandBufferFillHelper( if ((PatternSize == 1) || (PatternSize == 2) || (PatternSize == 4)) { CUDA_MEMSET_NODE_PARAMS NodeParams = {}; NodeParams.dst = DstPtr; - NodeParams.elementSize = static_cast(PatternSize); + NodeParams.elementSize = PatternSize; NodeParams.height = N; NodeParams.pitch = PatternSize; NodeParams.width = 1; @@ -508,12 +508,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( auto &ArgIndices = hKernel->getArgIndices(); CUDA_KERNEL_NODE_PARAMS NodeParams = {}; NodeParams.func = CuFunc; - NodeParams.gridDimX = static_cast(BlocksPerGrid[0]); - NodeParams.gridDimY = static_cast(BlocksPerGrid[1]); - NodeParams.gridDimZ = static_cast(BlocksPerGrid[2]); - NodeParams.blockDimX = static_cast(ThreadsPerBlock[0]); - NodeParams.blockDimY = static_cast(ThreadsPerBlock[1]); - NodeParams.blockDimZ = static_cast(ThreadsPerBlock[2]); + NodeParams.gridDimX = BlocksPerGrid[0]; + NodeParams.gridDimY = BlocksPerGrid[1]; + NodeParams.gridDimZ = BlocksPerGrid[2]; + NodeParams.blockDimX = ThreadsPerBlock[0]; + NodeParams.blockDimY = ThreadsPerBlock[1]; + NodeParams.blockDimZ = ThreadsPerBlock[2]; 
NodeParams.sharedMemBytes = LocalSize; NodeParams.kernelParams = const_cast(ArgIndices.data()); @@ -1397,12 +1397,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( CUDA_KERNEL_NODE_PARAMS &Params = KernelCommandHandle->Params; Params.func = CuFunc; - Params.gridDimX = static_cast(BlocksPerGrid[0]); - Params.gridDimY = static_cast(BlocksPerGrid[1]); - Params.gridDimZ = static_cast(BlocksPerGrid[2]); - Params.blockDimX = static_cast(ThreadsPerBlock[0]); - Params.blockDimY = static_cast(ThreadsPerBlock[1]); - Params.blockDimZ = static_cast(ThreadsPerBlock[2]); + Params.gridDimX = BlocksPerGrid[0]; + Params.gridDimY = BlocksPerGrid[1]; + Params.gridDimZ = BlocksPerGrid[2]; + Params.blockDimX = ThreadsPerBlock[0]; + Params.blockDimY = ThreadsPerBlock[1]; + Params.blockDimZ = ThreadsPerBlock[2]; Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize(); Params.kernelParams = const_cast(KernelCommandHandle->Kernel->getArgIndices().data()); diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index b1e34586dc..be5867628d 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1152,7 +1152,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, try { if (pNumDevices) { - *pNumDevices = static_cast(NumDevices); + *pNumDevices = NumDevices; } if (ReturnDevices && phDevices) { @@ -1235,7 +1235,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice, uint64_t *pDeviceTimestamp, uint64_t *pHostTimestamp) { - CUevent Event{}; + CUevent Event; ScopedContext Active(hDevice); if (pDeviceTimestamp) { diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index 15f812403f..0e00f680f6 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -160,7 +160,7 @@ void guessLocalWorkSize(ur_device_handle_t Device, 
size_t *ThreadsPerBlock, int MinGrid, MaxBlockSize; UR_CHECK_ERROR(cuOccupancyMaxPotentialBlockSize( &MinGrid, &MaxBlockSize, Kernel->get(), NULL, Kernel->getLocalSize(), - static_cast(MaxBlockDim[0]))); + MaxBlockDim[0])); roundToHighestFactorOfGlobalSizeIn3d(ThreadsPerBlock, GlobalSizeNormalized, MaxBlockDim, MaxBlockSize); @@ -208,7 +208,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context, MaxWorkGroupSize = Device->getMaxWorkGroupSize(); if (ProvidedLocalWorkGroupSize) { - auto IsValid = [&](size_t Dim) { + auto IsValid = [&](int Dim) { if (ReqdThreadsPerBlock[Dim] != 0 && LocalWorkSize[Dim] != ReqdThreadsPerBlock[Dim]) return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; @@ -217,8 +217,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context, LocalWorkSize[Dim] > MaxThreadsPerBlock[Dim]) return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; - if (LocalWorkSize[Dim] > - Device->getMaxWorkItemSizes(static_cast(Dim))) + if (LocalWorkSize[Dim] > Device->getMaxWorkItemSizes(Dim)) return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; // Checks that local work sizes are a divisor of the global work sizes // which includes that the local work sizes are neither larger than @@ -482,13 +481,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( auto &ArgIndices = hKernel->getArgIndices(); UR_CHECK_ERROR(cuLaunchKernel( - CuFunc, static_cast(BlocksPerGrid[0]), - static_cast(BlocksPerGrid[1]), - static_cast(BlocksPerGrid[2]), - static_cast(ThreadsPerBlock[0]), - static_cast(ThreadsPerBlock[1]), - static_cast(ThreadsPerBlock[2]), LocalSize, CuStream, - const_cast(ArgIndices.data()), nullptr)); + CuFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2], + ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2], LocalSize, + CuStream, const_cast(ArgIndices.data()), nullptr)); if (LocalSize != 0) hKernel->clearLocalSize(); @@ -654,12 +649,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( auto &ArgIndices = 
hKernel->getArgIndices(); CUlaunchConfig launch_config; - launch_config.gridDimX = static_cast(BlocksPerGrid[0]); - launch_config.gridDimY = static_cast(BlocksPerGrid[1]); - launch_config.gridDimZ = static_cast(BlocksPerGrid[2]); - launch_config.blockDimX = static_cast(ThreadsPerBlock[0]); - launch_config.blockDimY = static_cast(ThreadsPerBlock[1]); - launch_config.blockDimZ = static_cast(ThreadsPerBlock[2]); + launch_config.gridDimX = BlocksPerGrid[0]; + launch_config.gridDimY = BlocksPerGrid[1]; + launch_config.gridDimZ = BlocksPerGrid[2]; + launch_config.blockDimX = ThreadsPerBlock[0]; + launch_config.blockDimY = ThreadsPerBlock[1]; + launch_config.blockDimZ = ThreadsPerBlock[2]; launch_config.sharedMemBytes = LocalSize; launch_config.hStream = CuStream; @@ -984,9 +979,8 @@ ur_result_t commonMemSetLargePattern(CUstream Stream, uint32_t PatternSize, auto OffsetPtr = Ptr + (step * sizeof(uint8_t)); // set all of the pattern chunks - UR_CHECK_ERROR(cuMemsetD2D8Async(OffsetPtr, Pitch, - static_cast(Value), - sizeof(uint8_t), Height, Stream)); + UR_CHECK_ERROR(cuMemsetD2D8Async(OffsetPtr, Pitch, Value, sizeof(uint8_t), + Height, Stream)); } return UR_RESULT_SUCCESS; } @@ -1037,9 +1031,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( break; } default: { - UR_CHECK_ERROR( - commonMemSetLargePattern(Stream, static_cast(patternSize), - size, pPattern, DstDevice)); + UR_CHECK_ERROR(commonMemSetLargePattern(Stream, patternSize, size, + pPattern, DstDevice)); break; } } @@ -1071,6 +1064,7 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) { return 4; default: detail::ur::die("Invalid image format."); + return 0; } } @@ -1174,7 +1168,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( CUDA_ARRAY_DESCRIPTOR ArrayDesc; UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array)); - int ElementByteSize = static_cast(imageElementByteSize(ArrayDesc)); + int ElementByteSize = imageElementByteSize(ArrayDesc); size_t ByteOffsetX = origin.x 
* ElementByteSize * ArrayDesc.NumChannels; size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width; @@ -1247,7 +1241,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( CUDA_ARRAY_DESCRIPTOR ArrayDesc; UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array)); - int ElementByteSize = static_cast(imageElementByteSize(ArrayDesc)); + int ElementByteSize = imageElementByteSize(ArrayDesc); size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels; size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width; @@ -1326,7 +1320,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( UR_ASSERT(SrcArrayDesc.NumChannels == DstArrayDesc.NumChannels, UR_RESULT_ERROR_INVALID_MEM_OBJECT); - int ElementByteSize = static_cast(imageElementByteSize(SrcArrayDesc)); + int ElementByteSize = imageElementByteSize(SrcArrayDesc); size_t DstByteOffsetX = dstOrigin.x * ElementByteSize * SrcArrayDesc.NumChannels; @@ -1511,8 +1505,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( CuStream)); break; default: - commonMemSetLargePattern(CuStream, static_cast(patternSize), - size, pPattern, (CUdeviceptr)ptr); + commonMemSetLargePattern(CuStream, patternSize, size, pPattern, + (CUdeviceptr)ptr); break; } if (phEvent) { diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp index 40fd18fef7..4840553cc1 100644 --- a/source/adapters/cuda/image.cpp +++ b/source/adapters/cuda/image.cpp @@ -284,9 +284,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp( ur_result_t Result = UR_RESULT_SUCCESS; try { ScopedContext Active(hDevice); - UR_CHECK_ERROR( - cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch, widthInBytes, - height, static_cast(elementSizeBytes))); + UR_CHECK_ERROR(cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch, + widthInBytes, height, elementSizeBytes)); } catch (ur_result_t error) { Result = error; } catch (...) 
{ diff --git a/source/adapters/cuda/kernel.cpp b/source/adapters/cuda/kernel.cpp index 91daf5649c..5fb097c304 100644 --- a/source/adapters/cuda/kernel.cpp +++ b/source/adapters/cuda/kernel.cpp @@ -203,8 +203,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( int MaxNumActiveGroupsPerCU{0}; UR_CHECK_ERROR(cuOccupancyMaxActiveBlocksPerMultiprocessor( - &MaxNumActiveGroupsPerCU, hKernel->get(), - static_cast(localWorkSize), dynamicSharedMemorySize)); + &MaxNumActiveGroupsPerCU, hKernel->get(), localWorkSize, + dynamicSharedMemorySize)); detail::ur::assertion(MaxNumActiveGroupsPerCU >= 0); // Handle the case where we can't have all SMs active with at least 1 group // per SM. In that case, the device is still able to run 1 work-group, hence diff --git a/source/adapters/cuda/kernel.hpp b/source/adapters/cuda/kernel.hpp index 77d8c817f4..7ad20a4f0e 100644 --- a/source/adapters/cuda/kernel.hpp +++ b/source/adapters/cuda/kernel.hpp @@ -97,8 +97,8 @@ struct ur_kernel_handle_t_ { } ParamSizes[Index] = Size; // calculate the insertion point on the array - size_t InsertPos = std::accumulate( - std::begin(ParamSizes), std::begin(ParamSizes) + Index, size_t{0}); + size_t InsertPos = std::accumulate(std::begin(ParamSizes), + std::begin(ParamSizes) + Index, 0); // Update the stored value for the argument std::memcpy(&Storage[InsertPos], Arg, Size); Indices[Index] = &Storage[InsertPos]; @@ -152,8 +152,8 @@ struct ur_kernel_handle_t_ { const args_index_t &getIndices() const noexcept { return Indices; } uint32_t getLocalSize() const { - return static_cast(std::accumulate( - std::begin(OffsetPerIndex), std::end(OffsetPerIndex), size_t{0})); + return std::accumulate(std::begin(OffsetPerIndex), + std::end(OffsetPerIndex), 0); } } Args; diff --git a/source/adapters/cuda/program.cpp b/source/adapters/cuda/program.cpp index 8a29df8de6..4b963a737a 100644 --- a/source/adapters/cuda/program.cpp +++ b/source/adapters/cuda/program.cpp @@ -148,8 +148,8 @@ 
ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) { } UR_CHECK_ERROR(cuModuleLoadDataEx(&Module, static_cast(Binary), - static_cast(Options.size()), - Options.data(), OptionVals.data())); + Options.size(), Options.data(), + OptionVals.data())); BuildStatus = UR_PROGRAM_BUILD_STATUS_SUCCESS; diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index 8915736b3e..8a6ac41b08 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -325,7 +325,7 @@ umf_result_t USMMemoryProvider::initialize(ur_context_handle_t Ctx, enum umf_result_t USMMemoryProvider::alloc(size_t Size, size_t Align, void **Ptr) { - auto Res = allocateImpl(Ptr, Size, static_cast(Align)); + auto Res = allocateImpl(Ptr, Size, Align); if (Res != UR_RESULT_SUCCESS) { getLastStatusRef() = Res; return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 86424aaf41..d700fbb2c3 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -89,9 +89,8 @@ if(UR_BUILD_ADAPTER_L0) endif() # TODO: fix level_zero adapter conversion warnings - # C4267: The compiler detected a conversion from size_t to a smaller type. 
target_compile_options(ur_adapter_level_zero PRIVATE - $<$:/wd4805 /wd4244 /wd4267> + $<$:/wd4805 /wd4244> ) set_target_properties(ur_adapter_level_zero PROPERTIES @@ -99,9 +98,9 @@ if(UR_BUILD_ADAPTER_L0) SOVERSION "${PROJECT_VERSION_MAJOR}" ) - if(CMAKE_CXX_COMPILER_LINKER_ID MATCHES MSVC) - # 0x800: Search for the DLL only in the System32 folder - target_link_options(ur_adapter_level_zero PRIVATE LINKER:/DEPENDENTLOADFLAG:0x800) + if (WIN32) + # 0x800: Search for the DLL only in the System32 folder + target_link_options(ur_adapter_level_zero PRIVATE /DEPENDENTLOADFLAG:0x800) endif() target_link_libraries(ur_adapter_level_zero PRIVATE @@ -184,9 +183,8 @@ if(UR_BUILD_ADAPTER_L0_V2) target_compile_definitions(ur_adapter_level_zero_v2 PUBLIC UR_ADAPTER_LEVEL_ZERO_V2) # TODO: fix level_zero adapter conversion warnings - # C4267: The compiler detected a conversion from size_t to a smaller type. target_compile_options(ur_adapter_level_zero_v2 PRIVATE - $<$:/wd4805 /wd4244 /wd4100 /wd4267> + $<$:/wd4805 /wd4244> ) set_target_properties(ur_adapter_level_zero_v2 PROPERTIES @@ -194,9 +192,9 @@ if(UR_BUILD_ADAPTER_L0_V2) SOVERSION "${PROJECT_VERSION_MAJOR}" ) - if(CMAKE_CXX_COMPILER_LINKER_ID MATCHES MSVC) - # 0x800: Search for the DLL only in the System32 folder - target_link_options(ur_adapter_level_zero_v2 PUBLIC LINKER:/DEPENDENTLOADFLAG:0x800) + if (WIN32) + # 0x800: Search for the DLL only in the System32 folder + target_link_options(ur_adapter_level_zero_v2 PUBLIC /DEPENDENTLOADFLAG:0x800) endif() target_link_libraries(ur_adapter_level_zero_v2 PRIVATE diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 7d3d571c68..9dd2a31268 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -514,5 +514,7 @@ ur_result_t urAdapterGetInfo(ur_adapter_handle_t, ur_adapter_info_t PropName, default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } + + return UR_RESULT_SUCCESS; } } // namespace 
ur::level_zero diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index d18aeb684f..41c7593237 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -756,7 +756,7 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( // queue's map to hold the fence and other associated command // list information. auto &QGroup = Queue->getQueueGroup(UseCopyEngine); - uint32_t QueueGroupOrdinal = 0; + uint32_t QueueGroupOrdinal; auto &ZeCommandQueue = ForcedCmdQueue ? *ForcedCmdQueue : QGroup.getZeQueue(&QueueGroupOrdinal); diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index cfa97e7179..94dad86070 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -155,7 +155,7 @@ ur_result_t urDeviceGet( } } - uint32_t ZeDeviceCount = static_cast(MatchedDevices.size()); + uint32_t ZeDeviceCount = MatchedDevices.size(); auto N = (std::min)(ZeDeviceCount, NumEntries); if (Devices) @@ -318,10 +318,9 @@ ur_result_t urDeviceGetInfo( Device->QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute] .ZeIndex >= 0; if (RepresentsCSlice) - MaxComputeUnits /= - static_cast(Device->RootDevice->SubDevices.size()); + MaxComputeUnits /= Device->RootDevice->SubDevices.size(); - return ReturnValue(MaxComputeUnits); + return ReturnValue(uint32_t{MaxComputeUnits}); } case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: // Level Zero spec defines only three dimensions @@ -423,8 +422,7 @@ ur_result_t urDeviceGetInfo( return Res; } - uint32_t ZeSubDeviceCount = - static_cast(Device->SubDevices.size()); + uint32_t ZeSubDeviceCount = Device->SubDevices.size(); if (pSize && ZeSubDeviceCount < 2) { *pSize = 0; return UR_RESULT_SUCCESS; @@ -1159,6 +1157,8 @@ ur_result_t urDeviceGetInfo( logger::toHex(ParamName)); return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } + + return UR_RESULT_SUCCESS; } bool CopyEngineRequested(const ur_device_handle_t 
&Device) { @@ -1206,7 +1206,7 @@ ur_result_t urDevicePartition( return Res; } - auto EffectiveNumDevices = [&]() -> uint32_t { + auto EffectiveNumDevices = [&]() -> decltype(Device->SubDevices.size()) { if (Device->SubDevices.size() == 0) return 0; @@ -1229,7 +1229,7 @@ ur_result_t urDevicePartition( } } - return static_cast(Device->SubDevices.size()); + return Device->SubDevices.size(); }(); // TODO: Consider support for partitioning to <= total sub-devices. diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index f4fbd1db39..408580dd80 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -498,6 +498,8 @@ ur_result_t urEventGetInfo( PropName, logger::toHex(PropName)); return UR_RESULT_ERROR_INVALID_VALUE; } + + return UR_RESULT_SUCCESS; } ur_result_t urEventGetProfilingInfo( diff --git a/source/adapters/level_zero/platform.cpp b/source/adapters/level_zero/platform.cpp index 506b0ee35b..721db3c359 100644 --- a/source/adapters/level_zero/platform.cpp +++ b/source/adapters/level_zero/platform.cpp @@ -99,6 +99,8 @@ ur_result_t urPlatformGetInfo( logger::debug("urPlatformGetInfo: unrecognized ParamName"); return UR_RESULT_ERROR_INVALID_VALUE; } + + return UR_RESULT_SUCCESS; } ur_result_t urPlatformGetApiVersion( diff --git a/source/adapters/level_zero/program.hpp b/source/adapters/level_zero/program.hpp index e4818e37bf..4fe8c24acd 100644 --- a/source/adapters/level_zero/program.hpp +++ b/source/adapters/level_zero/program.hpp @@ -46,8 +46,7 @@ struct ur_program_handle_t_ : _ur_object { class SpecConstantShim { public: SpecConstantShim(ur_program_handle_t_ *Program) { - ZeSpecConstants.numConstants = - static_cast(Program->SpecConstants.size()); + ZeSpecConstants.numConstants = Program->SpecConstants.size(); ZeSpecContantsIds.reserve(ZeSpecConstants.numConstants); ZeSpecContantsValues.reserve(ZeSpecConstants.numConstants); diff --git a/source/adapters/level_zero/queue.cpp 
b/source/adapters/level_zero/queue.cpp index 7bce4fc687..c4598f3472 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -2272,7 +2272,7 @@ ur_result_t ur_queue_handle_t_::createCommandList( ZeStruct ZeFenceDesc; ze_command_list_handle_t ZeCommandList; - uint32_t QueueGroupOrdinal = 0; + uint32_t QueueGroupOrdinal; auto &QGroup = getQueueGroup(UseCopyEngine); auto &ZeCommandQueue = ForcedCmdQueue ? *ForcedCmdQueue : QGroup.getZeQueue(&QueueGroupOrdinal); diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index bf592e0db6..28bdf233e8 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -689,6 +689,7 @@ ur_result_t urUSMGetMemAllocInfo( logger::error("urUSMGetMemAllocInfo: unsupported ParamName"); return UR_RESULT_ERROR_INVALID_VALUE; } + return UR_RESULT_SUCCESS; } ur_result_t urUSMPoolCreate( diff --git a/source/adapters/level_zero/v2/event.cpp b/source/adapters/level_zero/v2/event.cpp index 8bbedf6308..a0efad0692 100644 --- a/source/adapters/level_zero/v2/event.cpp +++ b/source/adapters/level_zero/v2/event.cpp @@ -205,6 +205,8 @@ ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName, propName, logger::toHex(propName)); return UR_RESULT_ERROR_INVALID_VALUE; } + + return UR_RESULT_SUCCESS; } ur_result_t urEventGetProfilingInfo( diff --git a/source/adapters/level_zero/v2/kernel.cpp b/source/adapters/level_zero/v2/kernel.cpp index 13e5adaf95..de2e37e9bb 100644 --- a/source/adapters/level_zero/v2/kernel.cpp +++ b/source/adapters/level_zero/v2/kernel.cpp @@ -531,6 +531,7 @@ ur_result_t urKernelGetGroupInfo( return UR_RESULT_ERROR_INVALID_VALUE; } } + return UR_RESULT_SUCCESS; } ur_result_t urKernelGetSubGroupInfo( @@ -559,6 +560,7 @@ ur_result_t urKernelGetSubGroupInfo( returnValue(uint32_t{props.requiredSubgroupSize}); } else { die("urKernelGetSubGroupInfo: parameter not implemented"); + return {}; } return UR_RESULT_SUCCESS; } 
diff --git a/source/adapters/level_zero/v2/memory.cpp b/source/adapters/level_zero/v2/memory.cpp index f5f4a7b05b..245516a41f 100644 --- a/source/adapters/level_zero/v2/memory.cpp +++ b/source/adapters/level_zero/v2/memory.cpp @@ -529,6 +529,8 @@ ur_result_t urMemGetInfo(ur_mem_handle_t hMemory, ur_mem_info_t propName, return UR_RESULT_ERROR_INVALID_ENUMERATION; } } + + return UR_RESULT_SUCCESS; } ur_result_t urMemRetain(ur_mem_handle_t hMem) { diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index 64e303a709..08fae0719f 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -140,6 +140,8 @@ ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName, propName, logger::toHex(propName)); return UR_RESULT_ERROR_INVALID_VALUE; } + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::queueRetain() { diff --git a/source/adapters/level_zero/v2/usm.cpp b/source/adapters/level_zero/v2/usm.cpp index 9b2ae2a0d7..f23a6c6fe8 100644 --- a/source/adapters/level_zero/v2/usm.cpp +++ b/source/adapters/level_zero/v2/usm.cpp @@ -378,5 +378,6 @@ ur_result_t urUSMGetMemAllocInfo( return UR_RESULT_ERROR_INVALID_VALUE; } } + return UR_RESULT_SUCCESS; } } // namespace ur::level_zero diff --git a/source/adapters/level_zero/virtual_mem.cpp b/source/adapters/level_zero/virtual_mem.cpp index 68c457d181..e89899ded7 100644 --- a/source/adapters/level_zero/virtual_mem.cpp +++ b/source/adapters/level_zero/virtual_mem.cpp @@ -38,6 +38,7 @@ ur_result_t urVirtualMemGranularityGetInfo( propName, propName); return UR_RESULT_ERROR_INVALID_VALUE; } + return UR_RESULT_SUCCESS; } ur_result_t urVirtualMemReserve(ur_context_handle_t hContext, @@ -118,5 +119,7 @@ ur_result_t urVirtualMemGetInfo(ur_context_handle_t hContext, propName, propName); return UR_RESULT_ERROR_INVALID_VALUE; } + + return UR_RESULT_SUCCESS; 
} } // namespace ur::level_zero diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index 015c4facfd..e794c308f9 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -131,4 +131,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, default: return UR_RESULT_ERROR_INVALID_ENUMERATION; } + + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 1792d0f110..45550a68e8 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -10,7 +10,6 @@ #include "common.hpp" -#include #include #include #include @@ -33,7 +32,8 @@ cl_event_info convertUREventInfoToCL(const ur_event_info_t PropName) { return CL_EVENT_REFERENCE_COUNT; break; default: - return std::numeric_limits::max(); + return -1; + break; } } @@ -51,7 +51,7 @@ convertURProfilingInfoToCL(const ur_profiling_info_t PropName) { case UR_PROFILING_INFO_COMMAND_END: return CL_PROFILING_COMMAND_END; default: - return std::numeric_limits::max(); + return -1; } } diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 89d9f1d383..201df1f678 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -9,7 +9,6 @@ //===----------------------------------------------------------------------===// #include "common.hpp" -#include cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { cl_image_format CLImageFormat; @@ -60,8 +59,7 @@ cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { CLImageFormat.image_channel_order = CL_sRGBA; break; default: - CLImageFormat.image_channel_order = - std::numeric_limits::max(); + CLImageFormat.image_channel_order = -1; break; } @@ -112,8 +110,7 @@ cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { CLImageFormat.image_channel_data_type = CL_FLOAT; break; default: - 
CLImageFormat.image_channel_data_type = - std::numeric_limits::max(); + CLImageFormat.image_channel_data_type = -1; break; } @@ -142,7 +139,7 @@ cl_image_desc mapURImageDescToCL(const ur_image_desc_t *PImageDesc) { CLImageDesc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; break; default: - CLImageDesc.image_type = std::numeric_limits::max(); + CLImageDesc.image_type = -1; break; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 9018ee43f2..20aaa8fd3a 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -161,10 +161,10 @@ urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, std::unique_ptr> DevicesInProgram; UR_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); - CL_RETURN_ON_FAILURE(clCompileProgram( - cl_adapter::cast(hProgram), - static_cast(DevicesInProgram->size()), DevicesInProgram->data(), - pOptions, 0, nullptr, nullptr, nullptr, nullptr)); + CL_RETURN_ON_FAILURE(clCompileProgram(cl_adapter::cast(hProgram), + DevicesInProgram->size(), + DevicesInProgram->data(), pOptions, 0, + nullptr, nullptr, nullptr, nullptr)); return UR_RESULT_SUCCESS; } @@ -219,10 +219,9 @@ urProgramBuild([[maybe_unused]] ur_context_handle_t hContext, std::unique_ptr> DevicesInProgram; UR_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); - CL_RETURN_ON_FAILURE( - clBuildProgram(cl_adapter::cast(hProgram), - static_cast(DevicesInProgram->size()), - DevicesInProgram->data(), pOptions, nullptr, nullptr)); + CL_RETURN_ON_FAILURE(clBuildProgram( + cl_adapter::cast(hProgram), DevicesInProgram->size(), + DevicesInProgram->data(), pOptions, nullptr, nullptr)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 0bb81cb1e5..2e40963ad1 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -8,9 +8,9 @@ #include "common.hpp" #include "platform.hpp" -#include 
cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { + switch (PropName) { case UR_QUEUE_INFO_CONTEXT: return CL_QUEUE_CONTEXT; @@ -25,7 +25,7 @@ cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { case UR_QUEUE_INFO_SIZE: return CL_QUEUE_SIZE; default: - return std::numeric_limits::max(); + return -1; } } diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 4119f4ddfd..dfcc1dfafa 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -614,14 +614,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( } cl_int ClResult = CL_SUCCESS; if (blocking) { - ClResult = - clWaitForEvents(static_cast(Events.size()), Events.data()); + ClResult = clWaitForEvents(Events.size(), Events.data()); } if (phEvent && ClResult == CL_SUCCESS) { ClResult = clEnqueueBarrierWithWaitList( - cl_adapter::cast(hQueue), - static_cast(Events.size()), Events.data(), - cl_adapter::cast(phEvent)); + cl_adapter::cast(hQueue), Events.size(), + Events.data(), cl_adapter::cast(phEvent)); } for (const auto &E : Events) { CL_RETURN_ON_FAILURE(clReleaseEvent(E)); diff --git a/source/adapters/opencl/usm_p2p.cpp b/source/adapters/opencl/usm_p2p.cpp index 66387f5226..b0f51eac2b 100644 --- a/source/adapters/opencl/usm_p2p.cpp +++ b/source/adapters/opencl/usm_p2p.cpp @@ -8,12 +8,13 @@ // //===----------------------------------------------------------------------===// -#include "logger/ur_logger.hpp" +#include "common.hpp" UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice, [[maybe_unused]] ur_device_handle_t peerDevice) { - logger::warning( + + cl_adapter::die( "Experimental P2P feature is not implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -21,7 +22,8 @@ urUsmP2PEnablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice, UR_APIEXPORT ur_result_t UR_APICALL 
urUsmP2PDisablePeerAccessExp([[maybe_unused]] ur_device_handle_t commandDevice, [[maybe_unused]] ur_device_handle_t peerDevice) { - logger::warning( + + cl_adapter::die( "Experimental P2P feature is not implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -32,7 +34,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( [[maybe_unused]] ur_exp_peer_info_t propName, [[maybe_unused]] size_t propSize, [[maybe_unused]] void *pPropValue, [[maybe_unused]] size_t *pPropSizeRet) { - logger::warning( + + cl_adapter::die( "Experimental P2P feature is not implemented for OpenCL adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/common/logger/ur_logger.hpp b/source/common/logger/ur_logger.hpp index 1039d16d3f..c4dc655444 100644 --- a/source/common/logger/ur_logger.hpp +++ b/source/common/logger/ur_logger.hpp @@ -116,9 +116,8 @@ template inline std::string toHex(T t) { inline Logger create_logger(std::string logger_name, bool skip_prefix, bool skip_linebreak, logger::Level default_log_level) { - std::transform( - logger_name.begin(), logger_name.end(), logger_name.begin(), - [](char c) -> char { return static_cast(::toupper(c)); }); + std::transform(logger_name.begin(), logger_name.end(), logger_name.begin(), + ::toupper); std::stringstream env_var_name; const auto default_flush_level = logger::Level::ERR; const std::string default_output = "stderr"; diff --git a/source/common/ur_util.cpp b/source/common/ur_util.cpp index 78651c4212..176a2e028e 100644 --- a/source/common/ur_util.cpp +++ b/source/common/ur_util.cpp @@ -15,7 +15,7 @@ #include int ur_getpid(void) { return static_cast(GetCurrentProcessId()); } -int ur_close_fd(int fd [[maybe_unused]]) { return -1; } +int ur_close_fd(int fd) { return -1; } int ur_duplicate_fd(int pid, int fd_in) { // TODO: find another way to obtain a duplicate of another process's file descriptor diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp index 
878123b6f0..0ede3c93dc 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -98,9 +98,8 @@ std::optional ur_getenv(const char *name); inline bool getenv_tobool(const char *name, bool def = false) { if (auto env = ur_getenv(name); env) { - std::transform(env->begin(), env->end(), env->begin(), [](char c) { - return static_cast(std::tolower(c)); - }); + std::transform(env->begin(), env->end(), env->begin(), + [](unsigned char c) { return std::tolower(c); }); auto true_str = {"y", "yes", "t", "true", "1"}; return std::find(true_str.begin(), true_str.end(), *env) != true_str.end(); diff --git a/source/loader/layers/tracing/ur_tracing_layer.cpp b/source/loader/layers/tracing/ur_tracing_layer.cpp index 3a1cada334..614f649a3c 100644 --- a/source/loader/layers/tracing/ur_tracing_layer.cpp +++ b/source/loader/layers/tracing/ur_tracing_layer.cpp @@ -12,16 +12,8 @@ #include "ur_tracing_layer.hpp" #include "ur_api.h" #include "ur_util.hpp" - -#ifdef _MSC_VER -#pragma warning(disable : 4245) -#endif #include "xpti/xpti_data_types.h" #include "xpti/xpti_trace_framework.h" -#ifdef _MSC_VER -#pragma warning(default : 4245) -#endif - #include #include #include diff --git a/source/loader/ur_adapter_registry.hpp b/source/loader/ur_adapter_registry.hpp index 201b57c6f0..7df799ab1e 100644 --- a/source/loader/ur_adapter_registry.hpp +++ b/source/loader/ur_adapter_registry.hpp @@ -225,13 +225,12 @@ class AdapterRegistry { } // case-insensitive comparison by converting both tolower - std::transform( - platformBackendName.begin(), platformBackendName.end(), - platformBackendName.begin(), - [](char c) { return static_cast(std::tolower(c)); }); - std::transform( - backend.begin(), backend.end(), backend.begin(), - [](char c) { return static_cast(std::tolower(c)); }); + std::transform(platformBackendName.begin(), + platformBackendName.end(), + platformBackendName.begin(), + [](unsigned char c) { return std::tolower(c); }); + std::transform(backend.begin(), backend.end(), 
backend.begin(), + [](unsigned char c) { return std::tolower(c); }); std::size_t nameFound = platformBackendName.find(backend); bool backendFound = nameFound != std::string::npos; diff --git a/source/loader/ur_lib.cpp b/source/loader/ur_lib.cpp index fb8035b428..e1de6d6237 100644 --- a/source/loader/ur_lib.cpp +++ b/source/loader/ur_lib.cpp @@ -17,7 +17,6 @@ #define NOMINMAX #include "ur_api.h" #include "ur_ldrddi.hpp" -#include #endif // !NOMINMAX #include "logger/ur_logger.hpp" @@ -413,7 +412,7 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, using DeviceIdType = unsigned long; constexpr DeviceIdType DeviceIdTypeALL = - std::numeric_limits::max(); + -1; // ULONG_MAX but without #include struct DeviceSpec { DevicePartLevel level; @@ -427,9 +426,8 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, auto getRootHardwareType = [](const std::string &input) -> DeviceHardwareType { std::string lowerInput(input); - std::transform( - lowerInput.cbegin(), lowerInput.cend(), lowerInput.begin(), - [](char c) { return static_cast(std::tolower(c)); }); + std::transform(lowerInput.cbegin(), lowerInput.cend(), + lowerInput.begin(), ::tolower); if (lowerInput == "cpu") { return ::UR_DEVICE_TYPE_CPU; } @@ -484,8 +482,9 @@ ur_result_t urDeviceGetSelected(ur_platform_handle_t hPlatform, platformBackendName.cend(), backend.cbegin(), backend.cend(), [](const auto &a, const auto &b) { // case-insensitive comparison by converting both tolower - return std::tolower(static_cast(a)) == - std::tolower(static_cast(b)); + return std::tolower( + static_cast(a)) == + std::tolower(static_cast(b)); })) { // irrelevant term for current request: different backend -- silently ignore logger::error("unrecognised backend '{}'", backend); diff --git a/source/loader/windows/adapter_search.cpp b/source/loader/windows/adapter_search.cpp index f850ec5de7..b514897d91 100644 --- a/source/loader/windows/adapter_search.cpp +++ b/source/loader/windows/adapter_search.cpp @@ -40,8 
+40,7 @@ std::optional getLoaderLibPath() { return std::nullopt; } -std::optional getAdapterNameAsPath(std::string adapterName - [[maybe_unused]]) { +std::optional getAdapterNameAsPath(std::string adapterName) { return std::nullopt; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b9a7f5a0d0..e7514cefd8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -32,7 +32,6 @@ add_subdirectory(mock) if(UR_BUILD_TOOLS) add_subdirectory(tools) endif() -if(CMAKE_CXX_COMPILER_ID STREQUAL Clang AND UR_DPCXX AND UR_TEST_FUZZTESTS AND - CMAKE_SYSTEM_NAME STREQUAL Linux) +if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND UR_DPCXX AND UR_TEST_FUZZTESTS) add_subdirectory(fuzz) endif() diff --git a/test/conformance/device/urDevicePartition.cpp b/test/conformance/device/urDevicePartition.cpp index 1241900a1a..2b2939066d 100644 --- a/test/conformance/device/urDevicePartition.cpp +++ b/test/conformance/device/urDevicePartition.cpp @@ -138,7 +138,7 @@ TEST_F(urDevicePartitionTest, PartitionByCounts) { uint32_t sum = 0; for (auto sub_device : sub_devices) { ASSERT_NE(sub_device, nullptr); - uint32_t n_cu_in_sub_device = 0; + uint32_t n_cu_in_sub_device; ASSERT_NO_FATAL_FAILURE( getNumberComputeUnits(sub_device, n_cu_in_sub_device)); sum += n_cu_in_sub_device; diff --git a/test/conformance/enqueue/urEnqueueDeviceGlobalVariableRead.cpp b/test/conformance/enqueue/urEnqueueDeviceGlobalVariableRead.cpp index d247ee6e96..799ce7a67d 100644 --- a/test/conformance/enqueue/urEnqueueDeviceGlobalVariableRead.cpp +++ b/test/conformance/enqueue/urEnqueueDeviceGlobalVariableRead.cpp @@ -14,7 +14,7 @@ TEST_P(urEnqueueDeviceGetGlobalVariableReadTest, Success) { 0, &global_var.value, 0, nullptr, nullptr)); size_t global_offset = 0; - uint32_t n_dimensions = 1; + size_t n_dimensions = 1; size_t global_size = 1; // execute the kernel diff --git a/test/conformance/enqueue/urEnqueueEventsWaitMultiDevice.cpp b/test/conformance/enqueue/urEnqueueEventsWaitMultiDevice.cpp index 
8ee6dc5365..1e281b0632 100644 --- a/test/conformance/enqueue/urEnqueueEventsWaitMultiDevice.cpp +++ b/test/conformance/enqueue/urEnqueueEventsWaitMultiDevice.cpp @@ -175,9 +175,8 @@ TEST_P(urEnqueueEventsWaitMultiDeviceMTTest, EnqueueWaitOnAllQueues) { doComputation(work); uur::raii::Event gatherEvent; - ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], - static_cast(devices.size()), - events.data(), gatherEvent.ptr())); + ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], devices.size(), events.data(), + gatherEvent.ptr())); ASSERT_SUCCESS(urEventWait(1, gatherEvent.ptr())); for (size_t i = 0; i < devices.size(); i++) { @@ -202,9 +201,9 @@ TEST_P(urEnqueueEventsWaitMultiDeviceMTTest, doComputation(work); uur::raii::Event hGatherEvent; - ASSERT_SUCCESS(urEnqueueEventsWait( - queues[0], static_cast(eventHandles.size()), - eventHandles.data(), hGatherEvent.ptr())); + ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], eventHandles.size(), + eventHandles.data(), + hGatherEvent.ptr())); ASSERT_SUCCESS(urEventWait(1, hGatherEvent.ptr())); for (auto &event : eventHandles) { diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp index eefd204cc3..7ffa072466 100644 --- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp +++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp @@ -15,7 +15,7 @@ struct urEnqueueKernelLaunchTest : uur::urKernelExecutionTest { uint32_t val = 42; size_t global_size = 32; size_t global_offset = 0; - uint32_t n_dimensions = 1; + size_t n_dimensions = 1; }; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchTest); @@ -29,7 +29,7 @@ struct urEnqueueKernelLaunchKernelWgSizeTest : uur::urKernelExecutionTest { std::array global_offset{0, 0, 0}; // This must match the size in fixed_wg_size.cpp std::array wg_size{4, 4, 4}; - uint32_t n_dimensions = 3; + size_t n_dimensions = 3; }; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchKernelWgSizeTest); @@ -42,7 +42,7 @@ struct 
urEnqueueKernelLaunchKernelSubGroupTest : uur::urKernelExecutionTest { std::array global_size{32, 32, 32}; std::array global_offset{0, 0, 0}; - uint32_t n_dimensions = 3; + size_t n_dimensions = 3; }; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchKernelSubGroupTest); @@ -52,7 +52,7 @@ struct urEnqueueKernelLaunchKernelStandardTest : uur::urKernelExecutionTest { UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); } - uint32_t n_dimensions = 1; + size_t n_dimensions = 1; size_t global_size = 1; size_t offset = 0; }; @@ -210,7 +210,7 @@ TEST_P(urEnqueueKernelLaunchKernelStandardTest, Success) { struct testParametersEnqueueKernel { size_t X, Y, Z; - uint32_t Dims; + size_t Dims; }; template @@ -261,7 +261,7 @@ struct urEnqueueKernelLaunchTestWithParam uint32_t val = 42; size_t global_range[3]; size_t global_offset[3] = {0, 0, 0}; - uint32_t n_dimensions; + size_t n_dimensions; size_t buffer_size; }; @@ -333,7 +333,7 @@ struct urEnqueueKernelLaunchWithUSM : uur::urKernelExecutionTest { UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchWithUSM); TEST_P(urEnqueueKernelLaunchWithUSM, Success) { - uint32_t work_dim = 1; + size_t work_dim = 1; size_t global_offset = 0; size_t global_size = alloc_size / sizeof(uint32_t); uint32_t fill_val = 42; @@ -424,7 +424,7 @@ struct urEnqueueKernelLaunchWithVirtualMemory : uur::urKernelExecutionTest { UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchWithVirtualMemory); TEST_P(urEnqueueKernelLaunchWithVirtualMemory, Success) { - uint32_t work_dim = 1; + size_t work_dim = 1; size_t global_offset = 0; size_t global_size = alloc_size / sizeof(uint32_t); uint32_t fill_val = 42; diff --git a/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp b/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp index f20e4f1873..b6306f1693 100644 --- a/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp +++ b/test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp @@ 
-354,7 +354,7 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) { for (size_t i = 0; i < numThreads; i++) { threads.emplace_back([this, i, queuePerThread, useEvents]() { constexpr size_t global_offset = 0; - constexpr uint32_t n_dimensions = 1; + constexpr size_t n_dimensions = 1; auto queue = queuePerThread ? queues[i] : queues.back(); auto kernel = kernels[i]; @@ -362,7 +362,7 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) { std::vector Events(numOpsPerThread + 1); for (size_t j = 0; j < numOpsPerThread; j++) { - uint32_t waitNum = 0; + size_t waitNum = 0; ur_event_handle_t *lastEvent = nullptr; ur_event_handle_t *signalEvent = nullptr; diff --git a/test/conformance/exp_command_buffer/commands.cpp b/test/conformance/exp_command_buffer/commands.cpp index 4ca2d263cb..49b2444176 100644 --- a/test/conformance/exp_command_buffer/commands.cpp +++ b/test/conformance/exp_command_buffer/commands.cpp @@ -148,8 +148,8 @@ struct urCommandBufferAppendKernelLaunchExpTest int32_t *ptrX = static_cast(shared_ptrs[1]); int32_t *ptrY = static_cast(shared_ptrs[2]); for (size_t i = 0; i < global_size; i++) { - ptrX[i] = static_cast(i); - ptrY[i] = static_cast(i * 2); + ptrX[i] = i; + ptrY[i] = i * 2; } // Index 0 is output @@ -200,7 +200,7 @@ TEST_P(urCommandBufferAppendKernelLaunchExpTest, Basic) { int32_t *ptrZ = static_cast(shared_ptrs[0]); for (size_t i = 0; i < global_size; i++) { - int32_t result = static_cast((A * i) + (i * 2)); + uint32_t result = (A * i) + (i * 2); ASSERT_EQ(result, ptrZ[i]); } } diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index 9f9455ce98..42bee05b5a 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -8,8 +8,6 @@ #include -#include - namespace uur { namespace command_buffer { diff --git a/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp 
b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp index 1a56e356be..3e13a895ff 100644 --- a/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp @@ -336,7 +336,7 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp(command_handle, &first_update_desc)); - uint32_t second_val = 99; + uint32_t second_val = -99; ur_exp_command_buffer_update_value_arg_desc_t second_input_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype nullptr, // pNext @@ -393,7 +393,7 @@ TEST_P(BufferFillCommandTest, OverrideArgList) { &first_val, // hArgValue }; - uint32_t second_val = 99; + uint32_t second_val = -99; input_descs[1] = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype nullptr, // pNext diff --git a/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp index 5484289cec..858b6b5680 100644 --- a/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp @@ -31,7 +31,7 @@ struct BufferSaxpyKernelTest } // Variable that is incremented as arguments are added to the kernel - uint32_t current_arg_index = 0; + size_t current_arg_index = 0; // Index 0 is output buffer for HIP/Non-HIP ASSERT_SUCCESS(urKernelSetArgMemObj(kernel, current_arg_index++, nullptr, buffers[0])); diff --git a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp index 15ea4787c0..9fb408fb42 100644 --- a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp +++ b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp @@ -269,8 +269,7 @@ TEST_P(urCommandBufferKernelHandleUpdateTest, Success) { 
ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), - &(SaxpyKernel->LocalSize), - static_cast(KernelAlternatives.size()), + &(SaxpyKernel->LocalSize), KernelAlternatives.size(), KernelAlternatives.data(), 0, nullptr, 0, nullptr, nullptr, nullptr, CommandHandle.ptr())); ASSERT_NE(CommandHandle, nullptr); @@ -299,8 +298,7 @@ TEST_P(urCommandBufferKernelHandleUpdateTest, UpdateAgain) { ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), - &(SaxpyKernel->LocalSize), - static_cast(KernelAlternatives.size()), + &(SaxpyKernel->LocalSize), KernelAlternatives.size(), KernelAlternatives.data(), 0, nullptr, 0, nullptr, nullptr, nullptr, CommandHandle.ptr())); ASSERT_NE(CommandHandle, nullptr); @@ -338,8 +336,7 @@ TEST_P(urCommandBufferKernelHandleUpdateTest, RestoreOriginalKernel) { ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), - &(SaxpyKernel->LocalSize), - static_cast(KernelAlternatives.size()), + &(SaxpyKernel->LocalSize), KernelAlternatives.size(), KernelAlternatives.data(), 0, nullptr, 0, nullptr, nullptr, nullptr, CommandHandle.ptr())); ASSERT_NE(CommandHandle, nullptr); @@ -397,9 +394,9 @@ TEST_P(urCommandBufferKernelHandleUpdateTest, updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), &(SaxpyKernel->LocalSize), - static_cast(KernelAlternatives.size()), - KernelAlternatives.data(), 0, nullptr, 0, nullptr, - nullptr, nullptr, &CommandHandle)); + KernelAlternatives.size(), KernelAlternatives.data(), + 0, nullptr, 0, nullptr, nullptr, nullptr, + &CommandHandle)); } using 
urCommandBufferValidUpdateParametersTest = @@ -454,8 +451,7 @@ TEST_P(urCommandBufferValidUpdateParametersTest, UpdateOnlyLocalWorkSize) { ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), - &(SaxpyKernel->LocalSize), - static_cast(KernelAlternatives.size()), + &(SaxpyKernel->LocalSize), KernelAlternatives.size(), KernelAlternatives.data(), 0, nullptr, 0, nullptr, nullptr, nullptr, CommandHandle.ptr())); ASSERT_NE(CommandHandle, nullptr); diff --git a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp index a28414c94d..85e6beccf9 100644 --- a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp @@ -222,7 +222,7 @@ struct USMMultipleFillCommandTest std::memset(shared_ptr, 0, allocation_size); // Append multiple kernel commands to command-buffer - for (uint32_t k = 0; k < num_kernels; k++) { + for (size_t k = 0; k < num_kernels; k++) { // Calculate offset into output allocation, and set as // kernel output. 
void *offset_ptr = (uint32_t *)shared_ptr + (k * elements); @@ -270,7 +270,7 @@ struct USMMultipleFillCommandTest static constexpr size_t global_offset = 0; static constexpr size_t n_dimensions = 1; static constexpr size_t allocation_size = sizeof(val) * global_size; - static constexpr uint32_t num_kernels = 8; + static constexpr size_t num_kernels = 8; static constexpr size_t elements = global_size / num_kernels; void *shared_ptr = nullptr; @@ -290,7 +290,7 @@ TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { uint32_t *output = (uint32_t *)shared_ptr; for (size_t i = 0; i < global_size; i++) { - const uint32_t expected = val + (static_cast(i) / elements); + const uint32_t expected = val + (i / elements); ASSERT_EQ(expected, output[i]); } @@ -314,7 +314,7 @@ TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { }; // Update fill value - uint32_t new_fill_val = new_val + static_cast(k); + uint32_t new_fill_val = new_val + k; ur_exp_command_buffer_update_value_arg_desc_t new_input_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype nullptr, // pNext @@ -352,7 +352,7 @@ TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { // Verify that update occurred correctly uint32_t *updated_output = (uint32_t *)new_shared_ptr; for (size_t i = 0; i < global_size; i++) { - uint32_t expected = new_val + (static_cast(i) / elements); + uint32_t expected = new_val + (i / elements); ASSERT_EQ(expected, updated_output[i]) << i; } } diff --git a/test/conformance/exp_launch_properties/launch_properties.cpp b/test/conformance/exp_launch_properties/launch_properties.cpp index fcb1bdc78c..a54a44ecaf 100644 --- a/test/conformance/exp_launch_properties/launch_properties.cpp +++ b/test/conformance/exp_launch_properties/launch_properties.cpp @@ -14,7 +14,7 @@ struct urEnqueueKernelLaunchCustomTest : uur::urKernelExecutionTest { uint32_t val = 42; size_t global_size = 32; size_t global_offset = 0; - uint32_t n_dimensions = 1; + size_t n_dimensions = 1; }; 
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchCustomTest); diff --git a/test/conformance/integration/QueueBuffer.cpp b/test/conformance/integration/QueueBuffer.cpp index 02d5b7b1b9..d801ebf684 100644 --- a/test/conformance/integration/QueueBuffer.cpp +++ b/test/conformance/integration/QueueBuffer.cpp @@ -4,7 +4,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "fixtures.h" -#include +#include +#include struct QueueBufferTestWithParam : uur::IntegrationQueueTestWithParam { void SetUp() override { @@ -46,8 +47,8 @@ TEST_P(QueueBufferTestWithParam, QueueBufferTest) { std::vector EventsFill; ur_event_handle_t Event; - size_t Buffer1Index = 0; - size_t Buffer2Index = 0; + size_t Buffer1Index; + size_t Buffer2Index; ASSERT_NO_FATAL_FAILURE( AddBuffer1DArg(ArraySize * sizeof(uint32_t), &Buffer1, &Buffer1Index)); ASSERT_NO_FATAL_FAILURE( @@ -74,10 +75,10 @@ TEST_P(QueueBufferTestWithParam, QueueBufferTest) { for (uint32_t i = 0; i < NumIterations; ++i) { /* Copy from DeviceMem1 to DeviceMem2 and multiply by 2 */ - ASSERT_SUCCESS(urKernelSetArgMemObj( - kernel, static_cast(Buffer2Index), nullptr, Buffer2)); - ASSERT_SUCCESS(urKernelSetArgMemObj( - kernel, static_cast(Buffer1Index), nullptr, Buffer1)); + ASSERT_SUCCESS( + urKernelSetArgMemObj(kernel, Buffer2Index, nullptr, Buffer2)); + ASSERT_SUCCESS( + urKernelSetArgMemObj(kernel, Buffer1Index, nullptr, Buffer1)); ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, NDimensions, &GlobalOffset, &ArraySize, nullptr, @@ -87,10 +88,10 @@ TEST_P(QueueBufferTestWithParam, QueueBufferTest) { CurValueMem2 = CurValueMem1 * 2; /* Copy from DeviceMem1 to DeviceMem2 and multiply by 2 */ - ASSERT_SUCCESS(urKernelSetArgMemObj( - kernel, static_cast(Buffer1Index), nullptr, Buffer2)); - ASSERT_SUCCESS(urKernelSetArgMemObj( - kernel, static_cast(Buffer2Index), nullptr, Buffer1)); + ASSERT_SUCCESS( + urKernelSetArgMemObj(kernel, Buffer1Index, nullptr, Buffer2)); + ASSERT_SUCCESS( + 
urKernelSetArgMemObj(kernel, Buffer2Index, nullptr, Buffer1)); ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, NDimensions, &GlobalOffset, &ArraySize, nullptr, diff --git a/test/conformance/integration/fixtures.h b/test/conformance/integration/fixtures.h index d4d71fa9fa..aca70a5245 100644 --- a/test/conformance/integration/fixtures.h +++ b/test/conformance/integration/fixtures.h @@ -35,8 +35,7 @@ struct IntegrationQueueTestWithParam void submitBarrierIfNeeded(std::vector &(Events)) { if (QueueFlags == UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier( - Queue, static_cast(Events.size()), Events.data(), - nullptr)); + Queue, Events.size(), Events.data(), nullptr)); AllEvents.insert(AllEvents.end(), Events.begin(), Events.end()); } } diff --git a/test/conformance/kernel/urKernelGetSuggestedLocalWorkSize.cpp b/test/conformance/kernel/urKernelGetSuggestedLocalWorkSize.cpp index 4b39755400..4eeabf5573 100644 --- a/test/conformance/kernel/urKernelGetSuggestedLocalWorkSize.cpp +++ b/test/conformance/kernel/urKernelGetSuggestedLocalWorkSize.cpp @@ -12,7 +12,7 @@ struct urKernelGetSuggestedLocalWorkSizeTest : uur::urKernelExecutionTest { } size_t global_size = 32; size_t global_offset = 0; - uint32_t n_dimensions = 1; + size_t n_dimensions = 1; size_t suggested_local_work_size; }; diff --git a/test/conformance/kernel/urKernelSetArgSampler.cpp b/test/conformance/kernel/urKernelSetArgSampler.cpp index 9a8a16599e..83a65613e5 100644 --- a/test/conformance/kernel/urKernelSetArgSampler.cpp +++ b/test/conformance/kernel/urKernelSetArgSampler.cpp @@ -105,7 +105,7 @@ UUR_INSTANTIATE_KERNEL_TEST_SUITE_P(urKernelSetArgSamplerTest); TEST_P(urKernelSetArgSamplerTest, SuccessWithProps) { ur_kernel_arg_sampler_properties_t props{ UR_STRUCTURE_TYPE_KERNEL_ARG_SAMPLER_PROPERTIES, nullptr}; - uint32_t arg_index = 2; + size_t arg_index = 2; ASSERT_SUCCESS(urKernelSetArgSampler(kernel, arg_index, &props, sampler)); } diff --git 
a/test/conformance/memory-migrate/urMemBufferMigrateAcrossDevices.cpp b/test/conformance/memory-migrate/urMemBufferMigrateAcrossDevices.cpp index bf2a44300a..f7617a2940 100644 --- a/test/conformance/memory-migrate/urMemBufferMigrateAcrossDevices.cpp +++ b/test/conformance/memory-migrate/urMemBufferMigrateAcrossDevices.cpp @@ -88,7 +88,7 @@ struct urMultiDeviceContextMemBufferTest : urMultiDeviceContextTest { } // Adds a kernel arg representing a sycl buffer constructed with a 1D range. - void AddBuffer1DArg(ur_kernel_handle_t kernel, uint32_t current_arg_index, + void AddBuffer1DArg(ur_kernel_handle_t kernel, size_t current_arg_index, ur_mem_handle_t buffer) { ASSERT_SUCCESS( urKernelSetArgMemObj(kernel, current_arg_index, nullptr, buffer)); diff --git a/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp index 1b8e380d27..95a135af1c 100644 --- a/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp +++ b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp @@ -39,8 +39,8 @@ struct urMultiDeviceProgramCreateWithBinaryTest // Now create a program with multiple device binaries. 
ASSERT_SUCCESS(urProgramCreateWithBinary( - context, static_cast(devices.size()), devices.data(), - binary_sizes.data(), pointers.data(), nullptr, &binary_program)); + context, devices.size(), devices.data(), binary_sizes.data(), + pointers.data(), nullptr, &binary_program)); } void TearDown() override { @@ -61,7 +61,7 @@ struct urMultiDeviceProgramCreateWithBinaryTest TEST_F(urMultiDeviceProgramCreateWithBinaryTest, CreateAndRunKernelOnAllDevices) { constexpr size_t global_offset = 0; - constexpr uint32_t n_dimensions = 1; + constexpr size_t n_dimensions = 1; constexpr size_t global_size = 100; constexpr size_t local_size = 100; @@ -112,9 +112,8 @@ TEST_F(urMultiDeviceProgramCreateWithBinaryTest, pointers_with_invalid_binary.push_back(nullptr); } uur::raii::Program invalid_bin_program; - ASSERT_EQ(urProgramCreateWithBinary(context, - static_cast(devices.size()), - devices.data(), binary_sizes.data(), + ASSERT_EQ(urProgramCreateWithBinary(context, devices.size(), devices.data(), + binary_sizes.data(), pointers_with_invalid_binary.data(), nullptr, invalid_bin_program.ptr()), UR_RESULT_ERROR_INVALID_VALUE); @@ -133,23 +132,20 @@ TEST_F(urMultiDeviceProgramCreateWithBinaryTest, MultipleBuildCalls) { devices.begin(), devices.begin() + devices.size() / 2); auto second_subset = std::vector( devices.begin() + devices.size() / 2, devices.end()); - ASSERT_SUCCESS(urProgramBuildExp(binary_program, - static_cast(first_subset.size()), + ASSERT_SUCCESS(urProgramBuildExp(binary_program, first_subset.size(), first_subset.data(), nullptr)); auto kernelName = uur::KernelsEnvironment::instance->GetEntryPointNames("foo")[0]; uur::raii::Kernel kernel; ASSERT_SUCCESS( urKernelCreate(binary_program, kernelName.data(), kernel.ptr())); - ASSERT_SUCCESS(urProgramBuildExp( - binary_program, static_cast(second_subset.size()), - second_subset.data(), nullptr)); + ASSERT_SUCCESS(urProgramBuildExp(binary_program, second_subset.size(), + second_subset.data(), nullptr)); ASSERT_SUCCESS( 
urKernelCreate(binary_program, kernelName.data(), kernel.ptr())); // Building for the same subset of devices should not fail. - ASSERT_SUCCESS(urProgramBuildExp(binary_program, - static_cast(first_subset.size()), + ASSERT_SUCCESS(urProgramBuildExp(binary_program, first_subset.size(), first_subset.data(), nullptr)); } diff --git a/test/conformance/testing/include/uur/utils.h b/test/conformance/testing/include/uur/utils.h index dc2ade0310..8548b12d11 100644 --- a/test/conformance/testing/include/uur/utils.h +++ b/test/conformance/testing/include/uur/utils.h @@ -176,6 +176,8 @@ ur_result_t GetObjectReferenceCount(T object, uint32_t &out_ref_count) { object, UR_EXP_COMMAND_BUFFER_COMMAND_INFO_REFERENCE_COUNT, out_ref_count); } + + return UR_RESULT_ERROR_INVALID_VALUE; } inline std::string GetPlatformName(ur_platform_handle_t hPlatform) { diff --git a/test/conformance/usm/urUSMDeviceAlloc.cpp b/test/conformance/usm/urUSMDeviceAlloc.cpp index 8f3bc680ce..bfc48c2aa6 100644 --- a/test/conformance/usm/urUSMDeviceAlloc.cpp +++ b/test/conformance/usm/urUSMDeviceAlloc.cpp @@ -116,10 +116,9 @@ TEST_P(urUSMDeviceAllocTest, InvalidNullPtrResult) { TEST_P(urUSMDeviceAllocTest, InvalidUSMSize) { void *ptr = nullptr; - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_USM_SIZE, - urUSMDeviceAlloc(context, device, nullptr, pool, - std::numeric_limits::max(), - &ptr)); + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_USM_SIZE, + urUSMDeviceAlloc(context, device, nullptr, pool, -1, &ptr)); } TEST_P(urUSMDeviceAllocTest, InvalidValueAlignPowerOfTwo) { diff --git a/test/conformance/usm/urUSMHostAlloc.cpp b/test/conformance/usm/urUSMHostAlloc.cpp index 8e24c2d6b5..f59af21897 100644 --- a/test/conformance/usm/urUSMHostAlloc.cpp +++ b/test/conformance/usm/urUSMHostAlloc.cpp @@ -5,7 +5,6 @@ #include "helpers.h" #include -#include #include struct urUSMHostAllocTest @@ -129,8 +128,7 @@ TEST_P(urUSMHostAllocTest, InvalidNullPtrMem) { TEST_P(urUSMHostAllocTest, InvalidUSMSize) { void *ptr = nullptr; 
ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_USM_SIZE, - urUSMHostAlloc(context, nullptr, pool, - std::numeric_limits::max(), &ptr)); + urUSMHostAlloc(context, nullptr, pool, -1, &ptr)); } TEST_P(urUSMHostAllocTest, InvalidValueAlignPowerOfTwo) { diff --git a/test/conformance/usm/urUSMSharedAlloc.cpp b/test/conformance/usm/urUSMSharedAlloc.cpp index f42e714fd5..e543602fbc 100644 --- a/test/conformance/usm/urUSMSharedAlloc.cpp +++ b/test/conformance/usm/urUSMSharedAlloc.cpp @@ -4,7 +4,6 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "helpers.h" -#include #include struct urUSMSharedAllocTest @@ -146,10 +145,9 @@ TEST_P(urUSMSharedAllocTest, InvalidNullPtrMem) { TEST_P(urUSMSharedAllocTest, InvalidUSMSize) { void *ptr = nullptr; - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_USM_SIZE, - urUSMSharedAlloc(context, device, nullptr, pool, - std::numeric_limits::max(), - &ptr)); + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_USM_SIZE, + urUSMSharedAlloc(context, device, nullptr, pool, -1, &ptr)); } TEST_P(urUSMSharedAllocTest, InvalidValueAlignPowerOfTwo) { diff --git a/test/layers/tracing/test_collector.cpp b/test/layers/tracing/test_collector.cpp index fa5a8bee4e..db0940ad14 100644 --- a/test/layers/tracing/test_collector.cpp +++ b/test/layers/tracing/test_collector.cpp @@ -19,14 +19,7 @@ #include #include "ur_api.h" - -#ifdef _MSC_VER -#pragma warning(disable : 4245) -#endif #include "xpti/xpti_trace_framework.h" -#ifdef _MSC_VER -#pragma warning(default : 4245) -#endif constexpr uint16_t TRACE_FN_BEGIN = static_cast(xpti::trace_point_type_t::function_with_args_begin); diff --git a/tools/urinfo/utils.hpp b/tools/urinfo/utils.hpp index c879d7fb56..d7819b2947 100644 --- a/tools/urinfo/utils.hpp +++ b/tools/urinfo/utils.hpp @@ -51,7 +51,7 @@ inline std::string getAdapterBackend(ur_adapter_handle_t adapter) { stripPrefix(adapterBackendStream.str(), "UR_ADAPTER_BACKEND_"); std::transform(adapterBackendStr.begin(), adapterBackendStr.end(), 
adapterBackendStr.begin(), - [](char c) { return static_cast(std::tolower(c)); }); + [](unsigned char c) { return std::tolower(c); }); return adapterBackendStr; } @@ -65,7 +65,7 @@ inline std::string getDeviceType(ur_device_handle_t device) { stripPrefix(deviceTypeStream.str(), "UR_DEVICE_TYPE_"); std::transform(deviceTypeStr.begin(), deviceTypeStr.end(), deviceTypeStr.begin(), - [](char c) { return static_cast(std::tolower(c)); }); + [](unsigned char c) { return std::tolower(c); }); return deviceTypeStr; } diff --git a/tools/urtrace/collector.cpp b/tools/urtrace/collector.cpp index ea40d63e06..eb8c18d164 100644 --- a/tools/urtrace/collector.cpp +++ b/tools/urtrace/collector.cpp @@ -30,14 +30,7 @@ #include "ur_api.h" #include "ur_print.hpp" #include "ur_util.hpp" - -#ifdef _MSC_VER -#pragma warning(disable : 4245) -#endif #include "xpti/xpti_trace_framework.h" -#ifdef _MSC_VER -#pragma warning(default : 4245) -#endif constexpr uint16_t TRACE_FN_BEGIN = static_cast(xpti::trace_point_type_t::function_with_args_begin); @@ -286,6 +279,7 @@ std::unique_ptr create_writer() { default: ur::unreachable(); } + return nullptr; } static std::unique_ptr &writer() { From 09f29fe0f94f4369ae7d30b00186dabeda9c5680 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Fri, 8 Nov 2024 19:03:48 +0100 Subject: [PATCH 27/37] [Benchmarks] update compute-benchmarks Add more MemcpyExecute cases - with src/dst buffers on USM to decrease run-to-run variance. 
--- scripts/benchmarks/benches/compute.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/scripts/benchmarks/benches/compute.py b/scripts/benchmarks/benches/compute.py index e08109f77e..57bed7624a 100644 --- a/scripts/benchmarks/benches/compute.py +++ b/scripts/benchmarks/benches/compute.py @@ -19,7 +19,7 @@ def setup(self): if options.sycl is None: return - repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "aa6a3b2108bb86202b654ad28129156fa746d41d") + repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "c80ddec9f0b4905bcbeb0f264f710093dc70340d") build_path = create_build_path(self.directory, 'compute-benchmarks-build') configure_command = [ @@ -59,12 +59,14 @@ def benchmarks(self) -> list[Benchmark]: ExecImmediateCopyQueue(self, 0, 1, 'Device', 'Device', 1024), ExecImmediateCopyQueue(self, 1, 1, 'Device', 'Host', 1024), VectorSum(self), - MemcpyExecute(self, 400, 8, 1024, 100), - MemcpyExecute(self, 400, 8, 102400, 10), - MemcpyExecute(self, 500, 8, 102400, 10), - MemcpyExecute(self, 400, 1, 1024, 1000), - MemcpyExecute(self, 10, 16, 1024, 1000), - MemcpyExecute(self, 10, 16, 102400, 100), + MemcpyExecute(self, 400, 1, 102400, 10, 1, 1), + MemcpyExecute(self, 100, 8, 102400, 10, 1, 1), + MemcpyExecute(self, 400, 8, 1024, 1000, 1, 1), + MemcpyExecute(self, 10, 16, 1024, 10000, 1, 1), + MemcpyExecute(self, 400, 1, 102400, 10, 0, 1), + MemcpyExecute(self, 100, 8, 102400, 10, 0, 1), + MemcpyExecute(self, 400, 8, 1024, 1000, 0, 1), + MemcpyExecute(self, 10, 16, 1024, 10000, 0, 1), ] if options.ur is not None: @@ -265,15 +267,17 @@ def bin_args(self) -> list[str]: ] class MemcpyExecute(ComputeBenchmark): - def __init__(self, bench, numOpsPerThread, numThreads, allocSize, iterations): + def __init__(self, bench, numOpsPerThread, numThreads, allocSize, iterations, srcUSM, dstUSM): 
self.numOpsPerThread = numOpsPerThread self.numThreads = numThreads self.allocSize = allocSize self.iterations = iterations + self.srcUSM = srcUSM + self.dstUSM = dstUSM super().__init__(bench, "multithread_benchmark_ur", "MemcpyExecute") def name(self): - return f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize}" + return f"multithread_benchmark_ur MemcpyExecute opsPerThread:{self.numOpsPerThread}, numThreads:{self.numThreads}, allocSize:{self.allocSize} srcUSM:{self.srcUSM} dstUSM:{self.dstUSM}" def bin_args(self) -> list[str]: return [ @@ -284,5 +288,7 @@ def bin_args(self) -> list[str]: f"--AllocSize={self.allocSize}", f"--NumThreads={self.numThreads}", f"--NumOpsPerThread={self.numOpsPerThread}", - f"--iterations={self.iterations}" + f"--iterations={self.iterations}", + f"--SrcUSM={self.srcUSM}", + f"--DstUSM={self.dstUSM}", ] From 2e4c3ce9c0cc7200a46f99531782731a574f3753 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Fri, 8 Nov 2024 19:53:27 +0100 Subject: [PATCH 28/37] [L0 v2] implement enqueueCooperativeKernelLaunchExp --- .../v2/queue_immediate_in_order.cpp | 60 ++++++++++++++++--- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index 08fae0719f..57620f9330 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -971,15 +971,57 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp( const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - std::ignore = hKernel; - std::ignore = workDim; - std::ignore = pGlobalWorkOffset; - std::ignore = pGlobalWorkSize; - std::ignore = pLocalWorkSize; - 
std::ignore = numEventsInWaitList; - std::ignore = phEventWaitList; - std::ignore = phEvent; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + TRACK_SCOPE_LATENCY( + "ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp"); + + UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hKernel->getProgramHandle(), UR_RESULT_ERROR_INVALID_NULL_POINTER); + + UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + + ze_kernel_handle_t hZeKernel = hKernel->getZeHandle(hDevice); + + std::scoped_lock Lock(this->Mutex, + hKernel->Mutex); + + ze_group_count_t zeThreadGroupDimensions{1, 1, 1}; + uint32_t WG[3]{}; + UR_CALL(calculateKernelWorkDimensions(hZeKernel, hDevice, + zeThreadGroupDimensions, WG, workDim, + pGlobalWorkSize, pLocalWorkSize)); + + auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_KERNEL_LAUNCH); + + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); + + bool memoryMigrated = false; + auto memoryMigrate = [&](void *src, void *dst, size_t size) { + ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, + (handler.commandList.get(), dst, src, size, nullptr, + waitList.second, waitList.first)); + memoryMigrated = true; + }; + + UR_CALL(hKernel->prepareForSubmission(hContext, hDevice, pGlobalWorkOffset, + workDim, WG[0], WG[1], WG[2], + memoryMigrate)); + + if (memoryMigrated) { + // If memory was migrated, we don't need to pass the wait list to + // the copy command again. + waitList.first = nullptr; + waitList.second = 0; + } + + TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::" + "zeCommandListAppendLaunchCooperativeKernel"); + auto zeSignalEvent = signalEvent ? 
signalEvent->getZeEvent() : nullptr; + ZE2UR_CALL(zeCommandListAppendLaunchCooperativeKernel, + (handler.commandList.get(), hZeKernel, &zeThreadGroupDimensions, + zeSignalEvent, waitList.second, waitList.first)); + + return UR_RESULT_SUCCESS; } ur_result_t ur_queue_immediate_in_order_t::enqueueTimestampRecordingExp( From 9339e374200fdf0e9884f600ff8c92335bdd51c9 Mon Sep 17 00:00:00 2001 From: Gergely Meszaros Date: Fri, 8 Nov 2024 05:14:28 -0800 Subject: [PATCH 29/37] Wrap linker flags on Windows for IntelLLVM The Intel C++ compiler requires linker flags to be wrapped, because CMake passes them through the compiler driver. Prefix linker options with `LINKER:`. CMake will transform it to the appropriate flag for the compiler driver: (Nothing for MSVC or clang-cl, and /Qoption,link for icx), so this will work for other compilers and for earlier CMake versions too. Signed-off-by: Gergely Meszaros --- cmake/helpers.cmake | 10 +++++----- source/adapters/CMakeLists.txt | 2 +- source/adapters/level_zero/CMakeLists.txt | 4 ++-- source/loader/CMakeLists.txt | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index a6e3a344a4..41b7171fc4 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -131,9 +131,9 @@ function(add_ur_target_link_options name) endif() elseif(MSVC) target_link_options(${name} PRIVATE - /DYNAMICBASE - /HIGHENTROPYVA - /NXCOMPAT + LINKER:/DYNAMICBASE + LINKER:/HIGHENTROPYVA + LINKER:/NXCOMPAT ) endif() endfunction() @@ -141,7 +141,7 @@ endfunction() function(add_ur_target_exec_options name) if(MSVC) target_link_options(${name} PRIVATE - /ALLOWISOLATION + LINKER:/ALLOWISOLATION ) endif() endfunction() @@ -159,7 +159,7 @@ function(add_ur_library name) add_ur_target_link_options(${name}) if(MSVC) target_link_options(${name} PRIVATE - $<$,link.exe>:/DEPENDENTLOADFLAG:0x2000> + $<$,link.exe>:LINKER:/DEPENDENTLOADFLAG:0x2000> ) endif() endfunction() diff --git a/source/adapters/CMakeLists.txt
b/source/adapters/CMakeLists.txt index f981c17dd5..66cd8b7648 100644 --- a/source/adapters/CMakeLists.txt +++ b/source/adapters/CMakeLists.txt @@ -13,7 +13,7 @@ function(add_ur_adapter name) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../adapter.def.in ${ADAPTER_VERSION_SCRIPT} @ONLY) set_target_properties(${name} PROPERTIES - LINK_FLAGS "/DEF:${ADAPTER_VERSION_SCRIPT}" + LINK_OPTIONS "LINKER:/DEF:${ADAPTER_VERSION_SCRIPT}" ) elseif(APPLE) target_compile_options(${name} PRIVATE "-fvisibility=hidden") diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index d700fbb2c3..6465ebaa51 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -100,7 +100,7 @@ if(UR_BUILD_ADAPTER_L0) if (WIN32) # 0x800: Search for the DLL only in the System32 folder - target_link_options(ur_adapter_level_zero PRIVATE /DEPENDENTLOADFLAG:0x800) + target_link_options(ur_adapter_level_zero PRIVATE LINKER:/DEPENDENTLOADFLAG:0x800) endif() target_link_libraries(ur_adapter_level_zero PRIVATE @@ -194,7 +194,7 @@ if(UR_BUILD_ADAPTER_L0_V2) if (WIN32) # 0x800: Search for the DLL only in the System32 folder - target_link_options(ur_adapter_level_zero_v2 PUBLIC /DEPENDENTLOADFLAG:0x800) + target_link_options(ur_adapter_level_zero_v2 PUBLIC LINKER:/DEPENDENTLOADFLAG:0x800) endif() target_link_libraries(ur_adapter_level_zero_v2 PRIVATE diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index 07dab17943..b740411c61 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -28,7 +28,7 @@ if (MSVC) set(LOADER_VERSION_SCRIPT ${CMAKE_CURRENT_BINARY_DIR}/ur_loader.def) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/loader.def.in ${LOADER_VERSION_SCRIPT} @ONLY) set_target_properties(ur_loader PROPERTIES - LINK_FLAGS "/DEF:${LOADER_VERSION_SCRIPT}" + LINK_OPTIONS "LINKER:/DEF:${LOADER_VERSION_SCRIPT}" ) elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") set(TARGET_LIBNAME 
libur_loader_${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}) From 1696524d0b90160dff87507653bce1152cf6d4af Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Mon, 28 Oct 2024 11:37:30 +0100 Subject: [PATCH 30/37] add low-power events experimental extension spec --- include/ur_api.h | 106 +++++++++++- include/ur_api_funcs.def | 1 + include/ur_ddi.h | 10 ++ include/ur_print.h | 24 +++ include/ur_print.hpp | 161 ++++++++++++++++++ scripts/core/EXP-LOW-POWER-EVENTS.rst | 87 ++++++++++ scripts/core/exp-low-power-events.yml | 114 +++++++++++++ scripts/core/registry.yml | 3 + source/adapters/cuda/device.cpp | 3 +- source/adapters/cuda/enqueue.cpp | 8 + source/adapters/hip/device.cpp | 3 + source/adapters/hip/enqueue.cpp | 8 + source/adapters/level_zero/device.cpp | 2 + source/adapters/level_zero/event.cpp | 19 +++ .../level_zero/ur_interface_loader.cpp | 2 + .../level_zero/ur_interface_loader.hpp | 5 + source/adapters/level_zero/v2/queue_api.cpp | 8 + source/adapters/level_zero/v2/queue_api.hpp | 4 + .../v2/queue_immediate_in_order.cpp | 7 + .../v2/queue_immediate_in_order.hpp | 4 + source/adapters/mock/ur_mockddi.cpp | 67 ++++++++ source/adapters/native_cpu/device.cpp | 2 + source/adapters/native_cpu/enqueue.cpp | 8 + source/adapters/opencl/device.cpp | 2 + source/adapters/opencl/enqueue.cpp | 8 + source/common/stype_map_helpers.def | 2 + source/loader/layers/tracing/ur_trcddi.cpp | 59 +++++++ source/loader/layers/validation/ur_valddi.cpp | 70 +++++++- source/loader/loader.def.in | 4 + source/loader/loader.map.in | 4 + source/loader/ur_ldrddi.cpp | 67 ++++++++ source/loader/ur_libapi.cpp | 65 ++++++- source/loader/ur_print.cpp | 24 +++ source/ur_api.cpp | 57 ++++++- tools/urinfo/urinfo.hpp | 2 + 35 files changed, 1014 insertions(+), 6 deletions(-) create mode 100644 scripts/core/EXP-LOW-POWER-EVENTS.rst create mode 100644 scripts/core/exp-low-power-events.yml diff --git a/include/ur_api.h b/include/ur_api.h index 60d6fc2f70..3205fcb207 100644 --- a/include/ur_api.h +++ 
b/include/ur_api.h @@ -230,6 +230,7 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_UPDATE_SIGNAL_EVENT_EXP = 243, ///< Enumerator for ::urCommandBufferUpdateSignalEventExp UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP = 244, ///< Enumerator for ::urCommandBufferUpdateWaitEventsExp UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245, ///< Enumerator for ::urBindlessImagesMapExternalLinearMemoryExp + UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT = 246, ///< Enumerator for ::urEnqueueEventsWaitWithBarrierExt /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -288,6 +289,7 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES = 0x2006, ///< ::ur_exp_sampler_cubemap_properties_t UR_STRUCTURE_TYPE_EXP_IMAGE_COPY_REGION = 0x2007, ///< ::ur_exp_image_copy_region_t UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES = 0x3000, ///< ::ur_exp_enqueue_native_command_properties_t + UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES = 0x4000, ///< ::ur_exp_enqueue_ext_properties_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1702,6 +1704,7 @@ typedef enum ur_device_info_t { ///< backed 2D sampled image data. UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP = 0x2020, ///< [::ur_bool_t] returns true if the device supports enqueueing of native ///< work + UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP = 0x2021, ///< [::ur_bool_t] returns true if the device supports low-power events. /// @cond UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1727,7 +1730,7 @@ typedef enum ur_device_info_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -5447,13 +5450,17 @@ typedef enum ur_queue_flag_t { ///< ignore this flag. UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM = UR_BIT(10), ///< Synchronize with the default stream. Only meaningful for CUDA. Other ///< platforms may ignore this flag. + UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP = UR_BIT(11), ///< Hint: use low-power events. Only meaningful for Level Zero, where the + ///< implementation may use interrupt-driven events. May reduce CPU + ///< utilization at the cost of increased event completion latency. Other + ///< platforms may ignore this flag. /// @cond UR_QUEUE_FLAG_FORCE_UINT32 = 0x7fffffff /// @endcond } ur_queue_flag_t; /// @brief Bit Mask for validating ur_queue_flags_t -#define UR_QUEUE_FLAGS_MASK 0xfffff800 +#define UR_QUEUE_FLAGS_MASK 0xfffff000 /////////////////////////////////////////////////////////////////////////////// /// @brief Query information about a command queue @@ -9974,6 +9981,89 @@ urUsmP2PPeerAccessGetInfoExp( size_t *pPropSizeRet ///< [out][optional] pointer to the actual size in bytes of the queried propName. ); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime Experimental API for low-power events API +#if !defined(__GNUC__) +#pragma region low_power_events_(experimental) +#endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief Extended enqueue properties +typedef uint32_t ur_exp_enqueue_ext_flags_t; +typedef enum ur_exp_enqueue_ext_flag_t { + UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS = UR_BIT(11), ///< Hint: use low-power events. Only meaningful for Level Zero, where the + ///< implementation may use interrupt-driven events. May reduce CPU + ///< utilization at the cost of increased event completion latency. Other + ///< platforms may ignore this flag. 
+ /// @cond + UR_EXP_ENQUEUE_EXT_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_enqueue_ext_flag_t; +/// @brief Bit Mask for validating ur_exp_enqueue_ext_flags_t +#define UR_EXP_ENQUEUE_EXT_FLAGS_MASK 0xfffff7ff + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Extended enqueue properties +typedef struct ur_exp_enqueue_ext_properties_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES + void *pNext; ///< [in,out][optional] pointer to extension-specific structure + ur_exp_enqueue_ext_flags_t flags; ///< [in] extended enqueue flags + +} ur_exp_enqueue_ext_properties_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue a barrier command which waits a list of events to complete +/// before it completes, with optional extended properties +/// +/// @details +/// - If the event list is empty, it waits for all previously enqueued +/// commands to complete. +/// - It blocks command execution - any following commands enqueued after it +/// do not execute until it completes. +/// - It returns an event which can be waited on. +/// +/// @remarks +/// _Analogues_ +/// - **clEnqueueBarrierWithWaitList** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_EXT_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_QUEUE +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL +urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t *pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. 
+); + #if !defined(__GNUC__) #pragma endregion #endif @@ -11450,6 +11540,18 @@ typedef struct ur_enqueue_kernel_launch_custom_exp_params_t { ur_event_handle_t **pphEvent; } ur_enqueue_kernel_launch_custom_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueEventsWaitWithBarrierExt +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_enqueue_events_wait_with_barrier_ext_params_t { + ur_queue_handle_t *phQueue; + const ur_exp_enqueue_ext_properties_t **ppProperties; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + ur_event_handle_t **pphEvent; +} ur_enqueue_events_wait_with_barrier_ext_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urEnqueueCooperativeKernelLaunchExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_api_funcs.def b/include/ur_api_funcs.def index a7ca4d88a0..4920245369 100644 --- a/include/ur_api_funcs.def +++ b/include/ur_api_funcs.def @@ -126,6 +126,7 @@ _UR_API(urEnqueueDeviceGlobalVariableWrite) _UR_API(urEnqueueDeviceGlobalVariableRead) _UR_API(urEnqueueReadHostPipe) _UR_API(urEnqueueWriteHostPipe) +_UR_API(urEnqueueEventsWaitWithBarrierExt) _UR_API(urEnqueueKernelLaunchCustomExp) _UR_API(urEnqueueCooperativeKernelLaunchExp) _UR_API(urEnqueueTimestampRecordingExp) diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 80a0003fca..40a6c5c269 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1398,6 +1398,15 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueWriteHostPipe_t)( const ur_event_handle_t *, ur_event_handle_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueEventsWaitWithBarrierExt +typedef 
ur_result_t(UR_APICALL *ur_pfnEnqueueEventsWaitWithBarrierExt_t)( + ur_queue_handle_t, + const ur_exp_enqueue_ext_properties_t *, + uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Table of Enqueue functions pointers typedef struct ur_enqueue_dditable_t { @@ -1426,6 +1435,7 @@ typedef struct ur_enqueue_dditable_t { ur_pfnEnqueueDeviceGlobalVariableRead_t pfnDeviceGlobalVariableRead; ur_pfnEnqueueReadHostPipe_t pfnReadHostPipe; ur_pfnEnqueueWriteHostPipe_t pfnWriteHostPipe; + ur_pfnEnqueueEventsWaitWithBarrierExt_t pfnEventsWaitWithBarrierExt; } ur_enqueue_dditable_t; /////////////////////////////////////////////////////////////////////////////// diff --git a/include/ur_print.h b/include/ur_print.h index 1dd874e5a5..93597d232f 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -1058,6 +1058,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpLaunchProperty(const struct ur_exp /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpPeerInfo(enum ur_exp_peer_info_t value, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_enqueue_ext_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpEnqueueExtFlags(enum ur_exp_enqueue_ext_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_enqueue_ext_properties_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpEnqueueExtProperties(const struct ur_exp_enqueue_ext_properties_t params, char *buffer, const size_t buff_size, size_t *out_size); 
+ /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_enqueue_native_command_flag_t enum /// @returns @@ -2034,6 +2050,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueWriteHostPipeParams(const stru /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueKernelLaunchCustomExpParams(const struct ur_enqueue_kernel_launch_custom_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_events_wait_with_barrier_ext_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueEventsWaitWithBarrierExtParams(const struct ur_enqueue_events_wait_with_barrier_ext_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_enqueue_cooperative_kernel_launch_exp_params_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 09431d4352..58964cc7ab 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -213,6 +213,9 @@ inline ur_result_t printUnion( template <> inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_peer_info_t value, size_t size); +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); + template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); @@ -348,6 +351,8 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, enum ur_exp_launch_property_id_t value); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_launch_property_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_peer_info_t value); +inline 
std::ostream &operator<<(std::ostream &os, enum ur_exp_enqueue_ext_flag_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_enqueue_ext_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_enqueue_native_command_flag_t value); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_enqueue_native_command_properties_t params); @@ -954,6 +959,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP"; break; + case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT: + os << "UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT"; + break; default: os << "unknown enumerator"; break; @@ -1113,6 +1121,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_structure_type_t value case UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES"; break; + case UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES: + os << "UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES"; + break; default: os << "unknown enumerator"; break; @@ -1374,6 +1385,11 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { const ur_exp_enqueue_native_command_properties_t *pstruct = (const ur_exp_enqueue_native_command_properties_t *)ptr; printPtr(os, pstruct); } break; + + case UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES: { + const ur_exp_enqueue_ext_properties_t *pstruct = (const ur_exp_enqueue_ext_properties_t *)ptr; + printPtr(os, pstruct); + } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -2646,6 +2662,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP: os << "UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP"; break; + case 
UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: + os << "UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP"; + break; default: os << "unknown enumerator"; break; @@ -4440,6 +4459,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -8661,6 +8692,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_queue_flag_t value) { case UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM: os << "UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM"; break; + case UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP: + os << "UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP"; + break; default: os << "unknown enumerator"; break; @@ -8785,6 +8819,16 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { } os << UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM; } + + if ((val & UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP) == (uint32_t)UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP) { + val ^= (uint32_t)UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP; + } if (val != 0) { std::bitset<32> bits(val); if (!first) { @@ -10369,6 +10413,77 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_peer_in } } // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_enqueue_ext_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_enqueue_ext_flag_t value) { + switch (value) { + case UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS: + os << 
"UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_enqueue_ext_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS) == (uint32_t)UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS) { + val ^= (uint32_t)UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_enqueue_ext_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_enqueue_ext_properties_t params) { + os << "(struct ur_exp_enqueue_ext_properties_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".flags = "; + + ur::details::printFlag(os, + (params.flags)); + + os << "}"; + return os; +} /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_enqueue_native_command_flag_t type /// @returns @@ -14776,6 +14891,49 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_events_wait_with_barrier_ext_params_t type +/// @returns +/// 
std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_events_wait_with_barrier_ext_params_t *params) { + + os << ".hQueue = "; + + ur::details::printPtr(os, + *(params->phQueue)); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, + *(params->ppProperties)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_enqueue_cooperative_kernel_launch_exp_params_t type /// @returns @@ -18463,6 +18621,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_CUSTOM_EXP: { os << (const struct ur_enqueue_kernel_launch_custom_exp_params_t *)params; } break; + case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT: { + os << (const struct ur_enqueue_events_wait_with_barrier_ext_params_t *)params; + } break; case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: { os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-LOW-POWER-EVENTS.rst b/scripts/core/EXP-LOW-POWER-EVENTS.rst new file mode 100644 index 0000000000..43f2032527 --- /dev/null +++ b/scripts/core/EXP-LOW-POWER-EVENTS.rst @@ -0,0 +1,87 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. 
_experimental-low-power-events: + +================================================================================ +Low Power Events +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. + + +Motivation +-------------------------------------------------------------------------------- + +By default, level-zero uses busy polling for waiting on event completion when +performing host-based synchronization through APIs such as `${x}QueueFinish`. +This provides the lowest possible latency for the calling thread, but +it may lead to increased CPU utilization. + +This extension introduces a new hint flag for `${x}QueueCreate`, allowing users to +indicate to the runtime that they are willing to sacrifice event completion +latency in order to reduce CPU utilization. This may be implemented using +interrupt-driven event completion, where the calling thread yields until +woken up by the driver. + +For applications that want to selectively choose which events should utilize +the low-power mode, this extension also adds a new `${x}EnqueueEventsWaitWithBarrierExt` function. +This enqueue method can be used with an analogous property flag that may cause +its output event to be low-power. This barrier is meant to be used on a regular event +just before calling synchronization APIs (such as `${x}QueueFinish`) to introduce a low-power event. 
+ +API +-------------------------------------------------------------------------------- + +Enums +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ${x}_device_info_t + * ${X}_DEVICE_INFO_LOW_POWER_EVENTS_EXP +* ${x}_queue_flags_t + * ${X}_QUEUE_FLAG_LOW_POWER_EVENTS_EXP +* ${x}_exp_enqueue_ext_flags_t + * ${X}_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS +* ${x}_structure_type_t + * ${X}_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES + +Types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +${x}_exp_enqueue_ext_properties_t + +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}EnqueueEventsWaitWithBarrierExt + +Changelog +-------------------------------------------------------------------------------- + ++-----------+---------------------------+ +| Revision | Changes | ++===========+===========================+ +| 1.0 | Initial Draft | ++-----------+---------------------------+ + + +Support +-------------------------------------------------------------------------------- + +Adapters which support this experimental feature *must* return true for the new +``${X}_DEVICE_INFO_LOW_POWER_EVENTS_EXP`` device info query. + + +Contributors +-------------------------------------------------------------------------------- + +* Piotr Balcer `piotr.balcer@intel.com <piotr.balcer@intel.com>`_ diff --git a/scripts/core/exp-low-power-events.yml b/scripts/core/exp-low-power-events.yml new file mode 100644 index 0000000000..f116eaf73a --- /dev/null +++ b/scripts/core/exp-low-power-events.yml @@ -0,0 +1,114 @@ +# +# Copyright (C) 2024 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental API for low-power events API" +ordinal: "100" + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +typed_etors: true +desc: "Extension enums to $x_device_info_t to support low-power events." +name: $x_device_info_t +etors: + - name: LOW_POWER_EVENTS_EXP + value: "0x2021" + desc: "[$x_bool_t] returns true if the device supports low-power events." + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +desc: "Extension enums to $x_queue_flags_t to support low power events." +name: $x_queue_flags_t +etors: + - name: LOW_POWER_EVENTS_EXP + desc: > + Hint: use low-power events. Only meaningful for Level Zero, where the implementation may use interrupt-driven events. + May reduce CPU utilization at the cost of increased event completion latency. + Other platforms may ignore this flag. + value: "$X_BIT(11)" + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Extended enqueue properties" +name: $x_exp_enqueue_ext_flags_t +etors: + - name: LOW_POWER_EVENTS + desc: > + Hint: use low-power events. Only meaningful for Level Zero, where the implementation may use interrupt-driven events. + May reduce CPU utilization at the cost of increased event completion latency. + Other platforms may ignore this flag. 
+ value: "$X_BIT(11)" + +--- #-------------------------------------------------------------------------- +type: struct +desc: "Extended enqueue properties" +name: $x_exp_enqueue_ext_properties_t +base: $x_base_properties_t +members: + - type: $x_exp_enqueue_ext_flags_t + name: flags + desc: "[in] extended enqueue flags" + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +desc: "Structure type experimental enumerations" +name: $x_structure_type_t +etors: + - name: EXP_ENQUEUE_EXT_PROPERTIES + desc: $x_exp_enqueue_ext_properties_t + value: "0x4000" + +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue a barrier command which waits a list of events to complete before it completes, with optional extended properties" +class: $xEnqueue +name: EventsWaitWithBarrierExt +ordinal: "0" +details: + - "If the event list is empty, it waits for all previously enqueued commands to complete." + - "It blocks command execution - any following commands enqueued after it do not execute until it completes." + - "It returns an event which can be waited on." +analogue: + - "**clEnqueueBarrierWithWaitList**" +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: const $x_exp_enqueue_ext_properties_t* + name: pProperties + desc: "[in][optional] pointer to the extended enqueue properties" + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: "const $x_event_handle_t*" + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before this command can be executed. + If nullptr, the numEventsInWaitList must be 0, indicating that all previously enqueued commands + must be complete. 
+ - type: $x_event_handle_t* + name: phEvent + desc: | + [out][optional] return an event object that identifies this particular command instance. If phEventWaitList and phEvent are not NULL, phEvent must not refer to an element of the phEventWaitList array. +returns: + - $X_RESULT_ERROR_INVALID_QUEUE + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: + - "`phEventWaitList == NULL && numEventsInWaitList > 0`" + - "`phEventWaitList != NULL && numEventsInWaitList == 0`" + - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." + - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index f4ba983bfc..2133e1c889 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -604,6 +604,9 @@ etors: - name: BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP desc: Enumerator for $xBindlessImagesMapExternalLinearMemoryExp value: '245' +- name: ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT + desc: Enumerator for $xEnqueueEventsWaitWithBarrierExt + value: '246' --- type: enum desc: Defines structure types diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index be5867628d..cb6b757dd3 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1107,7 +1107,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) >= 9; return ReturnValue(static_cast(Value)); } - + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: + return ReturnValue(false); default: break; } diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index 0e00f680f6..f5ae19b965 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -402,6 +402,14 @@ UR_APIEXPORT ur_result_t 
UR_APICALL urEnqueueEventsWaitWithBarrier( } } +UR_APIEXPORT ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, const ur_exp_enqueue_ext_properties_t *, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, + phEventWaitList, phEvent); +} + /// Enqueues a wait on the given CUstream for all events. /// See \ref enqueueEventWait /// TODO: Add support for multiple streams once the Event class is properly diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index dbac5d37f1..5271f73709 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -935,6 +935,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP: return ReturnValue(false); + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: { + return ReturnValue(false); + } default: break; } diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 77984ac571..ae362372d3 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -436,6 +436,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( } } +UR_APIEXPORT ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, const ur_exp_enqueue_ext_properties_t *, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, + phEventWaitList, phEvent); +} + /// General 3D memory copy operation. 
/// This function requires the corresponding HIP context to be at the top of /// the context stack diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 94dad86070..865edebc08 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -1151,6 +1151,8 @@ ur_result_t urDeviceGetInfo( return ReturnValue(true); case UR_DEVICE_INFO_USM_POOL_SUPPORT: return ReturnValue(true); + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: + return ReturnValue(false); default: logger::error("Unsupported ParamName in urGetDeviceInfo"); logger::error("ParamNameParamName={}(0x{})", ParamName, diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 408580dd80..5f7d444cda 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -419,6 +419,25 @@ ur_result_t urEnqueueEventsWaitWithBarrier( return UR_RESULT_SUCCESS; } +ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t + *, ///< [in][optional] pointer to the extended enqueue properties + uint32_t NumEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating that + ///< all previously enqueued commands must be complete. + ur_event_handle_t + *OutEvent ///< [in,out][optional] return an event object that identifies + ///< this particular command instance. 
+) { + return ur::level_zero::urEnqueueEventsWaitWithBarrier( + Queue, NumEventsInWaitList, EventWaitList, OutEvent); +} + ur_result_t urEventGetInfo( ur_event_handle_t Event, ///< [in] handle of the event object ur_event_info_t PropName, ///< [in] the name of the event property to query diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 1c2f68c07c..0a36b3ecad 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -200,6 +200,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( ur::level_zero::urEnqueueDeviceGlobalVariableRead; pDdiTable->pfnReadHostPipe = ur::level_zero::urEnqueueReadHostPipe; pDdiTable->pfnWriteHostPipe = ur::level_zero::urEnqueueWriteHostPipe; + pDdiTable->pfnEventsWaitWithBarrierExt = + ur::level_zero::urEnqueueEventsWaitWithBarrierExt; return result; } diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index 1207f7776b..1215d6449e 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -723,6 +723,11 @@ ur_result_t urUsmP2PPeerAccessGetInfoExp(ur_device_handle_t commandDevice, ur_exp_peer_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet); +ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, + const ur_exp_enqueue_ext_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); ur_result_t urEnqueueNativeCommandExp( ur_queue_handle_t hQueue, ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, diff --git a/source/adapters/level_zero/v2/queue_api.cpp b/source/adapters/level_zero/v2/queue_api.cpp index ea2e931bfe..e919af685e 100644 --- a/source/adapters/level_zero/v2/queue_api.cpp +++ 
b/source/adapters/level_zero/v2/queue_api.cpp @@ -327,6 +327,14 @@ ur_result_t urEnqueueKernelLaunchCustomExp( numPropsInLaunchPropList, launchPropList, numEventsInWaitList, phEventWaitList, phEvent); } +ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, + const ur_exp_enqueue_ext_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueEventsWaitWithBarrierExt( + pProperties, numEventsInWaitList, phEventWaitList, phEvent); +} ur_result_t urEnqueueNativeCommandExp( ur_queue_handle_t hQueue, ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data, diff --git a/source/adapters/level_zero/v2/queue_api.hpp b/source/adapters/level_zero/v2/queue_api.hpp index 577f6c5aba..7cb039ccdd 100644 --- a/source/adapters/level_zero/v2/queue_api.hpp +++ b/source/adapters/level_zero/v2/queue_api.hpp @@ -148,6 +148,10 @@ struct ur_queue_handle_t_ { const ur_exp_launch_property_t *, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0; virtual ur_result_t + enqueueEventsWaitWithBarrierExt(const ur_exp_enqueue_ext_properties_t *, + uint32_t, const ur_event_handle_t *, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *, uint32_t, const ur_mem_handle_t *, const ur_exp_enqueue_native_command_properties_t *, diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index 08fae0719f..5ec5229f21 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -290,6 +290,13 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier( return enqueueEventsWait(numEventsInWaitList, phEventWaitList, phEvent); } +ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrierExt( + const 
ur_exp_enqueue_ext_properties_t *, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + return enqueueEventsWaitWithBarrier(numEventsInWaitList, phEventWaitList, + phEvent); +} + ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked( ur_mem_handle_t src, ur_mem_handle_t dst, bool blocking, size_t srcOffset, size_t dstOffset, size_t size, uint32_t numEventsInWaitList, diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp index 3fac90110a..33e060ded3 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -108,6 +108,10 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { enqueueEventsWaitWithBarrier(uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) override; + ur_result_t enqueueEventsWaitWithBarrierExt( + const ur_exp_enqueue_ext_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; ur_result_t enqueueMemBufferRead(ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index dea28a4658..42c342444d 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -10592,6 +10592,70 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueEventsWaitWithBarrierExt +__urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const 
ur_exp_enqueue_ext_properties_t * + pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_events_wait_with_barrier_ext_params_t params = { + &hQueue, &pProperties, &numEventsInWaitList, &phEventWaitList, + &phEvent}; + + auto beforeCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_before_callback( + "urEnqueueEventsWaitWithBarrierExt")); + if (beforeCallback) { + result = beforeCallback(&params); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_replace_callback( + "urEnqueueEventsWaitWithBarrierExt")); + if (replaceCallback) { + result = replaceCallback(&params); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle<ur_event_handle_t>(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast<ur_mock_callback_t>( + mock::getCallbacks().get_after_callback( + "urEnqueueEventsWaitWithBarrierExt")); + if (afterCallback) { + return afterCallback(&params); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueNativeCommandExp __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( @@ -10996,6 +11060,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( pDdiTable->pfnWriteHostPipe = driver::urEnqueueWriteHostPipe; + pDdiTable->pfnEventsWaitWithBarrierExt = + driver::urEnqueueEventsWaitWithBarrierExt; + return result; } catch (...) { return exceptionToResult(std::current_exception()); diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index d744d6290b..df5647b525 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -417,6 +417,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_USM_POOL_SUPPORT: return ReturnValue(false); + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: + return ReturnValue(false); default: DIE_NO_IMPLEMENTATION; } diff --git a/source/adapters/native_cpu/enqueue.cpp b/source/adapters/native_cpu/enqueue.cpp index 33d8c35c36..49a1ca7c90 100644 --- a/source/adapters/native_cpu/enqueue.cpp +++ b/source/adapters/native_cpu/enqueue.cpp @@ -272,6 +272,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( DIE_NO_IMPLEMENTATION; } +UR_APIEXPORT ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, const ur_exp_enqueue_ext_properties_t *, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, + phEventWaitList, phEvent); +} + template static inline ur_result_t enqueueMemBufferReadWriteRect_impl( ur_queue_handle_t, ur_mem_handle_t Buff, bool, diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index e17211826f..863a07d72a 100644 --- 
a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -1119,6 +1119,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP: return ReturnValue(false); + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: + return ReturnValue(false); default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 6596a01317..45e5fbb5c4 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -96,6 +96,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( return UR_RESULT_SUCCESS; } +UR_APIEXPORT ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, const ur_exp_enqueue_ext_properties_t *, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return urEnqueueEventsWaitWithBarrier(hQueue, numEventsInWaitList, + phEventWaitList, phEvent); +} + UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, diff --git a/source/common/stype_map_helpers.def b/source/common/stype_map_helpers.def index c938ca6b95..ec2856e60d 100644 --- a/source/common/stype_map_helpers.def +++ b/source/common/stype_map_helpers.def @@ -99,4 +99,6 @@ template <> struct stype_map : stype_map_impl {}; template <> struct stype_map : stype_map_impl {}; +template <> +struct stype_map<ur_exp_enqueue_ext_properties_t> : stype_map_impl<UR_STRUCTURE_TYPE_EXP_ENQUEUE_EXT_PROPERTIES> {}; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 9cc18c66c4..64489c39ac 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -9099,6 +9099,60 @@ __urdlllocal ur_result_t 
UR_APICALL urUsmP2PPeerAccessGetInfoExp( return result; } 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueEventsWaitWithBarrierExt +__urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t * + pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. 
+) { + auto pfnEventsWaitWithBarrierExt = + getContext()->urDdiTable.Enqueue.pfnEventsWaitWithBarrierExt; + + if (nullptr == pfnEventsWaitWithBarrierExt) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_enqueue_events_wait_with_barrier_ext_params_t params = { + &hQueue, &pProperties, &numEventsInWaitList, &phEventWaitList, + &phEvent}; + uint64_t instance = getContext()->notify_begin( + UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT, + "urEnqueueEventsWaitWithBarrierExt", &params); + + auto &logger = getContext()->logger; + logger.info(" ---> urEnqueueEventsWaitWithBarrierExt\n"); + + ur_result_t result = pfnEventsWaitWithBarrierExt( + hQueue, pProperties, numEventsInWaitList, phEventWaitList, phEvent); + + getContext()->notify_end(UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT, + "urEnqueueEventsWaitWithBarrierExt", &params, + &result, instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT, + &params); + logger.info(" <--- urEnqueueEventsWaitWithBarrierExt({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueNativeCommandExp __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( @@ -9609,6 +9663,11 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueProcAddrTable( dditable.pfnWriteHostPipe = pDdiTable->pfnWriteHostPipe; pDdiTable->pfnWriteHostPipe = ur_tracing_layer::urEnqueueWriteHostPipe; + dditable.pfnEventsWaitWithBarrierExt = + pDdiTable->pfnEventsWaitWithBarrierExt; + pDdiTable->pfnEventsWaitWithBarrierExt = + ur_tracing_layer::urEnqueueEventsWaitWithBarrierExt; + return result; } /////////////////////////////////////////////////////////////////////////////// diff --git a/source/loader/layers/validation/ur_valddi.cpp 
b/source/loader/layers/validation/ur_valddi.cpp index 
fdfce7951b..195c1d3c69 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -518,7 +518,7 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetInfo( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName) { + if (UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } @@ -10143,6 +10143,69 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueEventsWaitWithBarrierExt +__urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t * + pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. 
+) { + auto pfnEventsWaitWithBarrierExt = + getContext()->urDdiTable.Enqueue.pfnEventsWaitWithBarrierExt; + + if (nullptr == pfnEventsWaitWithBarrierExt) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL != pProperties && + UR_EXP_ENQUEUE_EXT_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (phEventWaitList == NULL && numEventsInWaitList > 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList == 0) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + getContext()->refCountContext->logInvalidReference(hQueue); + } + + ur_result_t result = pfnEventsWaitWithBarrierExt( + hQueue, pProperties, numEventsInWaitList, phEventWaitList, phEvent); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueNativeCommandExp __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( @@ -10661,6 +10724,11 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( dditable.pfnWriteHostPipe = pDdiTable->pfnWriteHostPipe; pDdiTable->pfnWriteHostPipe = ur_validation_layer::urEnqueueWriteHostPipe; + dditable.pfnEventsWaitWithBarrierExt = + pDdiTable->pfnEventsWaitWithBarrierExt; + pDdiTable->pfnEventsWaitWithBarrierExt = + ur_validation_layer::urEnqueueEventsWaitWithBarrierExt; + return result; } diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index c34bde6fd2..b5c3bde6ea 100644 --- 
a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -69,6 +69,7 @@ EXPORTS urEnqueueDeviceGlobalVariableWrite urEnqueueEventsWait urEnqueueEventsWaitWithBarrier + urEnqueueEventsWaitWithBarrierExt urEnqueueKernelLaunch urEnqueueKernelLaunchCustomExp urEnqueueMemBufferCopy @@ -266,6 +267,7 @@ EXPORTS urPrintEnqueueDeviceGlobalVariableReadParams urPrintEnqueueDeviceGlobalVariableWriteParams urPrintEnqueueEventsWaitParams + urPrintEnqueueEventsWaitWithBarrierExtParams urPrintEnqueueEventsWaitWithBarrierParams urPrintEnqueueKernelLaunchCustomExpParams urPrintEnqueueKernelLaunchParams @@ -310,6 +312,8 @@ EXPORTS urPrintExpCommandBufferUpdateMemobjArgDesc urPrintExpCommandBufferUpdatePointerArgDesc urPrintExpCommandBufferUpdateValueArgDesc + urPrintExpEnqueueExtFlags + urPrintExpEnqueueExtProperties urPrintExpEnqueueNativeCommandFlags urPrintExpEnqueueNativeCommandProperties urPrintExpExternalMemDesc diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index 8333ee2fa4..778a5da065 100644 --- a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -69,6 +69,7 @@ urEnqueueDeviceGlobalVariableWrite; urEnqueueEventsWait; urEnqueueEventsWaitWithBarrier; + urEnqueueEventsWaitWithBarrierExt; urEnqueueKernelLaunch; urEnqueueKernelLaunchCustomExp; urEnqueueMemBufferCopy; @@ -266,6 +267,7 @@ urPrintEnqueueDeviceGlobalVariableReadParams; urPrintEnqueueDeviceGlobalVariableWriteParams; urPrintEnqueueEventsWaitParams; + urPrintEnqueueEventsWaitWithBarrierExtParams; urPrintEnqueueEventsWaitWithBarrierParams; urPrintEnqueueKernelLaunchCustomExpParams; urPrintEnqueueKernelLaunchParams; @@ -310,6 +312,8 @@ urPrintExpCommandBufferUpdateMemobjArgDesc; urPrintExpCommandBufferUpdatePointerArgDesc; urPrintExpCommandBufferUpdateValueArgDesc; + urPrintExpEnqueueExtFlags; + urPrintExpEnqueueExtProperties; urPrintExpEnqueueNativeCommandFlags; urPrintExpEnqueueNativeCommandProperties; urPrintExpExternalMemDesc; diff --git a/source/loader/ur_ldrddi.cpp 
b/source/loader/ur_ldrddi.cpp index a67879a9eb..86a6ad95a0 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -9220,6 +9220,71 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueEventsWaitWithBarrierExt +__urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t * + pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnEventsWaitWithBarrierExt = + dditable->ur.Enqueue.pfnEventsWaitWithBarrierExt; + if (nullptr == pfnEventsWaitWithBarrierExt) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = + pfnEventsWaitWithBarrierExt(hQueue, pProperties, numEventsInWaitList, + phEventWaitListLocal.data(), phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any output handles below. + if (UR_RESULT_SUCCESS != result && + UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) { + return result; + } + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueNativeCommandExp __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( @@ -9712,6 +9777,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( ur_loader::urEnqueueDeviceGlobalVariableRead; pDdiTable->pfnReadHostPipe = ur_loader::urEnqueueReadHostPipe; pDdiTable->pfnWriteHostPipe = ur_loader::urEnqueueWriteHostPipe; + pDdiTable->pfnEventsWaitWithBarrierExt = + ur_loader::urEnqueueEventsWaitWithBarrierExt; } else { // return pointers directly to platform's DDIs 
*pDdiTable = diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 9a8e4c2e12..8dca26d4ba 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -870,7 +870,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -9436,6 +9436,69 @@ ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue a barrier command which waits a list of events to complete +/// before it completes, with optional extended properties +/// +/// @details +/// - If the event list is empty, it waits for all previously enqueued +/// commands to complete. +/// - It blocks command execution - any following commands enqueued after it +/// do not execute until it completes. +/// - It returns an event which can be waited on. 
+/// +/// @remarks +/// _Analogues_ +/// - **clEnqueueBarrierWithWaitList** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_EXT_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_QUEUE +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t * + pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. 
+ ) try { + auto pfnEventsWaitWithBarrierExt = + ur_lib::getContext()->urDdiTable.Enqueue.pfnEventsWaitWithBarrierExt; + if (nullptr == pfnEventsWaitWithBarrierExt) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnEventsWaitWithBarrierExt(hQueue, pProperties, numEventsInWaitList, + phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Immediately enqueue work through a native backend API /// diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 3a14d9a9de..d8206edb3f 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -1069,6 +1069,22 @@ ur_result_t urPrintExpPeerInfo(enum ur_exp_peer_info_t value, char *buffer, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintExpEnqueueExtFlags(enum ur_exp_enqueue_ext_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpEnqueueExtProperties( + const struct ur_exp_enqueue_ext_properties_t params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpEnqueueNativeCommandFlags( enum ur_exp_enqueue_native_command_flag_t value, char *buffer, const size_t buff_size, size_t *out_size) { @@ -1746,6 +1762,14 @@ ur_result_t urPrintEnqueueKernelLaunchCustomExpParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintEnqueueEventsWaitWithBarrierExtParams( + const struct ur_enqueue_events_wait_with_barrier_ext_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintEnqueueCooperativeKernelLaunchExpParams( 
const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 92b02b7176..22c76f122e 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -771,7 +771,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -8002,6 +8002,61 @@ ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue a barrier command which waits a list of events to complete +/// before it completes, with optional extended properties +/// +/// @details +/// - If the event list is empty, it waits for all previously enqueued +/// commands to complete. +/// - It blocks command execution - any following commands enqueued after it +/// do not execute until it completes. +/// - It returns an event which can be waited on. 
+/// +/// @remarks +/// _Analogues_ +/// - **clEnqueueBarrierWithWaitList** +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_EXT_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_QUEUE +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +/// + `phEventWaitList == NULL && numEventsInWaitList > 0` +/// + `phEventWaitList != NULL && numEventsInWaitList == 0` +/// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t * + pProperties, ///< [in][optional] pointer to the extended enqueue properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that all + ///< previously enqueued commands + ///< must be complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. If phEventWaitList and phEvent are not NULL, phEvent + ///< must not refer to an element of the phEventWaitList array. 
+) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Immediately enqueue work through a native backend API /// diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index d949b1f5df..ee7fe52834 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -418,5 +418,7 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo( hDevice, UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo(hDevice, UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP); } } // namespace urinfo From 7251d1a8d8cf790ff891e926dcfe8deb1a5fee27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 7 Nov 2024 15:20:02 +0100 Subject: [PATCH 31/37] [CI] Move coverity job to public GHA runner There's no need for self-hosted. For this change to happen it was required to re-write the workflow: - install dependencies for adapters and UMF, - download coverity tool from the project's page, - push tarball to Coverity's scan webpage via curl. 
--- ...-fix-travisci_build_coverity_scan.sh.patch | 27 ------ .github/workflows/coverity.yml | 82 +++++++++++-------- README.md | 3 +- 3 files changed, 52 insertions(+), 60 deletions(-) delete mode 100644 .github/scripts/0001-travis-fix-travisci_build_coverity_scan.sh.patch diff --git a/.github/scripts/0001-travis-fix-travisci_build_coverity_scan.sh.patch b/.github/scripts/0001-travis-fix-travisci_build_coverity_scan.sh.patch deleted file mode 100644 index 9738942aa4..0000000000 --- a/.github/scripts/0001-travis-fix-travisci_build_coverity_scan.sh.patch +++ /dev/null @@ -1,27 +0,0 @@ -From b5179dc4822eaab192361da05aa95d98f523960f Mon Sep 17 00:00:00 2001 -From: Lukasz Dorau -Date: Mon, 7 May 2018 12:05:40 +0200 -Subject: [PATCH] travis: fix travisci_build_coverity_scan.sh - ---- - travisci_build_coverity_scan.sh | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/travisci_build_coverity_scan.sh b/travisci_build_coverity_scan.sh -index ad9d4afcf..562b08bcc 100644 ---- a/travisci_build_coverity_scan.sh -+++ b/travisci_build_coverity_scan.sh -@@ -92,8 +92,8 @@ response=$(curl \ - --form description="Travis CI build" \ - $UPLOAD_URL) - status_code=$(echo "$response" | sed -n '$p') --if [ "$status_code" != "201" ]; then -+if [ "$status_code" != "200" ]; then - TEXT=$(echo "$response" | sed '$d') -- echo -e "\033[33;1mCoverity Scan upload failed: $TEXT.\033[0m" -+ echo -e "\033[33;1mCoverity Scan upload failed: $response.\033[0m" - exit 1 - fi --- -2.13.6 - diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index ba0230d600..43f8d1c62d 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -1,12 +1,5 @@ -# -# Copyright (C) 2023-2024 Intel Corporation -# -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# +# Coverity - static analysis build. 
It requires Coverity's token (set in CI's secret). name: coverity-unified-runtime -# It runs static analysis build - Coverity. It requires special token (set in CI's secret). on: workflow_dispatch: @@ -14,50 +7,75 @@ on: # Run every day at 22:00 UTC - cron: '0 22 * * *' -env: - WORKDIR: ${{ github.workspace }} - COVERITY_SCAN_NOTIFICATION_EMAIL: ${{ secrets.COVERITY_SCAN_NOTIFICATION_EMAIL }} - COVERITY_SCAN_TOKEN: ${{ secrets.COVERITY_SCAN_TOKEN }} - COVERITY_SCAN_PROJECT_NAME: ${{ github.repository }} - COVERITY_SCAN_BUILD_COMMAND: "cmake --build ${{github.workspace}}/build" - COVERITY_SCAN_BRANCH_PATTERN: "main" - TRAVIS_BRANCH: ${{ github.ref_name }} - permissions: contents: read jobs: - linux: + coverity: name: Coverity - runs-on: coverity + # run only on upstream; forks don't have token for upstream's cov project + if: github.repository == 'oneapi-src/unified-memory-framework' + runs-on: ubuntu-latest steps: - - name: Clone the git repo + - name: Checkout repository uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + + - name: Install dependencies + run: | + wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb + sudo dpkg -i cuda-keyring_1.1-1_all.deb + sudo apt-get update + sudo apt-get install -y libhwloc-dev libtbb-dev cuda-toolkit-12-6 - name: Install pip packages run: pip install -r third_party/requirements.txt + - name: Download Coverity + run: | + wget -O coverity_tool.tgz -nv https://scan.coverity.com/download/linux64 \ + --post-data "token=${{ secrets.COVERITY_SCAN_TOKEN }}&project=oneapi-src%2Funified-runtime" + + - name: Extract Coverity + run: tar xzf coverity_tool.tgz + + # TODO: enable HIP adapter as well (requires proper package(s) installation) - name: Configure CMake run: > cmake - -B $WORKDIR/build + -B ${{github.workspace}}/build + -DCMAKE_BUILD_TYPE=Release + -DUR_DEVELOPER_MODE=OFF + -DUR_FORMAT_CPP_STYLE=ON -DUR_ENABLE_TRACING=ON - 
-DUR_DEVELOPER_MODE=ON -DUR_BUILD_TESTS=ON - -DUMF_ENABLE_POOL_TRACKING=ON - -DUR_FORMAT_CPP_STYLE=ON - -DCMAKE_BUILD_TYPE=Debug -DUR_BUILD_ADAPTER_L0=ON -DUR_BUILD_ADAPTER_CUDA=ON - -DCUDA_CUDA_LIBRARY=/usr/local/cuda/lib64/stubs/libcuda.so + -DCUDA_CUDA_LIBRARY=/usr/local/cuda-12.6/targets/x86_64-linux/lib/stubs/libcuda.so -DUR_BUILD_ADAPTER_NATIVE_CPU=ON - -DUR_BUILD_ADAPTER_HIP=ON + -DUR_BUILD_ADAPTER_HIP=OFF -DUR_BUILD_ADAPTER_OPENCL=ON - - name: Run Coverity + - name: Build + run: | + export COVERITY_DIR=$(find . -maxdepth 1 -type d -name "cov-analysis-linux64-*" | head -n 1) + if [ -n "$COVERITY_DIR" ]; then + export PATH="$PATH:$COVERITY_DIR/bin" + fi + cov-build --dir ${{github.workspace}}/coverity-files cmake --build ${{github.workspace}}/build --config Release -j$(nproc) + + - name: Create tarball to analyze + run: tar czvf ur-coverity-files.tgz coverity-files + + - name: Push tarball to scan run: | - cd $WORKDIR/build - wget https://scan.coverity.com/scripts/travisci_build_coverity_scan.sh - patch < "../.github/scripts/0001-travis-fix-travisci_build_coverity_scan.sh.patch" - bash ./travisci_build_coverity_scan.sh + BRANCH_NAME=$(echo ${GITHUB_REF_NAME}) + COMMIT_ID=$(echo $GITHUB_SHA) + curl --form token=${{ secrets.COVERITY_SCAN_TOKEN }} \ + --form email=bb-ur@intel.com \ + --form file=@ur-coverity-files.tgz \ + --form version="$COMMIT_ID" \ + --form description="$BRANCH_NAME:$COMMIT_ID" \ + https://scan.coverity.com/builds\?project\=oneapi-src%2Funified-runtime diff --git a/README.md b/README.md index dc70f43876..5f1e7df3ff 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,8 @@ [![Build and test](https://github.com/oneapi-src/unified-runtime/actions/workflows/cmake.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/cmake.yml) [![Bandit](https://github.com/oneapi-src/unified-runtime/actions/workflows/bandit.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/bandit.yml) 
[![CodeQL](https://github.com/oneapi-src/unified-runtime/actions/workflows/codeql.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/codeql.yml) -[![Coverity](https://scan.coverity.com/projects/28213/badge.svg)](https://scan.coverity.com/projects/oneapi-src-unified-runtime) +[![Coverity build](https://github.com/oneapi-src/unified-runtime/actions/workflows/coverity.yml/badge.svg?branch=main)](https://github.com/oneapi-src/unified-runtime/actions/workflows/coverity.yml) +[![Coverity report](https://scan.coverity.com/projects/28213/badge.svg)](https://scan.coverity.com/projects/oneapi-src-unified-runtime) [![Nightly](https://github.com/oneapi-src/unified-runtime/actions/workflows/nightly.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/nightly.yml) [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/oneapi-src/unified-runtime/badge)](https://securityscorecards.dev/viewer/?uri=github.com/oneapi-src/unified-runtime) [![Trivy](https://github.com/oneapi-src/unified-runtime/actions/workflows/trivy.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/trivy.yml) From 3e1a0ea44842b10cfdc0e1e98af2eee8fcd8e937 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Wed, 6 Nov 2024 16:05:50 -0800 Subject: [PATCH 32/37] [L0] Fix binary sizes and binaries returned by urProgramGetInfo Currently urProgramGetInfo will return UR_RESULT_ERROR_INVALID_PROGRAM if program is built only for a subset of associated devices, i.e. not all devices have level zero module and binaries. This PR fixes this behaviour. Such urProgramGetInfo will return UR_RESULT_SUCCESS and 0 will be returned as binary size for devices which don't have a binary and binary data will not be copied for them. 
--- source/adapters/level_zero/program.cpp | 39 +++++------- test/conformance/program/CMakeLists.txt | 1 + .../urMultiDeviceProgramCreateWithBinary.cpp | 7 +-- .../urMultiDeviceProgramCreateWithIL.cpp | 63 +++++++++++++++++++ .../testing/include/uur/fixtures.h | 5 ++ 5 files changed, 84 insertions(+), 31 deletions(-) create mode 100644 test/conformance/program/urMultiDeviceProgramCreateWithIL.cpp diff --git a/source/adapters/level_zero/program.cpp b/source/adapters/level_zero/program.cpp index d7adc5eb37..6ca6e94b25 100644 --- a/source/adapters/level_zero/program.cpp +++ b/source/adapters/level_zero/program.cpp @@ -668,17 +668,16 @@ ur_result_t urProgramGetInfo( binarySizes.push_back(Program->getCodeSize(Device->ZeDevice)); continue; } - auto ZeModule = Program->getZeModuleHandle(Device->ZeDevice); - if (!ZeModule) - return UR_RESULT_ERROR_INVALID_PROGRAM; - if (State == ur_program_handle_t_::IL || State == ur_program_handle_t_::Object) { - // We don't have a binary for this device, so return size of the spirv - // code. This is an array of 1 element, initialized as if it were - // scalar. - return ReturnValue(size_t{Program->getCodeSize()}); + // We don't have a binary for this device, so return 0. + binarySizes.push_back(0); + continue; } else if (State == ur_program_handle_t_::Exe) { + auto ZeModule = Program->getZeModuleHandle(Device->ZeDevice); + if (!ZeModule) + return UR_RESULT_ERROR_INVALID_PROGRAM; + size_t binarySize = 0; ZE2UR_CALL(zeModuleGetNativeBinary, (ZeModule, &binarySize, nullptr)); binarySizes.push_back(binarySize); @@ -718,27 +717,17 @@ ur_result_t urProgramGetInfo( SzBinary += Program->getCodeSize(ZeDevice); continue; } - auto ZeModule = Program->getZeModuleHandle(ZeDevice); - if (!ZeModule) { - return UR_RESULT_ERROR_INVALID_PROGRAM; - } - // If the caller is using a Program which is IL or an object, then - // the program has not been built for multiple devices so a single IL is - // returned. 
- // TODO: currently if program is not compiled for any of the associated - // devices, we just return spirv code, assuming that we either have the - // program built for all associated devices or for none. It is possible - // that program is compiled for subset of associated devices, so that case - // probably should be explicitely specified and handled better. if (State == ur_program_handle_t_::IL || State == ur_program_handle_t_::Object) { + // We don't have a binary for this device, so don't update the output + // pointer to the binary, only set return size to 0. if (PropSizeRet) - *PropSizeRet = Program->getCodeSize(); - if (PBinary) { - std::memcpy(PBinary[0], Program->getCode(), Program->getCodeSize()); - } - break; + *PropSizeRet = 0; } else if (State == ur_program_handle_t_::Exe) { + auto ZeModule = Program->getZeModuleHandle(ZeDevice); + if (!ZeModule) { + return UR_RESULT_ERROR_INVALID_PROGRAM; + } size_t binarySize = 0; if (PBinary) { NativeBinaryPtr = PBinary[deviceIndex]; diff --git a/test/conformance/program/CMakeLists.txt b/test/conformance/program/CMakeLists.txt index 31235eaf71..4db93881f4 100644 --- a/test/conformance/program/CMakeLists.txt +++ b/test/conformance/program/CMakeLists.txt @@ -8,6 +8,7 @@ add_conformance_test_with_kernels_environment(program urProgramCompile.cpp urProgramCreateWithBinary.cpp urMultiDeviceProgramCreateWithBinary.cpp + urMultiDeviceProgramCreateWithIL.cpp urProgramCreateWithIL.cpp urProgramCreateWithNativeHandle.cpp urProgramGetBuildInfo.cpp diff --git a/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp index 95a135af1c..9ff11d9016 100644 --- a/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp +++ b/test/conformance/program/urMultiDeviceProgramCreateWithBinary.cpp @@ -12,11 +12,7 @@ struct urMultiDeviceProgramCreateWithBinaryTest void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urMultiDeviceProgramTest::SetUp()); - // 
First obtain binaries for all devices from the compiler SPIRV program. - devices = uur::DevicesEnvironment::instance->devices; - if (devices.size() < 2) { - GTEST_SKIP(); - } + // First obtain binaries for all devices from the compiled SPIRV program. ASSERT_SUCCESS(urProgramBuild(context, program, nullptr)); size_t binary_sizes_len = 0; ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARY_SIZES, @@ -51,7 +47,6 @@ struct urMultiDeviceProgramCreateWithBinaryTest } std::vector> binaries; - std::vector devices; std::vector pointers; std::vector binary_sizes; ur_program_handle_t binary_program = nullptr; diff --git a/test/conformance/program/urMultiDeviceProgramCreateWithIL.cpp b/test/conformance/program/urMultiDeviceProgramCreateWithIL.cpp new file mode 100644 index 0000000000..652de93540 --- /dev/null +++ b/test/conformance/program/urMultiDeviceProgramCreateWithIL.cpp @@ -0,0 +1,63 @@ + +// Copyright (C) 2024 Intel Corporation +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +// See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include + +using urMultiDeviceProgramTest = uur::urMultiDeviceProgramTest; + +// Test binary sizes and binaries obtained from urProgramGetInfo when program is built for a subset of devices in the context. +TEST_F(urMultiDeviceProgramTest, urMultiDeviceProgramGetInfo) { + // Run test only for level zero backend which supports urProgramBuildExp. + ur_platform_backend_t backend; + ASSERT_SUCCESS(urPlatformGetInfo(platform, UR_PLATFORM_INFO_BACKEND, + sizeof(backend), &backend, nullptr)); + if (backend != UR_PLATFORM_BACKEND_LEVEL_ZERO) { + GTEST_SKIP(); + } + + std::vector associated_devices(devices.size()); + ASSERT_SUCCESS( + urProgramGetInfo(program, UR_PROGRAM_INFO_DEVICES, + associated_devices.size() * sizeof(ur_device_handle_t), + associated_devices.data(), nullptr)); + + // Build program for the first half of devices. 
+ auto subset = std::vector( + associated_devices.begin(), + associated_devices.begin() + associated_devices.size() / 2); + ASSERT_SUCCESS( + urProgramBuildExp(program, subset.size(), subset.data(), nullptr)); + + std::vector binary_sizes(associated_devices.size()); + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARY_SIZES, + binary_sizes.size() * sizeof(size_t), + binary_sizes.data(), nullptr)); + + std::vector> binaries(associated_devices.size()); + std::vector pointers(associated_devices.size()); + for (size_t i = 0; i < associated_devices.size() / 2; i++) { + ASSERT_NE(binary_sizes[i], 0); + binaries[i].resize(binary_sizes[i]); + pointers[i] = binaries[i].data(); + } + for (size_t i = associated_devices.size() / 2; + i < associated_devices.size(); i++) { + ASSERT_EQ(binary_sizes[i], 0); + pointers[i] = binaries[i].data(); + } + + ASSERT_SUCCESS(urProgramGetInfo(program, UR_PROGRAM_INFO_BINARIES, + sizeof(uint8_t *) * pointers.size(), + pointers.data(), nullptr)); + for (size_t i = 0; i < associated_devices.size() / 2; i++) { + ASSERT_NE(binaries[i].size(), 0); + } + for (size_t i = associated_devices.size() / 2; + i < associated_devices.size(); i++) { + ASSERT_EQ(binaries[i].size(), 0); + } +} diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index d158105818..8ebc70cd54 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -1586,6 +1586,10 @@ struct urMultiDeviceProgramTest : urMultiDeviceQueueTest { backend == UR_PLATFORM_BACKEND_CUDA) { GTEST_SKIP(); } + devices = uur::DevicesEnvironment::instance->devices; + if (devices.size() < 2) { + GTEST_SKIP(); + } UUR_RETURN_ON_FATAL_FAILURE( uur::KernelsEnvironment::instance->LoadSource(program_name, il_binary)); @@ -1611,6 +1615,7 @@ struct urMultiDeviceProgramTest : urMultiDeviceQueueTest { std::string program_name = "foo"; ur_program_handle_t program = nullptr; std::vector 
metadatas{}; + std::vector devices; }; } // namespace uur From c622b7142a2a4a390b880b33eaea12e5a50df773 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Wed, 13 Nov 2024 11:08:41 +0100 Subject: [PATCH 33/37] [L0 v2] fix native kernel handle interop Fixes handling optional program argument and kernel to device mapping. --- source/adapters/level_zero/v2/kernel.cpp | 33 +++++++++---------- source/adapters/level_zero/v2/kernel.hpp | 2 +- .../urKernelCreateWithNativeHandle.cpp | 9 ++++- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/source/adapters/level_zero/v2/kernel.cpp b/source/adapters/level_zero/v2/kernel.cpp index de2e37e9bb..ae4b197cbd 100644 --- a/source/adapters/level_zero/v2/kernel.cpp +++ b/source/adapters/level_zero/v2/kernel.cpp @@ -70,8 +70,10 @@ ur_kernel_handle_t_::ur_kernel_handle_t_(ur_program_handle_t hProgram, ur_kernel_handle_t_::ur_kernel_handle_t_( ur_native_handle_t hNativeKernel, ur_program_handle_t hProgram, + ur_context_handle_t context, const ur_kernel_native_properties_t *pProperties) - : hProgram(hProgram), deviceKernels(1) { + : hProgram(hProgram), + deviceKernels(context ? context->getPlatform()->getNumDevices() : 0) { ur::level_zero::urProgramRetain(hProgram); auto ownZeHandle = pProperties ? 
pProperties->isNativeHandleOwned : false; @@ -82,7 +84,12 @@ ur_kernel_handle_t_::ur_kernel_handle_t_( throw UR_RESULT_ERROR_INVALID_KERNEL; } - deviceKernels.back().emplace(nullptr, zeKernel, ownZeHandle); + for (auto &Dev : context->getDevices()) { + deviceKernels[*Dev->Id].emplace(Dev, zeKernel, ownZeHandle); + + // owned only by the first entry + ownZeHandle = false; + } completeInitialization(); } @@ -128,20 +135,6 @@ size_t ur_kernel_handle_t_::deviceIndex(ur_device_handle_t hDevice) const { hDevice = hDevice->RootDevice; } - // supports kernels created from native handle - if (deviceKernels.size() == 1) { - assert(deviceKernels[0].has_value()); - assert(deviceKernels[0].value().hKernel.get()); - - auto &kernel = deviceKernels[0].value(); - - if (kernel.hDevice != hDevice) { - throw UR_RESULT_ERROR_INVALID_DEVICE; - } - - return 0; - } - if (!deviceKernels[hDevice->Id.value()].has_value()) { throw UR_RESULT_ERROR_INVALID_DEVICE; } @@ -341,8 +334,12 @@ urKernelCreateWithNativeHandle(ur_native_handle_t hNativeKernel, ur_program_handle_t hProgram, const ur_kernel_native_properties_t *pProperties, ur_kernel_handle_t *phKernel) { - std::ignore = hContext; - *phKernel = new ur_kernel_handle_t_(hNativeKernel, hProgram, pProperties); + if (!hProgram) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + *phKernel = + new ur_kernel_handle_t_(hNativeKernel, hProgram, hContext, pProperties); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/v2/kernel.hpp b/source/adapters/level_zero/v2/kernel.hpp index 735150838b..798c9f18df 100644 --- a/source/adapters/level_zero/v2/kernel.hpp +++ b/source/adapters/level_zero/v2/kernel.hpp @@ -39,7 +39,7 @@ struct ur_kernel_handle_t_ : _ur_object { // From native handle ur_kernel_handle_t_(ur_native_handle_t hNativeKernel, - ur_program_handle_t hProgram, + ur_program_handle_t hProgram, ur_context_handle_t context, const ur_kernel_native_properties_t *pProperties); // Get L0 kernel handle for a given device diff 
--git a/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp b/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp index b3918c7818..065b3520b8 100644 --- a/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp +++ b/test/adapters/level_zero/urKernelCreateWithNativeHandle.cpp @@ -8,7 +8,7 @@ #include "ze_api.h" #include -using urLevelZeroKernelNativeHandleTest = uur::urContextTest; +using urLevelZeroKernelNativeHandleTest = uur::urQueueTest; UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urLevelZeroKernelNativeHandleTest); TEST_P(urLevelZeroKernelNativeHandleTest, OwnedHandleRelease) { @@ -60,6 +60,13 @@ TEST_P(urLevelZeroKernelNativeHandleTest, OwnedHandleRelease) { ASSERT_SUCCESS(urKernelCreateWithNativeHandle( (ur_native_handle_t)native_kernel, context, program, &kprops, &kernel)); + size_t global_offset = 0; + size_t local_size = 1; + size_t global_size = 1; + ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, 1, &global_offset, + &local_size, &global_size, 0, nullptr, + nullptr)); + ASSERT_SUCCESS(urKernelRelease(kernel)); ASSERT_SUCCESS(urProgramRelease(program)); } From 2e819c314a5aa7d24be382c433619ffe26ec40d7 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Wed, 13 Nov 2024 11:41:09 +0000 Subject: [PATCH 34/37] [NFC] Improve documentation of cts_exe.py --- scripts/core/CONTRIB.rst | 7 +++++++ test/conformance/cts_exe.py | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/scripts/core/CONTRIB.rst b/scripts/core/CONTRIB.rst index df46652617..e1d5e17103 100644 --- a/scripts/core/CONTRIB.rst +++ b/scripts/core/CONTRIB.rst @@ -401,6 +401,13 @@ failure) for the given adapter. However this can be disabled by prepending ``{{OPT}}`` to the match line. This can be used if the test is flaky or depends on a particular environment. +This matching is done via ``test/conformance/cts_exe.py``, which is designed to be +called from ctest. However, it can be run manually as follows: + +.. 
code-block:: console + + test/conformance/cts_exe.py --test_command build/bin/test-adapter --failslist test/conformance/adapter/adapter_adapter_mytarget.match -- --backend=BACKEND + Experimental Features ===================== diff --git a/test/conformance/cts_exe.py b/test/conformance/cts_exe.py index 84a5d6e031..6862811f09 100755 --- a/test/conformance/cts_exe.py +++ b/test/conformance/cts_exe.py @@ -122,6 +122,10 @@ def _run_cmd(cmd, comment, filter): parser.add_argument("rest", nargs=argparse.REMAINDER) args = parser.parse_args() + if args.test_command is None or args.failslist is None: + print("Usage: cts_exe.py --test_command (test binary) --failslist (match file) -- (test arguments)") + sys.exit(1) + base_invocation = [args.test_command] + args.rest if os.environ.get("GTEST_OUTPUT") is not None: From abb5d122f333c74d7ad0beef55dc32bfa4de15f2 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Wed, 13 Nov 2024 12:00:13 +0000 Subject: [PATCH 35/37] [NFC] Make macro in conf. test no longer leak `UUR_ASSERT_SUCCESS_OR_UNSUPPORTED` previously leaked a `status` variable to the calling scope. This change makes it no longer do that. 
--- .../urContextCreateWithNativeHandle.cpp | 20 +++++++----------- .../device/urDeviceCreateWithNativeHandle.cpp | 21 ++++++++----------- .../event/urEventCreateWithNativeHandle.cpp | 7 +++---- .../kernel/urKernelCreateWithNativeHandle.cpp | 7 +++---- .../urPlatformCreateWithNativeHandle.cpp | 21 ++++++++----------- .../queue/urQueueCreateWithNativeHandle.cpp | 7 +++---- .../urSamplerCreateWithNativeHandle.cpp | 21 ++++++++----------- .../testing/include/uur/fixtures.h | 16 +++++++------- 8 files changed, 53 insertions(+), 67 deletions(-) diff --git a/test/conformance/context/urContextCreateWithNativeHandle.cpp b/test/conformance/context/urContextCreateWithNativeHandle.cpp index 9b1c61f14a..599f402f77 100644 --- a/test/conformance/context/urContextCreateWithNativeHandle.cpp +++ b/test/conformance/context/urContextCreateWithNativeHandle.cpp @@ -11,10 +11,8 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urContextCreateWithNativeHandleTest); TEST_P(urContextCreateWithNativeHandleTest, Success) { ur_native_handle_t native_context = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urContextGetNativeHandle(context, &native_context)); - } + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urContextGetNativeHandle(context, &native_context)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. 
@@ -35,10 +33,9 @@ TEST_P(urContextCreateWithNativeHandleTest, Success) { TEST_P(urContextCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { ur_native_handle_t native_context = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urContextGetNativeHandle(context, &native_context)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urContextGetNativeHandle(context, &native_context)); ur_context_handle_t ctx = nullptr; ur_context_native_properties_t props{ @@ -50,10 +47,9 @@ TEST_P(urContextCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { TEST_P(urContextCreateWithNativeHandleTest, SuccessWithUnOwnedNativeHandle) { ur_native_handle_t native_context = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urContextGetNativeHandle(context, &native_context)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urContextGetNativeHandle(context, &native_context)); ur_context_handle_t ctx = nullptr; ur_context_native_properties_t props{ diff --git a/test/conformance/device/urDeviceCreateWithNativeHandle.cpp b/test/conformance/device/urDeviceCreateWithNativeHandle.cpp index 8cffc72cf1..d72435c274 100644 --- a/test/conformance/device/urDeviceCreateWithNativeHandle.cpp +++ b/test/conformance/device/urDeviceCreateWithNativeHandle.cpp @@ -9,10 +9,9 @@ using urDeviceCreateWithNativeHandleTest = uur::urAllDevicesTest; TEST_F(urDeviceCreateWithNativeHandleTest, Success) { for (auto device : devices) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urDeviceGetNativeHandle(device, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urDeviceGetNativeHandle(device, &native_handle)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. 
@@ -32,10 +31,9 @@ TEST_F(urDeviceCreateWithNativeHandleTest, Success) { TEST_F(urDeviceCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { for (auto device : devices) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urDeviceGetNativeHandle(device, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urDeviceGetNativeHandle(device, &native_handle)); ur_device_handle_t dev = nullptr; ur_device_native_properties_t props{ @@ -49,10 +47,9 @@ TEST_F(urDeviceCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { TEST_F(urDeviceCreateWithNativeHandleTest, SuccessWithUnOwnedNativeHandle) { for (auto device : devices) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urDeviceGetNativeHandle(device, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urDeviceGetNativeHandle(device, &native_handle)); ur_device_handle_t dev = nullptr; ur_device_native_properties_t props{ diff --git a/test/conformance/event/urEventCreateWithNativeHandle.cpp b/test/conformance/event/urEventCreateWithNativeHandle.cpp index 36ff0b44dc..bfb7113053 100644 --- a/test/conformance/event/urEventCreateWithNativeHandle.cpp +++ b/test/conformance/event/urEventCreateWithNativeHandle.cpp @@ -11,10 +11,9 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEventCreateWithNativeHandleTest); TEST_P(urEventCreateWithNativeHandleTest, Success) { ur_native_handle_t native_event = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urEventGetNativeHandle(event, &native_event)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urEventGetNativeHandle(event, &native_event)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. 
diff --git a/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp b/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp index 8640463334..1c7acf5fab 100644 --- a/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp +++ b/test/conformance/kernel/urKernelCreateWithNativeHandle.cpp @@ -8,10 +8,9 @@ struct urKernelCreateWithNativeHandleTest : uur::urKernelTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(urKernelTest::SetUp()); - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urKernelGetNativeHandle(kernel, &native_kernel_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urKernelGetNativeHandle(kernel, &native_kernel_handle)); } void TearDown() override { diff --git a/test/conformance/platform/urPlatformCreateWithNativeHandle.cpp b/test/conformance/platform/urPlatformCreateWithNativeHandle.cpp index 41fe59442d..6b56f9b661 100644 --- a/test/conformance/platform/urPlatformCreateWithNativeHandle.cpp +++ b/test/conformance/platform/urPlatformCreateWithNativeHandle.cpp @@ -10,10 +10,9 @@ using urPlatformCreateWithNativeHandleTest = uur::platform::urPlatformTest; TEST_F(urPlatformCreateWithNativeHandleTest, Success) { for (auto platform : platforms) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urPlatformGetNativeHandle(platform, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urPlatformGetNativeHandle(platform, &native_handle)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. 
@@ -33,10 +32,9 @@ TEST_F(urPlatformCreateWithNativeHandleTest, Success) { TEST_F(urPlatformCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { for (auto platform : platforms) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urPlatformGetNativeHandle(platform, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urPlatformGetNativeHandle(platform, &native_handle)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. @@ -58,10 +56,9 @@ TEST_F(urPlatformCreateWithNativeHandleTest, SuccessWithOwnedNativeHandle) { TEST_F(urPlatformCreateWithNativeHandleTest, SuccessWithUnOwnedNativeHandle) { for (auto platform : platforms) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urPlatformGetNativeHandle(platform, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urPlatformGetNativeHandle(platform, &native_handle)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. diff --git a/test/conformance/queue/urQueueCreateWithNativeHandle.cpp b/test/conformance/queue/urQueueCreateWithNativeHandle.cpp index f2fa83ec8e..9f5e3f0f97 100644 --- a/test/conformance/queue/urQueueCreateWithNativeHandle.cpp +++ b/test/conformance/queue/urQueueCreateWithNativeHandle.cpp @@ -9,10 +9,9 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urQueueCreateWithNativeHandleTest); TEST_P(urQueueCreateWithNativeHandleTest, Success) { ur_native_handle_t native_handle = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urQueueGetNativeHandle(queue, nullptr, &native_handle)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urQueueGetNativeHandle(queue, nullptr, &native_handle)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. 
diff --git a/test/conformance/sampler/urSamplerCreateWithNativeHandle.cpp b/test/conformance/sampler/urSamplerCreateWithNativeHandle.cpp index 59638105c9..c6bd776435 100644 --- a/test/conformance/sampler/urSamplerCreateWithNativeHandle.cpp +++ b/test/conformance/sampler/urSamplerCreateWithNativeHandle.cpp @@ -12,10 +12,9 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urSamplerCreateWithNativeHandleTest); TEST_P(urSamplerCreateWithNativeHandleTest, Success) { ur_native_handle_t native_sampler = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urSamplerGetNativeHandle(sampler, &native_sampler)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urSamplerGetNativeHandle(sampler, &native_sampler)); // We cannot assume anything about a native_handle, not even if it's // `nullptr` since this could be a valid representation within a backend. @@ -36,10 +35,9 @@ TEST_P(urSamplerCreateWithNativeHandleTest, Success) { TEST_P(urSamplerCreateWithNativeHandleTest, InvalidNullHandle) { ur_native_handle_t native_sampler = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urSamplerGetNativeHandle(sampler, &native_sampler)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urSamplerGetNativeHandle(sampler, &native_sampler)); ur_sampler_handle_t hSampler = nullptr; ur_sampler_native_properties_t props{}; @@ -50,10 +48,9 @@ TEST_P(urSamplerCreateWithNativeHandleTest, InvalidNullHandle) { TEST_P(urSamplerCreateWithNativeHandleTest, InvalidNullPointer) { ur_native_handle_t native_sampler = 0; - { - UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urSamplerGetNativeHandle(sampler, &native_sampler)); - } + + UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( + urSamplerGetNativeHandle(sampler, &native_sampler)); ur_sampler_native_properties_t props{}; ASSERT_EQ(urSamplerCreateWithNativeHandle(native_sampler, context, &props, diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index d158105818..906783c9c5 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ 
b/test/conformance/testing/include/uur/fixtures.h @@ -23,13 +23,15 @@ (void)0 #define UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(ret) \ - auto status = ret; \ - if (status == UR_RESULT_ERROR_UNSUPPORTED_FEATURE || \ - status == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION) { \ - GTEST_SKIP(); \ - } else { \ - ASSERT_EQ(status, UR_RESULT_SUCCESS); \ - } + do { \ + auto status = ret; \ + if (status == UR_RESULT_ERROR_UNSUPPORTED_FEATURE || \ + status == UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION) { \ + GTEST_SKIP(); \ + } else { \ + ASSERT_EQ(status, UR_RESULT_SUCCESS); \ + } \ + } while (0) namespace uur { From a1c9555f0aa518d529e1d8865e2bf3fe747ea379 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Tue, 5 Nov 2024 15:51:51 +0000 Subject: [PATCH 36/37] [NFC] Clean up match files for conf. testing Clean up the match files to hopefully make them more easily readable. As we now use cts_exe.py, we don't need as many workarounds. This changes the following: * Removes the "{{NONDETERMINISTIC}}" tag, which isn't needed any more. * Removes the notice about only being supported in cts_exe.py - all suites now require cts_exe.py. * Device names have been simplified down to a single wildcard - should make it easier to read them and copy between files if needed. * If all tests for a specific enum fail, it has been folded into a single wildcard. 
--- scripts/core/CONTRIB.rst | 2 - .../adapter/adapter_adapter_native_cpu.match | 1 - .../context/context_adapter_level_zero.match | 3 +- .../context_adapter_level_zero_v2.match | 2 +- .../context/context_adapter_native_cpu.match | 2 +- test/conformance/cts_exe.py | 3 - .../device/device_adapter_cuda.match | 1 - .../device/device_adapter_hip.match | 1 - .../device/device_adapter_level_zero.match | 1 - .../device/device_adapter_level_zero_v2.match | 1 - .../device/device_adapter_native_cpu.match | 1 - .../enqueue/enqueue_adapter_cuda.match | 21 +- .../enqueue/enqueue_adapter_hip.match | 29 +- .../enqueue/enqueue_adapter_level_zero.match | 41 +- .../enqueue_adapter_level_zero_v2.match | 185 +++---- .../enqueue/enqueue_adapter_native_cpu.match | 520 +++++------------- .../event/event_adapter_cuda.match | 13 +- .../conformance/event/event_adapter_hip.match | 13 +- .../event/event_adapter_level_zero.match | 11 +- .../event/event_adapter_level_zero_v2.match | 14 +- .../event/event_adapter_native_cpu.match | 23 +- ...command_buffer_adapter_level_zero_v2.match | 105 ++-- ...xp_command_buffer_adapter_native_cpu.match | 87 ++- ...enqueue_native_adapter_level_zero_v2.match | 9 +- ...launch_properties_adapter_native_cpu.match | 3 +- .../integration_adapter_level_zero.match | 10 +- .../integration_adapter_level_zero_v2.match | 10 +- .../integration_adapter_native_cpu.match | 10 +- .../kernel/kernel_adapter_cuda.match | 13 +- .../kernel/kernel_adapter_hip.match | 15 +- .../kernel/kernel_adapter_level_zero.match | 9 +- .../kernel/kernel_adapter_level_zero_v2.match | 9 +- .../kernel/kernel_adapter_native_cpu.match | 270 +++------ .../kernel/kernel_adapter_opencl.match | 3 +- .../memory/memory_adapter_cuda.match | 13 +- .../memory/memory_adapter_hip.match | 14 +- .../memory/memory_adapter_level_zero.match | 43 +- .../memory/memory_adapter_level_zero_v2.match | 311 +---------- .../memory/memory_adapter_native_cpu.match | 259 +-------- .../program/program_adapter_cuda.match | 25 +- 
.../program/program_adapter_hip.match | 25 +- .../program/program_adapter_level_zero.match | 7 +- .../program_adapter_level_zero_v2.match | 7 +- .../program/program_adapter_native_cpu.match | 225 +++----- .../queue/queue_adapter_native_cpu.match | 55 +- .../sampler/sampler_adapter_level_zero.match | 13 +- .../sampler_adapter_level_zero_v2.match | 13 +- test/conformance/usm/usm_adapter_cuda.match | 12 +- test/conformance/usm/usm_adapter_hip.match | 169 +++--- .../usm/usm_adapter_level_zero.match | 4 +- .../usm/usm_adapter_level_zero_v2.match | 12 +- .../usm/usm_adapter_native_cpu.match | 35 +- test/conformance/usm/usm_adapter_opencl.match | 3 +- .../virtual_memory_adapter_level_zero.match | 16 +- ...virtual_memory_adapter_level_zero_v2.match | 118 ++-- 55 files changed, 852 insertions(+), 1968 deletions(-) diff --git a/scripts/core/CONTRIB.rst b/scripts/core/CONTRIB.rst index df46652617..db5f165e67 100644 --- a/scripts/core/CONTRIB.rst +++ b/scripts/core/CONTRIB.rst @@ -393,8 +393,6 @@ The format of the match files are as follows: matches a single character. * Empty lines or lines beginning with ``#`` are ignored. * A line beginning with ``{{OPT}}`` is a optional test; see below. -* For compatibility with an older version of the matching logic, ``{{.*}}`` is - interpreted as ``*`` and ``{{NONDETERMINISTIC}}`` is ignored. Normally tests in the match file must fail (either by crashing or having a test failure) for the given adapter. 
However this can be disabled by prepending diff --git a/test/conformance/adapter/adapter_adapter_native_cpu.match b/test/conformance/adapter/adapter_adapter_native_cpu.match index 91d8e62787..ea65399d2f 100644 --- a/test/conformance/adapter/adapter_adapter_native_cpu.match +++ b/test/conformance/adapter/adapter_adapter_native_cpu.match @@ -1,4 +1,3 @@ -{{NONDETERMINISTIC}} # These pass when the adapter is launched by the loader {{OPT}}urAdapterGetLastErrorTest.Success {{OPT}}urAdapterGetLastErrorTest.InvalidHandle diff --git a/test/conformance/context/context_adapter_level_zero.match b/test/conformance/context/context_adapter_level_zero.match index a78979330d..82c2e813e5 100644 --- a/test/conformance/context/context_adapter_level_zero.match +++ b/test/conformance/context/context_adapter_level_zero.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} -{{OPT}}urContextSetExtendedDeleterTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urContextSetExtendedDeleterTest.Success/* diff --git a/test/conformance/context/context_adapter_level_zero_v2.match b/test/conformance/context/context_adapter_level_zero_v2.match index 93333a9178..82c2e813e5 100644 --- a/test/conformance/context/context_adapter_level_zero_v2.match +++ b/test/conformance/context/context_adapter_level_zero_v2.match @@ -1 +1 @@ -{{OPT}}urContextSetExtendedDeleterTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__ +{{OPT}}urContextSetExtendedDeleterTest.Success/* diff --git a/test/conformance/context/context_adapter_native_cpu.match b/test/conformance/context/context_adapter_native_cpu.match index b5665a8f1a..82c2e813e5 100644 --- a/test/conformance/context/context_adapter_native_cpu.match +++ b/test/conformance/context/context_adapter_native_cpu.match @@ -1 +1 @@ -{{OPT}}urContextSetExtendedDeleterTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urContextSetExtendedDeleterTest.Success/* diff --git a/test/conformance/cts_exe.py 
b/test/conformance/cts_exe.py index 84a5d6e031..cf73c4e8d9 100755 --- a/test/conformance/cts_exe.py +++ b/test/conformance/cts_exe.py @@ -142,12 +142,9 @@ def _run_cmd(cmd, comment, filter): for l in f: optional = "{{OPT}}" in l l = l.replace("{{OPT}}", "") - l = l.replace("{{.*}}", "*") if l.startswith("#"): continue - if l.startswith("{{NONDETERMINISTIC}}"): - continue if l.strip() == "": continue diff --git a/test/conformance/device/device_adapter_cuda.match b/test/conformance/device/device_adapter_cuda.match index ff961cc6f5..48e00debe4 100644 --- a/test/conformance/device/device_adapter_cuda.match +++ b/test/conformance/device/device_adapter_cuda.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime diff --git a/test/conformance/device/device_adapter_hip.match b/test/conformance/device/device_adapter_hip.match index ff961cc6f5..48e00debe4 100644 --- a/test/conformance/device/device_adapter_hip.match +++ b/test/conformance/device/device_adapter_hip.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime diff --git a/test/conformance/device/device_adapter_level_zero.match b/test/conformance/device/device_adapter_level_zero.match index ff961cc6f5..48e00debe4 100644 --- a/test/conformance/device/device_adapter_level_zero.match +++ b/test/conformance/device/device_adapter_level_zero.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime diff --git a/test/conformance/device/device_adapter_level_zero_v2.match b/test/conformance/device/device_adapter_level_zero_v2.match index 2b9ecbef70..87140fb10c 100644 --- a/test/conformance/device/device_adapter_level_zero_v2.match +++ b/test/conformance/device/device_adapter_level_zero_v2.match @@ -1,3 +1,2 @@ -{{NONDETERMINISTIC}} {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime {{OPT}}urDeviceGetInfoTest.Success/UR_DEVICE_INFO_GLOBAL_MEM_FREE diff --git 
a/test/conformance/device/device_adapter_native_cpu.match b/test/conformance/device/device_adapter_native_cpu.match index 2129478fb8..00b3642e71 100644 --- a/test/conformance/device/device_adapter_native_cpu.match +++ b/test/conformance/device/device_adapter_native_cpu.match @@ -1,4 +1,3 @@ -{{NONDETERMINISTIC}} urDeviceCreateWithNativeHandleTest.InvalidNullHandlePlatform urDeviceCreateWithNativeHandleTest.InvalidNullPointerDevice {{OPT}}urDeviceGetGlobalTimestampTest.SuccessSynchronizedTime diff --git a/test/conformance/enqueue/enqueue_adapter_cuda.match b/test/conformance/enqueue/enqueue_adapter_cuda.match index 40de7158d0..8aa9600f5e 100644 --- a/test/conformance/enqueue/enqueue_adapter_cuda.match +++ b/test/conformance/enqueue/enqueue_adapter_cuda.match @@ -1,11 +1,10 @@ -{{NONDETERMINISTIC}} -urEnqueueKernelLaunchTest.InvalidKernelArgs/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEnqueueKernelLaunchKernelSubGroupTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEnqueueMemBufferMapTestWithWriteFlagParam.SuccessWrite/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MAP_FLAG_WRITE_INVALIDATE_REGION -urEnqueueUSMAdviseWithParamTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_ADVICE_FLAG_DEFAULT -urEnqueueUSMAdviseTest.MultipleParamsSuccess/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEnqueueUSMPrefetchWithParamTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT -urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/NVIDIA_CUDA_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT -urEnqueueTimestampRecordingExpTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEnqueueTimestampRecordingExpTest.SuccessBlocking/NVIDIA_CUDA_BACKEND___{{.*}}_ +urEnqueueKernelLaunchTest.InvalidKernelArgs/* +urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/* +urEnqueueKernelLaunchKernelSubGroupTest.Success/* +urEnqueueMemBufferMapTestWithWriteFlagParam.SuccessWrite/*__UR_MAP_FLAG_WRITE_INVALIDATE_REGION 
+urEnqueueUSMAdviseWithParamTest.Success/*__UR_USM_ADVICE_FLAG_DEFAULT +urEnqueueUSMAdviseTest.MultipleParamsSuccess/* +urEnqueueUSMPrefetchWithParamTest.Success/*__UR_USM_MIGRATION_FLAG_DEFAULT +urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/*__UR_USM_MIGRATION_FLAG_DEFAULT +urEnqueueTimestampRecordingExpTest.Success/* +urEnqueueTimestampRecordingExpTest.SuccessBlocking/* diff --git a/test/conformance/enqueue/enqueue_adapter_hip.match b/test/conformance/enqueue/enqueue_adapter_hip.match index b841a25cf4..c59d228ca8 100644 --- a/test/conformance/enqueue/enqueue_adapter_hip.match +++ b/test/conformance/enqueue/enqueue_adapter_hip.match @@ -1,22 +1,21 @@ -{{NONDETERMINISTIC}} # HIP can't check kernel arguments -urEnqueueKernelLaunchTest.InvalidKernelArgs/AMD_HIP_BACKEND___{{.*}}_ -urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/AMD_HIP_BACKEND___{{.*}}_ -urEnqueueKernelLaunchKernelSubGroupTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urEnqueueKernelLaunchUSMLinkedList.Success/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___copy_row_2D -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___copy_3d_2d -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___write_row_2D -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/AMD_HIP_BACKEND___{{.*}}___write_3d_2d +urEnqueueKernelLaunchTest.InvalidKernelArgs/* +urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/* +urEnqueueKernelLaunchKernelSubGroupTest.Success/* +urEnqueueKernelLaunchUSMLinkedList.Success/*__UsePoolEnabled +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_row_2D +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_3d_2d +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_row_2D +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_3d_2d # HIP doesn't ignore unsupported USM advice or prefetching. 
Instead of # returning UR_RESULT_SUCCESS as per the spec, it instead returns # UR_RESULT_ERROR_ADAPTER_SPECIFIC to issue a warning. These tests will fail # until this is rectified. -urEnqueueUSMAdviseWithParamTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_USM_ADVICE_FLAG_DEFAULT -urEnqueueUSMAdviseTest.MultipleParamsSuccess/AMD_HIP_BACKEND___{{.*}}_ -urEnqueueUSMPrefetchWithParamTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT -urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/AMD_HIP_BACKEND___{{.*}}___UR_USM_MIGRATION_FLAG_DEFAULT +urEnqueueUSMAdviseWithParamTest.Success/*__UR_USM_ADVICE_FLAG_DEFAULT +urEnqueueUSMAdviseTest.MultipleParamsSuccess/* +urEnqueueUSMPrefetchWithParamTest.Success/*__UR_USM_MIGRATION_FLAG_DEFAULT +urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/*__UR_USM_MIGRATION_FLAG_DEFAULT -urEnqueueTimestampRecordingExpTest.Success/AMD_HIP_BACKEND___{{.*}} -urEnqueueTimestampRecordingExpTest.SuccessBlocking/AMD_HIP_BACKEND___{{.*}} +urEnqueueTimestampRecordingExpTest.Success/* +urEnqueueTimestampRecordingExpTest.SuccessBlocking/* diff --git a/test/conformance/enqueue/enqueue_adapter_level_zero.match b/test/conformance/enqueue/enqueue_adapter_level_zero.match index 5a9ce841b8..9394f1b0a0 100644 --- a/test/conformance/enqueue/enqueue_adapter_level_zero.match +++ b/test/conformance/enqueue/enqueue_adapter_level_zero.match @@ -1,26 +1,23 @@ -# Note: This file is only for use with cts_exe.py -{{OPT}}urEnqueueEventsWaitTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueKernelLaunchTest.InvalidKernelArgs/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_2d_3d -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_3d_2d -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_non_zero_offsets_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_different_buffer_sizes_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_column_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_with_offsets -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_2d_3d -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_2d +{{OPT}}urEnqueueEventsWaitTest.Success/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidKernelArgs/* +{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.Success/* +{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/* +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_2d_3d +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_3d_2d +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_non_zero_offsets_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_different_buffer_sizes_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_column_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_3d_with_offsets +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_2d_3d +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_3d_2d {{OPT}}urEnqueueMemBufferMapMultiDeviceTest.* 
-{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_with_offsets -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_2d_3d -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_2d -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_3d_with_offsets +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_2d_3d +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_3d_2d +{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/* +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin1D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin2D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin3D/* {{OPT}}urEnqueueEventsWaitMultiDeviceMTTest/* {{OPT}}urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependencies/* {{OPT}}urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependenciesBarrierOnly/* diff --git a/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match b/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match index c72bf51398..44895d10fa 100644 --- a/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match +++ b/test/conformance/enqueue/enqueue_adapter_level_zero_v2.match @@ -1,104 
+1,85 @@ -{{NONDETERMINISTIC}} -urEnqueueKernelLaunchTest.InvalidKernelArgs/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueKernelLaunchKernelWgSizeTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueKernelLaunchWithVirtualMemory.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueKernelLaunchIncrementTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UseEventsEnabled -{{OPT}}urEnqueueKernelLaunchIncrementTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UseEventsDisabled +urEnqueueKernelLaunchTest.InvalidKernelArgs/* +urEnqueueKernelLaunchKernelWgSizeTest.Success/* +urEnqueueKernelLaunchWithVirtualMemory.Success/* +{{OPT}}urEnqueueKernelLaunchIncrementTest.Success/*__UseEventsEnabled +{{OPT}}urEnqueueKernelLaunchIncrementTest.Success/*__UseEventsDisabled {{OPT}}urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest.Success/UseEventsNoQueuePerThread {{OPT}}urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest.Success/NoUseEventsNoQueuePerThread -{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_whole_buffer_2D -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_non_zero_offsets_2D -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_different_buffer_sizes_2D -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_column_2D -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_row_2D 
-{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_3d -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_3d_with_offsets -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_2d_3d -{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___copy_3d_2d -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_whole_buffer_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_non_zero_offsets_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_different_buffer_sizes_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_column_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_row_2D -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_with_offsets -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_2d_3d -{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_2d -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_whole_buffer_2D -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_non_zero_offsets_2D 
-{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_different_buffer_sizes_2D -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_column_2D -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_row_2D -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_with_offsets -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_2d_3d -{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___write_3d_2d -{{OPT}}urEnqueueMemImageCopyTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D 
-{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1D 
-{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3D -{{OPT}}urEnqueueMemImageReadTest.Success1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.Success2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.Success3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidNullPointerDst/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidRegion1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidRegion2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageReadTest.InvalidRegion3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.Success1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.Success2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.Success3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-{{OPT}}urEnqueueMemImageWriteTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidNullPointerSrc/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidNullPtrEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion1D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion2D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion3D/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueKernelLaunchMultiDeviceTest.KernelLaunchReadDifferentQueues/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMFill2DNegativeTest.OutOfBounds/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMAdviseTest.InvalidSizeTooLarge/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueReadHostPipeTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueReadHostPipeTest.InvalidNullHandleProgram/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueReadHostPipeTest.InvalidNullPointerPipeSymbol/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-urEnqueueReadHostPipeTest.InvalidNullPointerBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueReadHostPipeTest.InvalidEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueWriteHostPipeTest.InvalidNullHandleQueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueWriteHostPipeTest.InvalidNullHandleProgram/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueWriteHostPipeTest.InvalidNullPointerPipeSymbol/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueWriteHostPipeTest.InvalidNullPointerBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEnqueueWriteHostPipeTest.InvalidEventWaitList/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urEnqueueTimestampRecordingExpTest.SuccessBlocking/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/* +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_whole_buffer_2D +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_non_zero_offsets_2D +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_different_buffer_sizes_2D +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_column_2D +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_row_2D +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_3d +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_3d_with_offsets +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_2d_3d +{{OPT}}urEnqueueMemBufferCopyRectTestWithParam.Success/*__copy_3d_2d +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_whole_buffer_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_non_zero_offsets_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_different_buffer_sizes_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_column_2D 
+{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_row_2D +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_3d +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_3d_with_offsets +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_2d_3d +{{OPT}}urEnqueueMemBufferReadRectTestWithParam.Success/*__write_3d_2d +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_whole_buffer_2D +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_non_zero_offsets_2D +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_different_buffer_sizes_2D +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_column_2D +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_row_2D +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_3d +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_3d_with_offsets +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_2d_3d +{{OPT}}urEnqueueMemBufferWriteRectTestWithParam.Success/*__write_3d_2d +{{OPT}}urEnqueueMemImageCopyTest.Success/* +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/* +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/* +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/* +{{OPT}}urEnqueueMemImageReadTest.Success1D/* +{{OPT}}urEnqueueMemImageReadTest.Success2D/* +{{OPT}}urEnqueueMemImageReadTest.Success3D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueMemImageReadTest.InvalidNullHandleImage/* +{{OPT}}urEnqueueMemImageReadTest.InvalidNullPointerDst/* +{{OPT}}urEnqueueMemImageReadTest.InvalidNullPtrEventWaitList/* 
+{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin1D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin2D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidOrigin3D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidRegion1D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidRegion2D/* +{{OPT}}urEnqueueMemImageReadTest.InvalidRegion3D/* +{{OPT}}urEnqueueMemImageWriteTest.Success1D/* +{{OPT}}urEnqueueMemImageWriteTest.Success2D/* +{{OPT}}urEnqueueMemImageWriteTest.Success3D/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidNullHandleImage/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidNullPointerSrc/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin1D/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin2D/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidOrigin3D/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion1D/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion2D/* +{{OPT}}urEnqueueMemImageWriteTest.InvalidRegion3D/* +{{OPT}}urEnqueueKernelLaunchMultiDeviceTest.KernelLaunchReadDifferentQueues/* +urEnqueueUSMFill2DNegativeTest.OutOfBounds/* +urEnqueueUSMAdviseTest.InvalidSizeTooLarge/* +urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/* +urEnqueueReadHostPipeTest.InvalidNullHandleQueue/* +urEnqueueReadHostPipeTest.InvalidNullHandleProgram/* +urEnqueueReadHostPipeTest.InvalidNullPointerPipeSymbol/* +urEnqueueReadHostPipeTest.InvalidNullPointerBuffer/* +urEnqueueReadHostPipeTest.InvalidEventWaitList/* +urEnqueueWriteHostPipeTest.InvalidNullHandleQueue/* +urEnqueueWriteHostPipeTest.InvalidNullHandleProgram/* +urEnqueueWriteHostPipeTest.InvalidNullPointerPipeSymbol/* +urEnqueueWriteHostPipeTest.InvalidNullPointerBuffer/* +urEnqueueWriteHostPipeTest.InvalidEventWaitList/* +{{OPT}}urEnqueueTimestampRecordingExpTest.SuccessBlocking/* diff --git a/test/conformance/enqueue/enqueue_adapter_native_cpu.match b/test/conformance/enqueue/enqueue_adapter_native_cpu.match index bf0ecdee39..18abf6abfe 
100644 --- a/test/conformance/enqueue/enqueue_adapter_native_cpu.match +++ b/test/conformance/enqueue/enqueue_adapter_native_cpu.match @@ -1,390 +1,154 @@ -# Note: This file is only for use with cts_exe.py {{OPT}}urEnqueueEventsWaitMultiDeviceTest.EmptyWaitList {{OPT}}urEnqueueEventsWaitMultiDeviceTest.EmptyWaitListWithEvent {{OPT}}urEnqueueEventsWaitMultiDeviceTest.EnqueueWaitOnADifferentQueue -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullPointerName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullPointerDst/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitListNullEvents/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitListZeroSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitInvalidEvent/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullPointerName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullPointerSrc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitListNullEvents/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitListZeroSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitInvalidEvent/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueEventsWaitTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueEventsWaitTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.Success/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullHandleProgram/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullPointerName/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidNullPointerDst/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitListNullEvents/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitListZeroSize/* +{{OPT}}urEnqueueDeviceGetGlobalVariableReadTest.InvalidEventWaitInvalidEvent/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullHandleProgram/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullPointerName/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidNullPointerSrc/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitListNullEvents/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitListZeroSize/* +{{OPT}}urEnqueueDeviceGetGlobalVariableWriteTest.InvalidEventWaitInvalidEvent/* +{{OPT}}urEnqueueEventsWaitTest.Success/* +{{OPT}}urEnqueueEventsWaitTest.InvalidNullPtrEventWaitList/* {{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitSingleQueueMultiOps/MultiThread {{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitSingleQueueMultiOps/NoMultiThread {{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitOnAllQueues/MultiThread 
{{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitOnAllQueues/NoMultiThread {{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitOnAllQueuesCommonDependency/MultiThread {{OPT}}urEnqueueEventsWaitMultiDeviceMTTest.EnqueueWaitOnAllQueuesCommonDependency/NoMultiThread -{{OPT}}urEnqueueEventsWaitWithBarrierTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueEventsWaitWithBarrierTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependenciesBarrierOnly/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}_ -urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependenciesLaunchOnly/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}_ -urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependencies/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}_ -urEnqueueEventsWaitWithBarrierOrderingTest.SuccessNonEventDependencies/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}_ -{{OPT}}urEnqueueKernelLaunchTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTest.InvalidWorkDimension/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTest.InvalidWorkGroupSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTest.InvalidKernelArgs/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.SuccessWithExplicitLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchKernelStandardTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D_1 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D_31 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D_1027 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D_32 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D_256 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D_1_1 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D_31_7 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D_1027_1 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D_1_32 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D_256_79 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D_1_1_1 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D_31_7_1 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D_1027_1_19 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D_1_53_19 -{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D_256_79_8 -{{OPT}}urEnqueueKernelLaunchWithVirtualMemory.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchWithUSM.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urEnqueueKernelLaunchMultiDeviceTest.KernelLaunchReadDifferentQueues/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolEnabled -{{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096 -{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000 -{{OPT}}urEnqueueMemBufferFillNegativeTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE 
-{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE 
-{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER 
-{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER 
-{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY 
-{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferReadRectTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemBufferReadRectTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY 
-{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY 
-{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueMemImageCopyTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D 
-{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D 
-{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1D -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2D -{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__3D -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_WRITE -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_READ_ONLY 
-{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_READ_ONLY -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__1024_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__2500_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__4096_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__6000_UR_MEM_FLAG_ALLOC_HOST_POINTER -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__1__patternSize__1 -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__256__patternSize__256 -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__1024__patternSize__256 -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__256__patternSize__4 -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__256__patternSize__8 -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__256__patternSize__16 -{{OPT}}urEnqueueUSMFillTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__size__256__patternSize__32 -{{OPT}}urEnqueueUSMFillNegativeTest.InvalidNullQueueHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFillNegativeTest.InvalidNullPtr/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFillNegativeTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFillNegativeTest.OutOfBounds/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urEnqueueUSMFillNegativeTest.invalidPatternSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFillNegativeTest.InvalidEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidNullQueueHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidNullPtr/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidPitch/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidWidth/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidHeight/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.OutOfBounds/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.invalidPatternSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseWithParamTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_ADVICE_FLAG_DEFAULT -{{OPT}}urEnqueueUSMAdviseTest.MultipleParamsSuccess/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseTest.InvalidNullPointerMem/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseTest.InvalidSizeTooLarge/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMAdviseTest.NonCoherentDeviceMemorySuccessOrWarning/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.Blocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urEnqueueUSMMemcpyTest.BlockingWithEvent/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.NonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.WaitForDependencies/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullQueueHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullDst/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullSrc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullPtrEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__1024__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1024__width__256__height__256__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__23__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_HOST -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_HOST__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_HOST 
-{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__234__width__233__height__1__src__UR_USM_TYPE_SHARED__dst__UR_USM_TYPE_SHARED -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullPointer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE -{{OPT}}urEnqueueUSMPrefetchWithParamTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_MIGRATION_FLAG_DEFAULT -{{OPT}}urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_MIGRATION_FLAG_DEFAULT -{{OPT}}urEnqueueUSMPrefetchTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMPrefetchTest.InvalidNullPointerMem/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMPrefetchTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueUSMPrefetchTest.InvalidEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueReadHostPipeTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueReadHostPipeTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urEnqueueReadHostPipeTest.InvalidNullPointerPipeSymbol/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueReadHostPipeTest.InvalidNullPointerBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueReadHostPipeTest.InvalidEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullPointerPipeSymbol/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullPointerBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urEnqueueWriteHostPipeTest.InvalidEventWaitList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urEnqueueEventsWaitWithBarrierTest.Success/* +{{OPT}}urEnqueueEventsWaitWithBarrierTest.InvalidNullPtrEventWaitList/* +urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependenciesBarrierOnly/*_ +urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependenciesLaunchOnly/*_ +urEnqueueEventsWaitWithBarrierOrderingTest.SuccessEventDependencies/*_ +urEnqueueEventsWaitWithBarrierOrderingTest.SuccessNonEventDependencies/*_ +{{OPT}}urEnqueueKernelLaunchTest.Success/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidNullHandleKernel/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidWorkDimension/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidWorkGroupSize/* +{{OPT}}urEnqueueKernelLaunchTest.InvalidKernelArgs/* +{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.Success/* +{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.SuccessWithExplicitLocalSize/* +{{OPT}}urEnqueueKernelLaunchKernelWgSizeTest.NonMatchingLocalSize/* +{{OPT}}urEnqueueKernelLaunchKernelSubGroupTest.Success/* +{{OPT}}urEnqueueKernelLaunchKernelStandardTest.Success/* 
+{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__1D_1 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__1D_31 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__1D_1027 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__1D_32 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__1D_256 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__2D_1_1 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__2D_31_7 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__2D_1027_1 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__2D_1_32 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__2D_256_79 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__3D_1_1_1 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__3D_31_7_1 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__3D_1027_1_19 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__3D_1_53_19 +{{OPT}}urEnqueueKernelLaunchTestWithParam.Success/*__3D_256_79_8 +{{OPT}}urEnqueueKernelLaunchWithVirtualMemory.Success/* +{{OPT}}urEnqueueKernelLaunchWithUSM.Success/* +{{OPT}}urEnqueueKernelLaunchMultiDeviceTest.KernelLaunchReadDifferentQueues/* +{{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/*__UsePoolEnabled +{{OPT}}urEnqueueKernelLaunchUSMLinkedList.Success/*__UsePoolDisabled +{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferCopyRectTest.InvalidSize/* +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/*__1024 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/*__2500 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/*__4096 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidNullPtrEventWaitList/*__6000 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/*__1024 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/*__2500 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/*__4096 +{{OPT}}urEnqueueMemBufferCopyTestWithParam.InvalidSize/*__6000 
+{{OPT}}urEnqueueMemBufferFillNegativeTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferMapTestWithParam.InvalidSize/* +{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferReadTestWithParam.InvalidSize/* +{{OPT}}urEnqueueMemBufferReadRectTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferReadRectTest.InvalidSize/* +{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferWriteTestWithParam.InvalidSize/* +{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemBufferWriteRectTest.InvalidSize/* +{{OPT}}urEnqueueMemImageCopyTest.Success/* +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopy/* +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithSrcOffset/* +{{OPT}}urEnqueueMemImageCopyTest.SuccessPartialCopyWithDstOffset/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageSrc/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullHandleImageDst/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueMemImageCopyTest.InvalidSize/* +{{OPT}}urEnqueueMemUnmapTestWithParam.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueUSMFillTestWithParam.Success/* +{{OPT}}urEnqueueUSMFillNegativeTest.InvalidNullQueueHandle/* +{{OPT}}urEnqueueUSMFillNegativeTest.InvalidNullPtr/* +{{OPT}}urEnqueueUSMFillNegativeTest.InvalidSize/* +{{OPT}}urEnqueueUSMFillNegativeTest.OutOfBounds/* +{{OPT}}urEnqueueUSMFillNegativeTest.invalidPatternSize/* +{{OPT}}urEnqueueUSMFillNegativeTest.InvalidEventWaitList/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidNullQueueHandle/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidNullPtr/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidPitch/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidWidth/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidHeight/* 
+{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidSize/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.OutOfBounds/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.invalidPatternSize/* +{{OPT}}urEnqueueUSMFill2DNegativeTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueUSMAdviseWithParamTest.Success/*__UR_USM_ADVICE_FLAG_DEFAULT +{{OPT}}urEnqueueUSMAdviseTest.MultipleParamsSuccess/* +{{OPT}}urEnqueueUSMAdviseTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueUSMAdviseTest.InvalidNullPointerMem/* +{{OPT}}urEnqueueUSMAdviseTest.InvalidEnumeration/* +{{OPT}}urEnqueueUSMAdviseTest.InvalidSizeZero/* +{{OPT}}urEnqueueUSMAdviseTest.InvalidSizeTooLarge/* +{{OPT}}urEnqueueUSMAdviseTest.NonCoherentDeviceMemorySuccessOrWarning/* +{{OPT}}urEnqueueUSMMemcpyTest.Blocking/* +{{OPT}}urEnqueueUSMMemcpyTest.BlockingWithEvent/* +{{OPT}}urEnqueueUSMMemcpyTest.NonBlocking/* +{{OPT}}urEnqueueUSMMemcpyTest.WaitForDependencies/* +{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullQueueHandle/* +{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullDst/* +{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullSrc/* +{{OPT}}urEnqueueUSMMemcpyTest.InvalidNullPtrEventWaitList/* +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessBlocking/* +{{OPT}}urEnqueueUSMMemcpy2DTestWithParam.SuccessNonBlocking/* +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullHandleQueue/*__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidNullPointer/*__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidSize/*__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE +{{OPT}}urEnqueueUSMMemcpy2DNegativeTest.InvalidEventWaitList/*__pitch__1__width__1__height__1__src__UR_USM_TYPE_DEVICE__dst__UR_USM_TYPE_DEVICE +{{OPT}}urEnqueueUSMPrefetchWithParamTest.Success/*__UR_USM_MIGRATION_FLAG_DEFAULT +{{OPT}}urEnqueueUSMPrefetchWithParamTest.CheckWaitEvent/*__UR_USM_MIGRATION_FLAG_DEFAULT 
+{{OPT}}urEnqueueUSMPrefetchTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueUSMPrefetchTest.InvalidNullPointerMem/* +{{OPT}}urEnqueueUSMPrefetchTest.InvalidEnumeration/* +{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeZero/* +{{OPT}}urEnqueueUSMPrefetchTest.InvalidSizeTooLarge/* +{{OPT}}urEnqueueUSMPrefetchTest.InvalidEventWaitList/* +{{OPT}}urEnqueueReadHostPipeTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueReadHostPipeTest.InvalidNullHandleProgram/* +{{OPT}}urEnqueueReadHostPipeTest.InvalidNullPointerPipeSymbol/* +{{OPT}}urEnqueueReadHostPipeTest.InvalidNullPointerBuffer/* +{{OPT}}urEnqueueReadHostPipeTest.InvalidEventWaitList/* +{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullHandleQueue/* +{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullHandleProgram/* +{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullPointerPipeSymbol/* +{{OPT}}urEnqueueWriteHostPipeTest.InvalidNullPointerBuffer/* +{{OPT}}urEnqueueWriteHostPipeTest.InvalidEventWaitList/* urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest.Success/* urEnqueueKernelLaunchIncrementMultiDeviceTest.Success/* urEnqueueKernelLaunchIncrementTest.Success/* diff --git a/test/conformance/event/event_adapter_cuda.match b/test/conformance/event/event_adapter_cuda.match index d9e14551da..daa3c281bc 100644 --- a/test/conformance/event/event_adapter_cuda.match +++ b/test/conformance/event/event_adapter_cuda.match @@ -1,7 +1,6 @@ -{{NONDETERMINISTIC}} -urEventGetProfilingInfoTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_PROFILING_INFO_COMMAND_COMPLETE -urEventGetProfilingInfoWithTimingComparisonTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEventSetCallbackTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEventSetCallbackTest.ValidateParameters/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEventSetCallbackTest.AllStates/NVIDIA_CUDA_BACKEND___{{.*}}_ -urEventSetCallbackTest.EventAlreadyCompleted/NVIDIA_CUDA_BACKEND___{{.*}}_ +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_COMPLETE 
+urEventGetProfilingInfoWithTimingComparisonTest.Success/* +urEventSetCallbackTest.Success/* +urEventSetCallbackTest.ValidateParameters/* +urEventSetCallbackTest.AllStates/* +urEventSetCallbackTest.EventAlreadyCompleted/* diff --git a/test/conformance/event/event_adapter_hip.match b/test/conformance/event/event_adapter_hip.match index 6bc909c5fd..daa3c281bc 100644 --- a/test/conformance/event/event_adapter_hip.match +++ b/test/conformance/event/event_adapter_hip.match @@ -1,7 +1,6 @@ -{{NONDETERMINISTIC}} -urEventGetProfilingInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_PROFILING_INFO_COMMAND_COMPLETE -urEventGetProfilingInfoWithTimingComparisonTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urEventSetCallbackTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urEventSetCallbackTest.ValidateParameters/AMD_HIP_BACKEND___{{.*}}_ -urEventSetCallbackTest.AllStates/AMD_HIP_BACKEND___{{.*}}_ -urEventSetCallbackTest.EventAlreadyCompleted/AMD_HIP_BACKEND___{{.*}}_ +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_COMPLETE +urEventGetProfilingInfoWithTimingComparisonTest.Success/* +urEventSetCallbackTest.Success/* +urEventSetCallbackTest.ValidateParameters/* +urEventSetCallbackTest.AllStates/* +urEventSetCallbackTest.EventAlreadyCompleted/* diff --git a/test/conformance/event/event_adapter_level_zero.match b/test/conformance/event/event_adapter_level_zero.match index 5adfbd0fd9..e7fe14c234 100644 --- a/test/conformance/event/event_adapter_level_zero.match +++ b/test/conformance/event/event_adapter_level_zero.match @@ -1,9 +1,8 @@ -# Note: This file is only for use with cts_exe.py -{{OPT}}urEventGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_EVENT_INFO_COMMAND_TYPE -{{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_QUEUED -{{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_SUBMIT 
-{{OPT}}urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_COMPLETE -{{OPT}}urEventGetProfilingInfoWithTimingComparisonTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urEventGetInfoTest.Success/*__UR_EVENT_INFO_COMMAND_TYPE +{{OPT}}urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_QUEUED +{{OPT}}urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_SUBMIT +{{OPT}}urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_COMPLETE +{{OPT}}urEventGetProfilingInfoWithTimingComparisonTest.Success/* urEventCreateWithNativeHandleTest.Success/* urEventSetCallbackTest.AllStates/* urEventSetCallbackTest.EventAlreadyCompleted/* diff --git a/test/conformance/event/event_adapter_level_zero_v2.match b/test/conformance/event/event_adapter_level_zero_v2.match index 767bb53c6e..dd70962041 100644 --- a/test/conformance/event/event_adapter_level_zero_v2.match +++ b/test/conformance/event/event_adapter_level_zero_v2.match @@ -1,7 +1,7 @@ -urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_QUEUED -urEventGetProfilingInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_PROFILING_INFO_COMMAND_SUBMIT -urEventGetProfilingInfoWithTimingComparisonTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventSetCallbackTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventSetCallbackTest.ValidateParameters/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventSetCallbackTest.AllStates/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urEventSetCallbackTest.EventAlreadyCompleted/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_QUEUED +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_SUBMIT 
+urEventGetProfilingInfoWithTimingComparisonTest.Success/* +urEventSetCallbackTest.Success/* +urEventSetCallbackTest.ValidateParameters/* +urEventSetCallbackTest.AllStates/* +urEventSetCallbackTest.EventAlreadyCompleted/* diff --git a/test/conformance/event/event_adapter_native_cpu.match b/test/conformance/event/event_adapter_native_cpu.match index 03e653a2eb..1716dea0ca 100644 --- a/test/conformance/event/event_adapter_native_cpu.match +++ b/test/conformance/event/event_adapter_native_cpu.match @@ -1,15 +1,14 @@ -# Note: This file is only for use with cts_exe.py -urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_QUEUED -urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_SUBMIT -urEventGetProfilingInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROFILING_INFO_COMMAND_COMPLETE -urEventGetProfilingInfoWithTimingComparisonTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetProfilingInfoNegativeTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventGetProfilingInfoNegativeTest.InvalidValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventWaitTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventSetCallbackTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventSetCallbackTest.ValidateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventSetCallbackTest.AllStates/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urEventSetCallbackTest.EventAlreadyCompleted/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_QUEUED +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_SUBMIT +urEventGetProfilingInfoTest.Success/*__UR_PROFILING_INFO_COMMAND_COMPLETE +urEventGetProfilingInfoWithTimingComparisonTest.Success/* +urEventGetProfilingInfoNegativeTest.InvalidNullHandle/* +urEventGetProfilingInfoNegativeTest.InvalidValue/* 
+urEventWaitTest.Success/* +urEventSetCallbackTest.Success/* +urEventSetCallbackTest.ValidateParameters/* +urEventSetCallbackTest.AllStates/* +urEventSetCallbackTest.EventAlreadyCompleted/* # These crash when ran through the loader {{OPT}}urEventRetainTest.InvalidNullHandle/* diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match index 7e7ecf8d4e..5aa63f1cbc 100644 --- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_level_zero_v2.match @@ -1,59 +1,46 @@ -{{NONDETERMINISTIC}} -urCommandBufferReleaseExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferReleaseExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferRetainExpTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferRetainExpTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendUSMMemcpyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendUSMFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferReadExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferReadRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ 
-urCommandBufferCommandsTest.urCommandBufferAppendMemBufferWriteExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferWriteRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendMemBufferFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendUSMPrefetchExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferCommandsTest.urCommandBufferAppendUSMAdviseExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferAppendKernelLaunchExpTest.Basic/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___ -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1__patternSize__1 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__256 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1024__patternSize__256 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__4 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 -urCommandBufferFillCommandsTest.Buffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1__patternSize__1 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__256 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__1024__patternSize__256 
-urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__4 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__8 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__16 -urCommandBufferFillCommandsTest.USM/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___size__256__patternSize__32 -KernelCommandEventSyncTest.Basic/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -KernelCommandEventSyncTest.InterCommandBuffer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -KernelCommandEventSyncTest.SignalWaitBeforeEnqueue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.USMMemcpyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.USMFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MemBufferCopyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MemBufferCopyRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MemBufferReadExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MemBufferReadRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MemBufferWriteExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MemBufferWriteRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MemBufferFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.USMPrefetchExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.USMAdviseExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncTest.MultipleEventCommands/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-CommandEventSyncTest.MultipleEventCommandsBetweenCommandBuffers/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.USMMemcpyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.USMFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferCopyExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferCopyRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferReadExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferReadRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferWriteExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferWriteRectExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MemBufferFillExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.USMPrefetchExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.USMAdviseExp/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -CommandEventSyncUpdateTest.MultipleEventCommands/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urCommandBufferReleaseExpTest.Success/* +urCommandBufferReleaseExpTest.InvalidNullHandle/* +urCommandBufferRetainExpTest.Success/* +urCommandBufferRetainExpTest.InvalidNullHandle/* +urCommandBufferCommandsTest.urCommandBufferAppendUSMMemcpyExp/* +urCommandBufferCommandsTest.urCommandBufferAppendUSMFillExp/* +urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyExp/* +urCommandBufferCommandsTest.urCommandBufferAppendMemBufferCopyRectExp/* +urCommandBufferCommandsTest.urCommandBufferAppendMemBufferReadExp/* +urCommandBufferCommandsTest.urCommandBufferAppendMemBufferReadRectExp/* 
+urCommandBufferCommandsTest.urCommandBufferAppendMemBufferWriteExp/* +urCommandBufferCommandsTest.urCommandBufferAppendMemBufferWriteRectExp/* +urCommandBufferCommandsTest.urCommandBufferAppendMemBufferFillExp/* +urCommandBufferCommandsTest.urCommandBufferAppendUSMPrefetchExp/* +urCommandBufferCommandsTest.urCommandBufferAppendUSMAdviseExp/* +urCommandBufferAppendKernelLaunchExpTest.Basic/* +urCommandBufferFillCommandsTest.Buffer/* +urCommandBufferFillCommandsTest.USM/* +KernelCommandEventSyncTest.Basic/* +KernelCommandEventSyncTest.InterCommandBuffer/* +KernelCommandEventSyncTest.SignalWaitBeforeEnqueue/* +CommandEventSyncTest.USMMemcpyExp/* +CommandEventSyncTest.USMFillExp/* +CommandEventSyncTest.MemBufferCopyExp/* +CommandEventSyncTest.MemBufferCopyRectExp/* +CommandEventSyncTest.MemBufferReadExp/* +CommandEventSyncTest.MemBufferReadRectExp/* +CommandEventSyncTest.MemBufferWriteExp/* +CommandEventSyncTest.MemBufferWriteRectExp/* +CommandEventSyncTest.MemBufferFillExp/* +CommandEventSyncTest.USMPrefetchExp/* +CommandEventSyncTest.USMAdviseExp/* +CommandEventSyncTest.MultipleEventCommands/* +CommandEventSyncTest.MultipleEventCommandsBetweenCommandBuffers/* +CommandEventSyncUpdateTest.USMMemcpyExp/* +CommandEventSyncUpdateTest.USMFillExp/* +CommandEventSyncUpdateTest.MemBufferCopyExp/* +CommandEventSyncUpdateTest.MemBufferCopyRectExp/* +CommandEventSyncUpdateTest.MemBufferReadExp/* +CommandEventSyncUpdateTest.MemBufferReadRectExp/* +CommandEventSyncUpdateTest.MemBufferWriteExp/* +CommandEventSyncUpdateTest.MemBufferWriteRectExp/* +CommandEventSyncUpdateTest.MemBufferFillExp/* +CommandEventSyncUpdateTest.USMPrefetchExp/* +CommandEventSyncUpdateTest.USMAdviseExp/* +CommandEventSyncUpdateTest.MultipleEventCommands/* diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match index ca441e8f04..c6fe7ad962 100644 --- 
a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match +++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match @@ -1,44 +1,43 @@ -{{NONDETERMINISTIC}} -{{OPT}}urCommandBufferReleaseCommandExpTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufBeforeHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufMultipleHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferReleaseCommandExpTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferRetainCommandExpTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferRetainCommandExpTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urCommandBufferAppendKernelLaunchExpTest.Basic/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}BufferFillCommandTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}BufferFillCommandTest.UpdateGlobalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}BufferFillCommandTest.SeparateUpdateCalls/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}BufferFillCommandTest.OverrideUpdate/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}BufferFillCommandTest.OverrideArgList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}InvalidUpdateTest.NotFinalizedCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}InvalidUpdateTest.NotUpdatableCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}InvalidUpdateTest.InvalidDimensions/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMFillCommandTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMFillCommandTest.UpdateNull/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMFillCommandTest.UpdateBeforeEnqueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}BufferSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMMultiSaxpyKernelTest.UpdateNullptrKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}NDRangeUpdateTest.Update3D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}NDRangeUpdateTest.Update2D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}NDRangeUpdateTest.Update1D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}NDRangeUpdateTest.ImplToUserDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}NDRangeUpdateTest.UserToImplDefinedLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}KernelCommandEventSyncTest.Basic/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}KernelCommandEventSyncTest.InterCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}KernelCommandEventSyncTest.SignalWaitBeforeEnqueue/SYCL_NATIVE_CPU__{{.*}} -{{OPT}}KernelCommandEventSyncUpdateTest.Basic/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}KernelCommandEventSyncUpdateTest.TwoWaitEvents/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}KernelCommandEventSyncUpdateTest.InvalidWaitUpdate/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}KernelCommandEventSyncUpdateTest.InvalidSignalUpdate/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}LocalMemoryUpdateTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}LocalMemoryUpdateTest.UpdateParametersAndLocalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}LocalMemoryMultiUpdateTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}LocalMemoryMultiUpdateTest.UpdateWithoutBlocking/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urCommandBufferReleaseCommandExpTest.Success/* +{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufBeforeHandle/* +{{OPT}}urCommandBufferReleaseCommandExpTest.ReleaseCmdBufMultipleHandles/* +{{OPT}}urCommandBufferReleaseCommandExpTest.InvalidNullHandle/* +{{OPT}}urCommandBufferRetainCommandExpTest.Success/* +{{OPT}}urCommandBufferRetainCommandExpTest.InvalidNullHandle/* +{{OPT}}urCommandBufferAppendKernelLaunchExpTest.Basic/* +{{OPT}}BufferFillCommandTest.UpdateParameters/* +{{OPT}}BufferFillCommandTest.UpdateGlobalSize/* +{{OPT}}BufferFillCommandTest.SeparateUpdateCalls/* +{{OPT}}BufferFillCommandTest.OverrideUpdate/* +{{OPT}}BufferFillCommandTest.OverrideArgList/* +{{OPT}}InvalidUpdateTest.NotFinalizedCommandBuffer/* +{{OPT}}InvalidUpdateTest.NotUpdatableCommandBuffer/* +{{OPT}}InvalidUpdateTest.InvalidDimensions/* +{{OPT}}USMFillCommandTest.UpdateParameters/* +{{OPT}}USMFillCommandTest.UpdateNull/* +{{OPT}}USMFillCommandTest.UpdateBeforeEnqueue/* +{{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/* +{{OPT}}BufferSaxpyKernelTest.UpdateParameters/* +{{OPT}}USMSaxpyKernelTest.UpdateParameters/* +{{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/* +{{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/* +{{OPT}}USMMultiSaxpyKernelTest.UpdateNullptrKernel/* +{{OPT}}NDRangeUpdateTest.Update3D/* +{{OPT}}NDRangeUpdateTest.Update2D/* +{{OPT}}NDRangeUpdateTest.Update1D/* +{{OPT}}NDRangeUpdateTest.ImplToUserDefinedLocalSize/* +{{OPT}}NDRangeUpdateTest.UserToImplDefinedLocalSize/* +{{OPT}}USMSaxpyKernelTest.UpdateParameters/* +{{OPT}}USMMultiSaxpyKernelTest.UpdateParameters/* +{{OPT}}USMMultiSaxpyKernelTest.UpdateWithoutBlocking/* +{{OPT}}KernelCommandEventSyncTest.Basic/* +{{OPT}}KernelCommandEventSyncTest.InterCommandBuffer/* 
+{{OPT}}KernelCommandEventSyncTest.SignalWaitBeforeEnqueue/* +{{OPT}}KernelCommandEventSyncUpdateTest.Basic/* +{{OPT}}KernelCommandEventSyncUpdateTest.TwoWaitEvents/* +{{OPT}}KernelCommandEventSyncUpdateTest.InvalidWaitUpdate/* +{{OPT}}KernelCommandEventSyncUpdateTest.InvalidSignalUpdate/* +{{OPT}}LocalMemoryUpdateTest.UpdateParameters/* +{{OPT}}LocalMemoryUpdateTest.UpdateParametersAndLocalSize/* +{{OPT}}LocalMemoryMultiUpdateTest.UpdateParameters/* +{{OPT}}LocalMemoryMultiUpdateTest.UpdateWithoutBlocking/* diff --git a/test/conformance/exp_enqueue_native/exp_enqueue_native_adapter_level_zero_v2.match b/test/conformance/exp_enqueue_native/exp_enqueue_native_adapter_level_zero_v2.match index d4645b3ffc..f8cf4c4c88 100644 --- a/test/conformance/exp_enqueue_native/exp_enqueue_native_adapter_level_zero_v2.match +++ b/test/conformance/exp_enqueue_native/exp_enqueue_native_adapter_level_zero_v2.match @@ -1,5 +1,4 @@ -{{NONDETERMINISTIC}} -urLevelZeroEnqueueNativeCommandTest.Success{{.*}} -urLevelZeroEnqueueNativeCommandTest.Dependencies{{.*}} -urLevelZeroEnqueueNativeCommandTest.DependenciesURBefore{{.*}} -urLevelZeroEnqueueNativeCommandTest.DependenciesURAfter{{.*}} +urLevelZeroEnqueueNativeCommandTest.Success* +urLevelZeroEnqueueNativeCommandTest.Dependencies* +urLevelZeroEnqueueNativeCommandTest.DependenciesURBefore* +urLevelZeroEnqueueNativeCommandTest.DependenciesURAfter* diff --git a/test/conformance/exp_launch_properties/exp_launch_properties_adapter_native_cpu.match b/test/conformance/exp_launch_properties/exp_launch_properties_adapter_native_cpu.match index f8b1e49e44..4a4837c5ab 100644 --- a/test/conformance/exp_launch_properties/exp_launch_properties_adapter_native_cpu.match +++ b/test/conformance/exp_launch_properties/exp_launch_properties_adapter_native_cpu.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} -urEnqueueKernelLaunchCustomTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urEnqueueKernelLaunchCustomTest.Success/* diff --git 
a/test/conformance/integration/integration_adapter_level_zero.match b/test/conformance/integration/integration_adapter_level_zero.match index a49ad93a94..460aa8cf4e 100644 --- a/test/conformance/integration/integration_adapter_level_zero.match +++ b/test/conformance/integration/integration_adapter_level_zero.match @@ -1,7 +1,3 @@ -{{NONDETERMINISTIC}} -{{OPT}}QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___IN_ORDER_QUEUE -{{OPT}}QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___OUT_OF_ORDER_QUEUE -{{OPT}}QueueUSMTestWithParam.QueueUSMTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___IN_ORDER_QUEUE -{{OPT}}QueueUSMTestWithParam.QueueUSMTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___OUT_OF_ORDER_QUEUE -{{OPT}}QueueBufferTestWithParam.QueueBufferTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___IN_ORDER_QUEUE -{{OPT}}QueueBufferTestWithParam.QueueBufferTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___OUT_OF_ORDER_QUEUE +{{OPT}}QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/* +{{OPT}}QueueUSMTestWithParam.QueueUSMTest/* +{{OPT}}QueueBufferTestWithParam.QueueBufferTest/* diff --git a/test/conformance/integration/integration_adapter_level_zero_v2.match b/test/conformance/integration/integration_adapter_level_zero_v2.match index a49ad93a94..460aa8cf4e 100644 --- a/test/conformance/integration/integration_adapter_level_zero_v2.match +++ b/test/conformance/integration/integration_adapter_level_zero_v2.match @@ -1,7 +1,3 @@ -{{NONDETERMINISTIC}} -{{OPT}}QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___IN_ORDER_QUEUE -{{OPT}}QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___OUT_OF_ORDER_QUEUE 
-{{OPT}}QueueUSMTestWithParam.QueueUSMTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___IN_ORDER_QUEUE -{{OPT}}QueueUSMTestWithParam.QueueUSMTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___OUT_OF_ORDER_QUEUE -{{OPT}}QueueBufferTestWithParam.QueueBufferTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___IN_ORDER_QUEUE -{{OPT}}QueueBufferTestWithParam.QueueBufferTest/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___OUT_OF_ORDER_QUEUE +{{OPT}}QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/* +{{OPT}}QueueUSMTestWithParam.QueueUSMTest/* +{{OPT}}QueueBufferTestWithParam.QueueBufferTest/* diff --git a/test/conformance/integration/integration_adapter_native_cpu.match b/test/conformance/integration/integration_adapter_native_cpu.match index 159e57a144..fe5e7567ed 100644 --- a/test/conformance/integration/integration_adapter_native_cpu.match +++ b/test/conformance/integration/integration_adapter_native_cpu.match @@ -1,7 +1,3 @@ -{{NONDETERMINISTIC}} -QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__IN_ORDER_QUEUE -QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__OUT_OF_ORDER_QUEUE -QueueUSMTestWithParam.QueueUSMTest/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__IN_ORDER_QUEUE -QueueUSMTestWithParam.QueueUSMTest/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__OUT_OF_ORDER_QUEUE -QueueBufferTestWithParam.QueueBufferTest/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__IN_ORDER_QUEUE -QueueBufferTestWithParam.QueueBufferTest/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__OUT_OF_ORDER_QUEUE +QueueEmptyStatusTestWithParam.QueueEmptyStatusTest/* +QueueUSMTestWithParam.QueueUSMTest/* +QueueBufferTestWithParam.QueueBufferTest/* diff --git a/test/conformance/kernel/kernel_adapter_cuda.match b/test/conformance/kernel/kernel_adapter_cuda.match index b05b2fda58..cafcdf54c5 100644 --- a/test/conformance/kernel/kernel_adapter_cuda.match +++ 
b/test/conformance/kernel/kernel_adapter_cuda.match @@ -1,7 +1,6 @@ -{{NONDETERMINISTIC}} -urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urKernelSetArgLocalTest.InvalidKernelArgumentIndex/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urKernelSetArgValueTest.InvalidKernelArgumentIndex/NVIDIA_CUDA_BACKEND___{{.*}}_ +urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/* +{{OPT}}urKernelSetArgLocalTest.InvalidKernelArgumentIndex/* +{{OPT}}urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/* +{{OPT}}urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/* +{{OPT}}urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/* +{{OPT}}urKernelSetArgValueTest.InvalidKernelArgumentIndex/* diff --git a/test/conformance/kernel/kernel_adapter_hip.match b/test/conformance/kernel/kernel_adapter_hip.match index 4e6ab18293..f8ea9e3e99 100644 --- a/test/conformance/kernel/kernel_adapter_hip.match +++ b/test/conformance/kernel/kernel_adapter_hip.match @@ -1,8 +1,7 @@ -{{NONDETERMINISTIC}} -urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/AMD_HIP_BACKEND___{{.*}}_ -urKernelGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_KERNEL_INFO_NUM_REGS -urKernelSetArgLocalTest.InvalidKernelArgumentIndex/AMD_HIP_BACKEND___{{.*}}_ -urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/AMD_HIP_BACKEND___{{.*}}_ -urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/AMD_HIP_BACKEND___{{.*}}_ -urKernelSetArgValueTest.InvalidKernelArgumentIndex/AMD_HIP_BACKEND___{{.*}}_ -urKernelSetArgValueTest.InvalidKernelArgumentSize/AMD_HIP_BACKEND___{{.*}}_ +urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/* +urKernelGetInfoTest.Success/*__UR_KERNEL_INFO_NUM_REGS 
+urKernelSetArgLocalTest.InvalidKernelArgumentIndex/* +urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/* +urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/* +urKernelSetArgValueTest.InvalidKernelArgumentIndex/* +urKernelSetArgValueTest.InvalidKernelArgumentSize/* diff --git a/test/conformance/kernel/kernel_adapter_level_zero.match b/test/conformance/kernel/kernel_adapter_level_zero.match index cf83e73ff3..4a4868eff2 100644 --- a/test/conformance/kernel/kernel_adapter_level_zero.match +++ b/test/conformance/kernel/kernel_adapter_level_zero.match @@ -1,5 +1,4 @@ -{{NONDETERMINISTIC}} -urKernelSetExecInfoTest.SuccessIndirectAccess/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urKernelSetExecInfoUSMPointersTest.SuccessHost/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urKernelSetExecInfoUSMPointersTest.SuccessDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urKernelSetExecInfoUSMPointersTest.SuccessShared/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urKernelSetExecInfoTest.SuccessIndirectAccess/* +urKernelSetExecInfoUSMPointersTest.SuccessHost/* +urKernelSetExecInfoUSMPointersTest.SuccessDevice/* +urKernelSetExecInfoUSMPointersTest.SuccessShared/* diff --git a/test/conformance/kernel/kernel_adapter_level_zero_v2.match b/test/conformance/kernel/kernel_adapter_level_zero_v2.match index cf83e73ff3..4a4868eff2 100644 --- a/test/conformance/kernel/kernel_adapter_level_zero_v2.match +++ b/test/conformance/kernel/kernel_adapter_level_zero_v2.match @@ -1,5 +1,4 @@ -{{NONDETERMINISTIC}} -urKernelSetExecInfoTest.SuccessIndirectAccess/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urKernelSetExecInfoUSMPointersTest.SuccessHost/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urKernelSetExecInfoUSMPointersTest.SuccessDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urKernelSetExecInfoUSMPointersTest.SuccessShared/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
+urKernelSetExecInfoTest.SuccessIndirectAccess/* +urKernelSetExecInfoUSMPointersTest.SuccessHost/* +urKernelSetExecInfoUSMPointersTest.SuccessDevice/* +urKernelSetExecInfoUSMPointersTest.SuccessShared/* diff --git a/test/conformance/kernel/kernel_adapter_native_cpu.match b/test/conformance/kernel/kernel_adapter_native_cpu.match index 368f4ad358..7ca10ec3d2 100644 --- a/test/conformance/kernel/kernel_adapter_native_cpu.match +++ b/test/conformance/kernel/kernel_adapter_native_cpu.match @@ -1,187 +1,83 @@ -{{NONDETERMINISTIC}} -urKernelCreateTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelCreateTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelCreateTest.InvalidNullPointerName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelCreateTest.InvalidNullPointerKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelCreateTest.InvalidKernelName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelCreateWithNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelCreateWithNativeHandleTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelCreateWithNativeHandleTest.InvalidNullPointerNativeKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE 
-urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE 
-urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PREFERRED_WORK_GROUP_SIZE_MULTIPLE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE -urKernelGetGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE -urKernelGetGroupInfoSingleTest.CompileWorkGroupSizeEmpty/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetGroupInfoSingleTest.CompileMaxWorkGroupSizeEmpty/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT 
-urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME 
-urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_FUNCTION_NAME -urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_ARGS -urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_REFERENCE_COUNT -urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_CONTEXT -urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_PROGRAM -urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_ATTRIBUTES -urKernelGetInfoTest.InvalidNullPointerPropSizeRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_INFO_NUM_REGS -urKernelGetInfoSingleTest.KernelNameCorrect/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetInfoSingleTest.KernelContextCorrect/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetNativeHandleTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetNativeHandleTest.InvalidNullPointerNativeKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-urKernelGetSubGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE -urKernelGetSubGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL -urKernelGetSubGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE -urKernelGetSubGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL -urKernelGetSubGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE -urKernelGetSubGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL -urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_SUB_GROUP_SIZE -urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_MAX_NUM_SUB_GROUPS 
-urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_COMPILE_NUM_SUB_GROUPS -urKernelGetSubGroupInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL -urKernelGetSubGroupInfoSingleTest.CompileNumSubgroupsIsZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelReleaseTest.KernelReleaseAfterProgramRelease/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelReleaseTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelRetainTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgLocalTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgLocalTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgLocalTest.InvalidKernelArgumentIndex/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgMemObjTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgMemObjTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgPointerTest.SuccessHost/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgPointerTest.SuccessDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgPointerTest.SuccessShared/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgPointerNegativeTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_NEAREST 
-urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__NORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_NONE_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_NEAREST 
-urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_CLAMP_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_NEAREST -urKernelSetArgSamplerTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UNNORMALIZED_UR_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT_UR_SAMPLER_FILTER_MODE_LINEAR -urKernelSetArgSamplerTest.SuccessWithProps/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgSamplerTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgSamplerTest.InvalidNullHandleArgValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgValueTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgValueTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgValueTest.InvalidNullPointerArgValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgValueTest.InvalidKernelArgumentIndex/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetArgValueTest.InvalidKernelArgumentSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-urKernelSetExecInfoTest.SuccessIndirectAccess/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoUSMPointersTest.SuccessHost/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoUSMPointersTest.SuccessDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoUSMPointersTest.SuccessShared/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetExecInfoCacheConfigTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_CACHE_CONFIG_DEFAULT -urKernelSetExecInfoCacheConfigTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_CACHE_CONFIG_LARGE_SLM -urKernelSetExecInfoCacheConfigTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_KERNEL_CACHE_CONFIG_LARGE_DATA -urKernelSetSpecializationConstantsTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsTest.InvalidSizeCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsTest.InvalidValueSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsTest.InvalidValueId/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsTest.InvalidValuePtr/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelSetSpecializationConstantsNegativeTest.Unsupported/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.Success2D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-urKernelGetSuggestedLocalWorkSizeTest.Success3D/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.InvalidNullHandleKernel/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.InvalidNullHandleQueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.InvalidWorkDimension/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.InvalidGlobalOffset/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.InvalidGlobalSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urKernelGetSuggestedLocalWorkSizeTest.InvalidSuggestedLocalWorkSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urKernelCreateTest.Success/* +urKernelCreateTest.InvalidNullHandleProgram/* +urKernelCreateTest.InvalidNullPointerName/* +urKernelCreateTest.InvalidNullPointerKernel/* +urKernelCreateTest.InvalidKernelName/* +urKernelCreateWithNativeHandleTest.Success/* +urKernelCreateWithNativeHandleTest.InvalidNullHandleContext/* +urKernelCreateWithNativeHandleTest.InvalidNullPointerNativeKernel/* +urKernelGetGroupInfoTest.Success/* +urKernelGetGroupInfoTest.InvalidNullHandleKernel/* +urKernelGetGroupInfoTest.InvalidNullHandleDevice/* +urKernelGetGroupInfoTest.InvalidEnumeration/* +urKernelGetGroupInfoSingleTest.CompileWorkGroupSizeEmpty/* +urKernelGetGroupInfoSingleTest.CompileMaxWorkGroupSizeEmpty/* +urKernelGetGroupInfoWgSizeTest.CompileWorkGroupSize/* +urKernelGetInfoTest.Success/* +urKernelGetInfoTest.InvalidNullHandleKernel/* +urKernelGetInfoTest.InvalidEnumeration/* +urKernelGetInfoTest.InvalidSizeZero/* +urKernelGetInfoTest.InvalidSizeSmall/* +urKernelGetInfoTest.InvalidNullPointerPropValue/* +urKernelGetInfoTest.InvalidNullPointerPropSizeRet/* +urKernelGetInfoSingleTest.KernelNameCorrect/* +urKernelGetInfoSingleTest.KernelContextCorrect/* +urKernelGetNativeHandleTest.Success/* +urKernelGetNativeHandleTest.InvalidNullHandleKernel/* 
+urKernelGetNativeHandleTest.InvalidNullPointerNativeKernel/* +urKernelGetSubGroupInfoTest.Success/* +urKernelGetSubGroupInfoTest.InvalidNullHandleKernel/* +urKernelGetSubGroupInfoTest.InvalidNullHandleDevice/* +urKernelGetSubGroupInfoTest.InvalidEnumeration/* +urKernelGetSubGroupInfoSingleTest.CompileNumSubgroupsIsZero/* +urKernelReleaseTest.Success/* +urKernelReleaseTest.KernelReleaseAfterProgramRelease/* +urKernelReleaseTest.InvalidNullHandleKernel/* +urKernelRetainTest.Success/* +urKernelRetainTest.InvalidNullHandleKernel/* +urKernelSetArgLocalTest.Success/* +urKernelSetArgLocalTest.InvalidNullHandleKernel/* +urKernelSetArgLocalTest.InvalidKernelArgumentIndex/* +urKernelSetArgMemObjTest.Success/* +urKernelSetArgMemObjTest.InvalidNullHandleKernel/* +urKernelSetArgMemObjTest.InvalidKernelArgumentIndex/* +urKernelSetArgPointerTest.SuccessHost/* +urKernelSetArgPointerTest.SuccessDevice/* +urKernelSetArgPointerTest.SuccessShared/* +urKernelSetArgPointerNegativeTest.InvalidNullHandleKernel/* +urKernelSetArgPointerNegativeTest.InvalidKernelArgumentIndex/* +urKernelSetArgSamplerTestWithParam.Success/* +urKernelSetArgSamplerTest.SuccessWithProps/* +urKernelSetArgSamplerTest.InvalidNullHandleKernel/* +urKernelSetArgSamplerTest.InvalidNullHandleArgValue/* +urKernelSetArgSamplerTest.InvalidKernelArgumentIndex/* +urKernelSetArgValueTest.Success/* +urKernelSetArgValueTest.InvalidNullHandleKernel/* +urKernelSetArgValueTest.InvalidNullPointerArgValue/* +urKernelSetArgValueTest.InvalidKernelArgumentIndex/* +urKernelSetArgValueTest.InvalidKernelArgumentSize/* +urKernelSetExecInfoTest.SuccessIndirectAccess/* +urKernelSetExecInfoTest.InvalidNullHandleKernel/* +urKernelSetExecInfoTest.InvalidEnumeration/* +urKernelSetExecInfoTest.InvalidNullPointerPropValue/* +urKernelSetExecInfoUSMPointersTest.SuccessHost/* +urKernelSetExecInfoUSMPointersTest.SuccessDevice/* +urKernelSetExecInfoUSMPointersTest.SuccessShared/* +urKernelSetExecInfoCacheConfigTest.Success/* 
+urKernelSetSpecializationConstantsTest.Success/* +urKernelSetSpecializationConstantsTest.InvalidNullHandleKernel/* +urKernelSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/* +urKernelSetSpecializationConstantsTest.InvalidSizeCount/* +urKernelSetSpecializationConstantsTest.InvalidValueSize/* +urKernelSetSpecializationConstantsTest.InvalidValueId/* +urKernelSetSpecializationConstantsTest.InvalidValuePtr/* +urKernelSetSpecializationConstantsNegativeTest.Unsupported/* +urKernelGetSuggestedLocalWorkSizeTest.Success/* +urKernelGetSuggestedLocalWorkSizeTest.Success2D/* +urKernelGetSuggestedLocalWorkSizeTest.Success3D/* +urKernelGetSuggestedLocalWorkSizeTest.InvalidNullHandleKernel/* +urKernelGetSuggestedLocalWorkSizeTest.InvalidNullHandleQueue/* +urKernelGetSuggestedLocalWorkSizeTest.InvalidWorkDimension/* +urKernelGetSuggestedLocalWorkSizeTest.InvalidGlobalOffset/* +urKernelGetSuggestedLocalWorkSizeTest.InvalidGlobalSize/* +urKernelGetSuggestedLocalWorkSizeTest.InvalidSuggestedLocalWorkSize/* diff --git a/test/conformance/kernel/kernel_adapter_opencl.match b/test/conformance/kernel/kernel_adapter_opencl.match index d65c8e51c8..4ccaeb9664 100644 --- a/test/conformance/kernel/kernel_adapter_opencl.match +++ b/test/conformance/kernel/kernel_adapter_opencl.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} -urKernelGetInfoTest.Success/Intel_R__OpenCL_{{.*}}_UR_KERNEL_INFO_NUM_REGS +urKernelGetInfoTest.Success/* diff --git a/test/conformance/memory/memory_adapter_cuda.match b/test/conformance/memory/memory_adapter_cuda.match index bc36329e55..255559f575 100644 --- a/test/conformance/memory/memory_adapter_cuda.match +++ b/test/conformance/memory/memory_adapter_cuda.match @@ -1,7 +1,6 @@ -{{NONDETERMINISTIC}} -urMemImageCreateTest.InvalidSize/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urMemImageCremBufferCrateTestWith1DMemoryTypeParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_TYPE_IMAGE1D_ARRAY 
-{{OPT}}urMemImageCreateTestWith2DMemoryTypeParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_MEM_TYPE_IMAGE2D_ARRAY -urMemBufferCreateWithNativeHandleTest.Success/NVIDIA_CUDA_BACKEND___{{.*}} -urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/NVIDIA_CUDA_BACKEND___{{.*}} -urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/NVIDIA_CUDA_BACKEND___{{.*}} +urMemImageCreateTest.InvalidSize/* +{{OPT}}urMemImageCremBufferCrateTestWith1DMemoryTypeParam.Success/*__UR_MEM_TYPE_IMAGE1D_ARRAY +{{OPT}}urMemImageCreateTestWith2DMemoryTypeParam.Success/*__UR_MEM_TYPE_IMAGE2D_ARRAY +urMemBufferCreateWithNativeHandleTest.Success/* +urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/* +urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/* diff --git a/test/conformance/memory/memory_adapter_hip.match b/test/conformance/memory/memory_adapter_hip.match index 4c83995b4f..a4181fcc8a 100644 --- a/test/conformance/memory/memory_adapter_hip.match +++ b/test/conformance/memory/memory_adapter_hip.match @@ -1,8 +1,6 @@ -{{NONDETERMINISTIC}} -urMemImageCreateTest.InvalidSize/AMD_HIP_BACKEND___{{.*}} -urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}} -urMemImageGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}} -urMemBufferCreateWithNativeHandleTest.Success/AMD_HIP_BACKEND___{{.*}} -urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/AMD_HIP_BACKEND___{{.*}} -urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/AMD_HIP_BACKEND___{{.*}} -urMemImageCreateWithNativeHandleTest.Success/AMD_HIP_BACKEND___{{.*}} +urMemImageCreateTest.InvalidSize/* +urMemImageGetInfoTest.Success/* +urMemBufferCreateWithNativeHandleTest.Success/* +urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/* +urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/* +urMemImageCreateWithNativeHandleTest.Success/* diff --git a/test/conformance/memory/memory_adapter_level_zero.match 
b/test/conformance/memory/memory_adapter_level_zero.match index bce63823cc..d137fc5ac1 100644 --- a/test/conformance/memory/memory_adapter_level_zero.match +++ b/test/conformance/memory/memory_adapter_level_zero.match @@ -1,38 +1,15 @@ -# Note: This file is only for use with cts_exe.py {{OPT}}urMemBufferMultiQueueMemBufferTest.WriteBack -urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__UR_MEM_FLAG_WRITE_ONLY -urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}__UR_MEM_FLAG_READ_ONLY -urMemBufferPartitionTest.InvalidValueCreateType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urMemImageCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -{{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_FLOAT +urMemBufferPartitionWithFlagsTest.Success/*__UR_MEM_FLAG_WRITE_ONLY +urMemBufferPartitionWithFlagsTest.Success/*__UR_MEM_FLAG_READ_ONLY +urMemBufferPartitionTest.InvalidValueCreateType/* +urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/* +{{OPT}}urMemImageCreateWithNativeHandleTest.Success/* +{{OPT}}urMemGetInfoImageTest.Success/*__UR_MEM_INFO_SIZE +{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/*__UR_IMAGE_CHANNEL_ORDER_RGBA__* + # These tests fail in the "Multi device testing" job, but pass in the hardware specific test {{OPT}}urMemImageCreateTest.InvalidImageDescStype/* {{OPT}}urMemImageCreateTest.InvalidSize/* {{OPT}}urMemImageCreateWithHostPtrFlagsTest.Success/* 
-{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/*__UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.Success/*__UR_IMAGE_INFO_WIDTH +{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/* +{{OPT}}urMemImageGetInfoTest.Success/* diff --git a/test/conformance/memory/memory_adapter_level_zero_v2.match b/test/conformance/memory/memory_adapter_level_zero_v2.match index 3ff57fc1e5..d2f34a947d 100644 --- a/test/conformance/memory/memory_adapter_level_zero_v2.match +++ b/test/conformance/memory/memory_adapter_level_zero_v2.match @@ -1,291 +1,20 @@ -{{NONDETERMINISTIC}} -{{OPT}}urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_WRITE_ONLY -{{OPT}}urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_READ_ONLY -{{OPT}}urMemBufferPartitionWithFlagsTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_FLAG_READ_WRITE -{{OPT}}urMemBufferPartitionTest.InvalidValueCreateType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ 
-{{OPT}}urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_SIZE -{{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_CONTEXT -{{OPT}}urMemGetInfoImageTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_MEM_INFO_REFERENCE_COUNT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_FLOAT 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_INT_101010 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_FLOAT 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 
-{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_INT_101010 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_FLOAT -{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT 
-{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE 
-{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH 
-{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_FORMAT -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ELEMENT_SIZE -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_ROW_PITCH 
-{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_SLICE_PITCH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_WIDTH -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_HEIGHT -{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_IMAGE_INFO_DEPTH -{{OPT}}urMemBufferCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -{{OPT}}urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -{{OPT}}urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -{{OPT}}urMemBufferCreateWithNativeHandleTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -{{OPT}}urMemBufferCreateWithNativeHandleTest.InvalidNullPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -{{OPT}}urMemImageCreateWithNativeHandleTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -{{OPT}}urMemImageCreateWithNativeHandleTest.InvalidNullHandle/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -{{OPT}}urMemImageCreateWithNativeHandleTest.InvalidNullPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +{{OPT}}urMemBufferPartitionWithFlagsTest.Success/* +{{OPT}}urMemBufferPartitionTest.InvalidValueCreateType/* +{{OPT}}urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/* +{{OPT}}urMemGetInfoImageTest.Success/* +{{OPT}}urMemImageCreateTestWithImageFormatParam.Success/* +{{OPT}}urMemImageGetInfoTest.Success/* +{{OPT}}urMemImageGetInfoTest.InvalidNullHandleImage/* +{{OPT}}urMemImageGetInfoTest.InvalidEnumerationImageInfoType/* 
+{{OPT}}urMemImageGetInfoTest.InvalidSizeZero/* +{{OPT}}urMemImageGetInfoTest.InvalidSizeSmall/* +{{OPT}}urMemImageGetInfoTest.InvalidNullPointerParamValue/* +{{OPT}}urMemImageGetInfoTest.InvalidNullPointerPropSizeRet/* +{{OPT}}urMemBufferCreateWithNativeHandleTest.Success/* +{{OPT}}urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/* +{{OPT}}urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/* +{{OPT}}urMemBufferCreateWithNativeHandleTest.InvalidNullHandle/* +{{OPT}}urMemBufferCreateWithNativeHandleTest.InvalidNullPointer/* +{{OPT}}urMemImageCreateWithNativeHandleTest.Success/* +{{OPT}}urMemImageCreateWithNativeHandleTest.InvalidNullHandle/* +{{OPT}}urMemImageCreateWithNativeHandleTest.InvalidNullPointer/* diff --git a/test/conformance/memory/memory_adapter_native_cpu.match b/test/conformance/memory/memory_adapter_native_cpu.match index f22ebcc2e9..aafd22075c 100644 --- a/test/conformance/memory/memory_adapter_native_cpu.match +++ b/test/conformance/memory/memory_adapter_native_cpu.match @@ -1,243 +1,16 @@ -{{NONDETERMINISTIC}} -urMemBufferPartitionWithFlagsTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_FLAG_WRITE_ONLY -urMemBufferPartitionWithFlagsTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_FLAG_READ_ONLY -urMemBufferPartitionTest.InvalidValueCreateType/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urMemGetInfoTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_SIZE -urMemGetInfoTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_CONTEXT -urMemGetInfoTestWithParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_MEM_INFO_REFERENCE_COUNT -urMemGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_A__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_R__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_INT_101010 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RG__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RA__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGB__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBA__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_INT_101010 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_BGRA__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ARGB__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_ABGR__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_INTENSITY__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_INT_101010 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_LUMINANCE__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RX__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGX__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_INT_101010 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_RGBX__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_INT_101010 
-urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_HALF_FLOAT -urMemImageCreateTestWithImageFormatParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_IMAGE_CHANNEL_ORDER_SRGBA__UR_IMAGE_CHANNEL_TYPE_FLOAT -urMemReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urMemRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urMemReleaseTest.CheckReferenceCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urMemRetainTest.CheckReferenceCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urMemBufferCreateWithNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urMemBufferCreateWithNativeHandleTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-urMemBufferCreateWithNativeHandleTest.InvalidNullPointer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urMemBufferPartitionWithFlagsTest.Success/*__UR_MEM_FLAG_WRITE_ONLY +urMemBufferPartitionWithFlagsTest.Success/*__UR_MEM_FLAG_READ_ONLY +urMemBufferPartitionTest.InvalidValueCreateType/* +urMemBufferPartitionTest.InvalidValueBufferCreateInfoOutOfBounds/* +urMemGetInfoTestWithParam.Success/* +urMemGetInfoTest.InvalidSizeSmall/* +urMemImageCreateTestWithImageFormatParam.Success/* +urMemReleaseTest.Success/* +urMemReleaseTest.CheckReferenceCount/* +urMemRetainTest.Success/* +urMemRetainTest.CheckReferenceCount/* +urMemBufferCreateWithNativeHandleTest.Success/* +urMemBufferCreateWithNativeHandleTest.SuccessWithOwnedNativeHandle/* +urMemBufferCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/* +urMemBufferCreateWithNativeHandleTest.InvalidNullHandle/* +urMemBufferCreateWithNativeHandleTest.InvalidNullPointer/* diff --git a/test/conformance/program/program_adapter_cuda.match b/test/conformance/program/program_adapter_cuda.match index fac749462e..11bf1c3e67 100644 --- a/test/conformance/program/program_adapter_cuda.match +++ b/test/conformance/program/program_adapter_cuda.match @@ -1,14 +1,13 @@ -{{NONDETERMINISTIC}} -urProgramBuildTest.BuildFailure/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urProgramCreateWithILTest.Success/NVIDIA_CUDA_BACKEND___{{.*}} -{{OPT}}urProgramCreateWithILTest.SuccessWithProperties/NVIDIA_CUDA_BACKEND___{{.*}} -{{OPT}}urProgramCreateWithILTest.BuildInvalidProgram/NVIDIA_CUDA_BACKEND___{{.*}} +urProgramBuildTest.BuildFailure/* +{{OPT}}urProgramCreateWithILTest.Success/* +{{OPT}}urProgramCreateWithILTest.SuccessWithProperties/* +{{OPT}}urProgramCreateWithILTest.BuildInvalidProgram/* # This test flakily fails -{{OPT}}urProgramGetBuildInfoSingleTest.LogIsNullTerminated/NVIDIA_CUDA_BACKEND___{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.Success/NVIDIA_CUDA_BACKEND___{{.*}} 
-{{OPT}}urProgramSetSpecializationConstantsTest.UseDefaultValue/NVIDIA_CUDA_BACKEND___{{.*}} -urProgramSetSpecializationConstantsTest.InvalidValueSize/NVIDIA_CUDA_BACKEND___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValueId/NVIDIA_CUDA_BACKEND___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValuePtr/NVIDIA_CUDA_BACKEND___{{.*}}_ -urProgramSetMultipleSpecializationConstantsTest.MultipleCalls/NVIDIA_CUDA_BACKEND___{{.*}}_ -urProgramSetMultipleSpecializationConstantsTest.SingleCall/NVIDIA_CUDA_BACKEND___{{.*}}_ +{{OPT}}urProgramGetBuildInfoSingleTest.LogIsNullTerminated/* +{{OPT}}urProgramSetSpecializationConstantsTest.Success/* +{{OPT}}urProgramSetSpecializationConstantsTest.UseDefaultValue/* +urProgramSetSpecializationConstantsTest.InvalidValueSize/* +urProgramSetSpecializationConstantsTest.InvalidValueId/* +urProgramSetSpecializationConstantsTest.InvalidValuePtr/* +urProgramSetMultipleSpecializationConstantsTest.MultipleCalls/* +urProgramSetMultipleSpecializationConstantsTest.SingleCall/* diff --git a/test/conformance/program/program_adapter_hip.match b/test/conformance/program/program_adapter_hip.match index 2f93f09660..69fe6ac1bb 100644 --- a/test/conformance/program/program_adapter_hip.match +++ b/test/conformance/program/program_adapter_hip.match @@ -1,19 +1,18 @@ -{{NONDETERMINISTIC}} -urProgramBuildTest.BuildFailure/AMD_HIP_BACKEND___{{.*}}_ +urProgramBuildTest.BuildFailure/* # HIP hasn't implemented urProgramCreateWithNativeHandleTest -{{OPT}}urProgramCreateWithNativeHandleTest.Success/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urProgramCreateWithNativeHandleTest.Success/* # HIP doesn't expose kernel numbers or names -urProgramGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_NUM_KERNELS -urProgramGetInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_PROGRAM_INFO_KERNEL_NAMES +urProgramGetInfoTest.Success/*__UR_PROGRAM_INFO_NUM_KERNELS +urProgramGetInfoTest.Success/*__UR_PROGRAM_INFO_KERNEL_NAMES # HIP hasn't implemented urProgramLink 
-{{OPT}}urProgramLinkTest.Success/AMD_HIP_BACKEND___{{.*}}_ +{{OPT}}urProgramLinkTest.Success/* # Hip doesn't support specialization constants -urProgramSetSpecializationConstantsTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urProgramSetSpecializationConstantsTest.UseDefaultValue/AMD_HIP_BACKEND___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValueSize/AMD_HIP_BACKEND___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValueId/AMD_HIP_BACKEND___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValuePtr/AMD_HIP_BACKEND___{{.*}}_ -urProgramSetMultipleSpecializationConstantsTest.MultipleCalls/AMD_HIP_BACKEND___{{.*}}_ -urProgramSetMultipleSpecializationConstantsTest.SingleCall/AMD_HIP_BACKEND___{{.*}}_ +urProgramSetSpecializationConstantsTest.Success/* +urProgramSetSpecializationConstantsTest.UseDefaultValue/* +urProgramSetSpecializationConstantsTest.InvalidValueSize/* +urProgramSetSpecializationConstantsTest.InvalidValueId/* +urProgramSetSpecializationConstantsTest.InvalidValuePtr/* +urProgramSetMultipleSpecializationConstantsTest.MultipleCalls/* +urProgramSetMultipleSpecializationConstantsTest.SingleCall/* diff --git a/test/conformance/program/program_adapter_level_zero.match b/test/conformance/program/program_adapter_level_zero.match index bd7e269d9f..97d6869b81 100644 --- a/test/conformance/program/program_adapter_level_zero.match +++ b/test/conformance/program/program_adapter_level_zero.match @@ -1,4 +1,3 @@ -{{NONDETERMINISTIC}} -urProgramSetSpecializationConstantsTest.InvalidValueSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -urProgramSetSpecializationConstantsTest.InvalidValueId/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -urProgramSetSpecializationConstantsTest.InvalidValuePtr/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} +urProgramSetSpecializationConstantsTest.InvalidValueSize/* +urProgramSetSpecializationConstantsTest.InvalidValueId/* +urProgramSetSpecializationConstantsTest.InvalidValuePtr/* diff 
--git a/test/conformance/program/program_adapter_level_zero_v2.match b/test/conformance/program/program_adapter_level_zero_v2.match index 892b7cfb51..97d6869b81 100644 --- a/test/conformance/program/program_adapter_level_zero_v2.match +++ b/test/conformance/program/program_adapter_level_zero_v2.match @@ -1,4 +1,3 @@ -{{NONDETERMINISTIC}} -urProgramSetSpecializationConstantsTest.InvalidValueSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}} -urProgramSetSpecializationConstantsTest.InvalidValueId/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urProgramSetSpecializationConstantsTest.InvalidValuePtr/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urProgramSetSpecializationConstantsTest.InvalidValueSize/* +urProgramSetSpecializationConstantsTest.InvalidValueId/* +urProgramSetSpecializationConstantsTest.InvalidValuePtr/* diff --git a/test/conformance/program/program_adapter_native_cpu.match b/test/conformance/program/program_adapter_native_cpu.match index 47163ce042..a0e19369a6 100644 --- a/test/conformance/program/program_adapter_native_cpu.match +++ b/test/conformance/program/program_adapter_native_cpu.match @@ -1,147 +1,78 @@ -{{NONDETERMINISTIC}} -{{OPT}}urProgramBuildTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramBuildTest.SuccessWithOptions/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramBuildTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramBuildTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramBuildTest.BuildFailure/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCompileTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCompileTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCompileTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urProgramCreateWithBinaryTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.InvalidNullPointerBinary/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.InvalidNullPointerProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.InvalidNullPointerMetadata/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.InvalidSizePropertyCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithBinaryTest.BuildInvalidProgramBinary/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithILTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithILTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithILTest.InvalidNullHandle/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithILTest.InvalidNullPointerSource/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithILTest.InvalidSizeLength/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithILTest.InvalidNullPointerProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithILTest.BuildInvalidProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetBuildInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_STATUS -{{OPT}}urProgramGetBuildInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_OPTIONS 
-{{OPT}}urProgramGetBuildInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_LOG -{{OPT}}urProgramGetBuildInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_BINARY_TYPE -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_STATUS -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_OPTIONS -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_LOG -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_BINARY_TYPE -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_STATUS -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_OPTIONS -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_LOG -{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_BINARY_TYPE -{{OPT}}urProgramGetBuildInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_STATUS -{{OPT}}urProgramGetBuildInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_OPTIONS -{{OPT}}urProgramGetBuildInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_LOG -{{OPT}}urProgramGetBuildInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_BUILD_INFO_BINARY_TYPE -{{OPT}}urProgramGetBuildInfoSingleTest.LogIsNullTerminated/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetFunctionPointerTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urProgramGetFunctionPointerTest.InvalidKernelName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetFunctionPointerTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetFunctionPointerTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetFunctionPointerTest.InvalidNullPointerFunctionName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetFunctionPointerTest.InvalidNullPointerFunctionPointer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetGlobalVariablePointerTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullHandleDevice/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidVariableName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullPointerVariableName/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullPointerVariablePointer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES 
-{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL 
-{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.InvalidSizeZero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES 
-{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_REFERENCE_COUNT -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_CONTEXT 
-{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_DEVICES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_IL -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARY_SIZES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_BINARIES -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_NUM_KERNELS -{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_PROGRAM_INFO_KERNEL_NAMES -{{OPT}}urProgramGetInfoSingleTest.NumDevicesIsNonzero/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetInfoSingleTest.NumDevicesMatchesDeviceArray/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetInfoSingleTest.NumDevicesMatchesContextNumDevices/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetNativeHandleTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetNativeHandleTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramGetNativeHandleTest.InvalidNullPointerNativeProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkTest.InvalidNullPointerProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkTest.InvalidNullPointerInputPrograms/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkTest.InvalidSizeCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-{{OPT}}urProgramLinkTest.SetOutputOnZeroCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkErrorTest.LinkFailure/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramLinkErrorTest.SetOutputOnLinkError/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramReleaseTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramRetainTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.UseDefaultValue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.InvalidNullHandleProgram/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.InvalidSizeCount/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.InvalidValueSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.InvalidValueId/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetSpecializationConstantsTest.InvalidValuePtr/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetMultipleSpecializationConstantsTest.MultipleCalls/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -{{OPT}}urProgramSetMultipleSpecializationConstantsTest.SingleCall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +{{OPT}}urProgramBuildTest.Success/* +{{OPT}}urProgramBuildTest.SuccessWithOptions/* +{{OPT}}urProgramBuildTest.InvalidNullHandleContext/* +{{OPT}}urProgramBuildTest.InvalidNullHandleProgram/* +{{OPT}}urProgramBuildTest.BuildFailure/* +{{OPT}}urProgramCompileTest.Success/* 
+{{OPT}}urProgramCompileTest.InvalidNullHandleContext/* +{{OPT}}urProgramCompileTest.InvalidNullHandleProgram/* +{{OPT}}urProgramCreateWithBinaryTest.Success/* +{{OPT}}urProgramCreateWithBinaryTest.InvalidNullHandleContext/* +{{OPT}}urProgramCreateWithBinaryTest.InvalidNullHandleDevice/* +{{OPT}}urProgramCreateWithBinaryTest.InvalidNullPointerBinary/* +{{OPT}}urProgramCreateWithBinaryTest.InvalidNullPointerProgram/* +{{OPT}}urProgramCreateWithBinaryTest.InvalidNullPointerMetadata/* +{{OPT}}urProgramCreateWithBinaryTest.InvalidSizePropertyCount/* +{{OPT}}urProgramCreateWithBinaryTest.BuildInvalidProgramBinary/* +{{OPT}}urProgramCreateWithILTest.Success/* +{{OPT}}urProgramCreateWithILTest.SuccessWithProperties/* +{{OPT}}urProgramCreateWithILTest.InvalidNullHandle/* +{{OPT}}urProgramCreateWithILTest.InvalidNullPointerSource/* +{{OPT}}urProgramCreateWithILTest.InvalidSizeLength/* +{{OPT}}urProgramCreateWithILTest.InvalidNullPointerProgram/* +{{OPT}}urProgramCreateWithILTest.BuildInvalidProgram/* +{{OPT}}urProgramCreateWithNativeHandleTest.Success/* +{{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullHandleContext/* +{{OPT}}urProgramCreateWithNativeHandleTest.InvalidNullPointerProgram/* +{{OPT}}urProgramGetBuildInfoTest.Success/* +{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleProgram/* +{{OPT}}urProgramGetBuildInfoTest.InvalidNullHandleDevice/* +{{OPT}}urProgramGetBuildInfoTest.InvalidEnumeration/* +{{OPT}}urProgramGetBuildInfoSingleTest.LogIsNullTerminated/* +{{OPT}}urProgramGetFunctionPointerTest.Success/* +{{OPT}}urProgramGetFunctionPointerTest.InvalidKernelName/* +{{OPT}}urProgramGetFunctionPointerTest.InvalidNullHandleDevice/* +{{OPT}}urProgramGetFunctionPointerTest.InvalidNullHandleProgram/* +{{OPT}}urProgramGetFunctionPointerTest.InvalidNullPointerFunctionName/* +{{OPT}}urProgramGetFunctionPointerTest.InvalidNullPointerFunctionPointer/* +{{OPT}}urProgramGetGlobalVariablePointerTest.Success/* 
+{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullHandleDevice/* +{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullHandleProgram/* +{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidVariableName/* +{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullPointerVariableName/* +{{OPT}}urProgramGetGlobalVariablePointerTest.InvalidNullPointerVariablePointer/* +{{OPT}}urProgramGetInfoTest.Success/* +{{OPT}}urProgramGetInfoTest.InvalidNullHandleProgram/* +{{OPT}}urProgramGetInfoTest.InvalidEnumeration/* +{{OPT}}urProgramGetInfoTest.InvalidSizeZero/* +{{OPT}}urProgramGetInfoTest.InvalidSizeSmall/* +{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValue/* +{{OPT}}urProgramGetInfoTest.InvalidNullPointerPropValueRet/* +{{OPT}}urProgramGetInfoSingleTest.NumDevicesIsNonzero/* +{{OPT}}urProgramGetInfoSingleTest.NumDevicesMatchesDeviceArray/* +{{OPT}}urProgramGetInfoSingleTest.NumDevicesMatchesContextNumDevices/* +{{OPT}}urProgramGetNativeHandleTest.Success/* +{{OPT}}urProgramGetNativeHandleTest.InvalidNullHandleProgram/* +{{OPT}}urProgramGetNativeHandleTest.InvalidNullPointerNativeProgram/* +{{OPT}}urProgramLinkTest.Success/* +{{OPT}}urProgramLinkTest.InvalidNullHandleContext/* +{{OPT}}urProgramLinkTest.InvalidNullPointerProgram/* +{{OPT}}urProgramLinkTest.InvalidNullPointerInputPrograms/* +{{OPT}}urProgramLinkTest.InvalidSizeCount/* +{{OPT}}urProgramLinkTest.SetOutputOnZeroCount/* +{{OPT}}urProgramLinkErrorTest.LinkFailure/* +{{OPT}}urProgramLinkErrorTest.SetOutputOnLinkError/* +{{OPT}}urProgramReleaseTest.Success/* +{{OPT}}urProgramReleaseTest.InvalidNullHandleProgram/* +{{OPT}}urProgramRetainTest.Success/* +{{OPT}}urProgramRetainTest.InvalidNullHandleProgram/* +{{OPT}}urProgramSetSpecializationConstantsTest.Success/* +{{OPT}}urProgramSetSpecializationConstantsTest.UseDefaultValue/* +{{OPT}}urProgramSetSpecializationConstantsTest.InvalidNullHandleProgram/* +{{OPT}}urProgramSetSpecializationConstantsTest.InvalidNullPointerSpecConstants/* 
+{{OPT}}urProgramSetSpecializationConstantsTest.InvalidSizeCount/* +{{OPT}}urProgramSetSpecializationConstantsTest.InvalidValueSize/* +{{OPT}}urProgramSetSpecializationConstantsTest.InvalidValueId/* +{{OPT}}urProgramSetSpecializationConstantsTest.InvalidValuePtr/* +{{OPT}}urProgramSetMultipleSpecializationConstantsTest.MultipleCalls/* +{{OPT}}urProgramSetMultipleSpecializationConstantsTest.SingleCall/* diff --git a/test/conformance/queue/queue_adapter_native_cpu.match b/test/conformance/queue/queue_adapter_native_cpu.match index 5d39450e12..9ae8f7c03a 100644 --- a/test/conformance/queue/queue_adapter_native_cpu.match +++ b/test/conformance/queue/queue_adapter_native_cpu.match @@ -1,41 +1,14 @@ -{{NONDETERMINISTIC}} -urQueueCreateTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urQueueCreateTest.CheckContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_PROFILING_ENABLE -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_ON_DEVICE -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_ON_DEVICE_DEFAULT -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_DISCARD_EVENTS -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_PRIORITY_LOW -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_PRIORITY_HIGH -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SUBMISSION_BATCHED -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE 
-urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_USE_DEFAULT_STREAM -urQueueCreateWithParamTest.SuccessWithProperties/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_PROFILING_ENABLE -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_ON_DEVICE -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_ON_DEVICE_DEFAULT -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_DISCARD_EVENTS -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_PRIORITY_LOW -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_PRIORITY_HIGH -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SUBMISSION_BATCHED -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_USE_DEFAULT_STREAM -urQueueCreateWithParamTest.MatchingDeviceHandles/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_FLAG_SYNC_WITH_DEFAULT_STREAM -urQueueFlushTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_CONTEXT -urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_DEVICE 
-urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_FLAGS -urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_REFERENCE_COUNT -urQueueGetInfoTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_EMPTY -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_CONTEXT -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_DEVICE -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_DEVICE_DEFAULT -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_FLAGS -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_REFERENCE_COUNT -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_SIZE -urQueueGetInfoDeviceQueueTestWithInfoParam.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_QUEUE_INFO_EMPTY -urQueueGetInfoTest.InvalidSizeSmall/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urQueueRetainTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urQueueReleaseTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} +urQueueCreateTest.Success/* +urQueueCreateTest.CheckContext/* +urQueueCreateWithParamTest.SuccessWithProperties/* +urQueueCreateWithParamTest.MatchingDeviceHandles/* +urQueueFlushTest.Success/* +urQueueGetInfoTestWithInfoParam.Success/*__UR_QUEUE_INFO_CONTEXT +urQueueGetInfoTestWithInfoParam.Success/*__UR_QUEUE_INFO_DEVICE +urQueueGetInfoTestWithInfoParam.Success/*__UR_QUEUE_INFO_FLAGS +urQueueGetInfoTestWithInfoParam.Success/*__UR_QUEUE_INFO_REFERENCE_COUNT +urQueueGetInfoTestWithInfoParam.Success/*__UR_QUEUE_INFO_EMPTY +urQueueGetInfoDeviceQueueTestWithInfoParam.Success/* +urQueueGetInfoTest.InvalidSizeSmall/* 
+urQueueRetainTest.Success/* +urQueueReleaseTest.Success/* diff --git a/test/conformance/sampler/sampler_adapter_level_zero.match b/test/conformance/sampler/sampler_adapter_level_zero.match index bf65bfcea6..db656e2b95 100644 --- a/test/conformance/sampler/sampler_adapter_level_zero.match +++ b/test/conformance/sampler/sampler_adapter_level_zero.match @@ -1,9 +1,4 @@ -{{NONDETERMINISTIC}} -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_REFERENCE_COUNT -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_CONTEXT -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_NORMALIZED_COORDS -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_ADDRESSING_MODE -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_FILTER_MODE -{{OPT}}urSamplerGetInfoTest.InvalidSizePropSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urSamplerReleaseTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urSamplerRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urSamplerGetInfoTestWithParam.Success/* +{{OPT}}urSamplerGetInfoTest.InvalidSizePropSizeSmall/* +{{OPT}}urSamplerReleaseTest.Success/* +{{OPT}}urSamplerRetainTest.Success/* diff --git a/test/conformance/sampler/sampler_adapter_level_zero_v2.match b/test/conformance/sampler/sampler_adapter_level_zero_v2.match index bf65bfcea6..db656e2b95 100644 --- a/test/conformance/sampler/sampler_adapter_level_zero_v2.match +++ b/test/conformance/sampler/sampler_adapter_level_zero_v2.match @@ -1,9 +1,4 @@ -{{NONDETERMINISTIC}} 
-{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_REFERENCE_COUNT -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_CONTEXT -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_NORMALIZED_COORDS -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_ADDRESSING_MODE -{{OPT}}urSamplerGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_SAMPLER_INFO_FILTER_MODE -{{OPT}}urSamplerGetInfoTest.InvalidSizePropSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urSamplerReleaseTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -{{OPT}}urSamplerRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +{{OPT}}urSamplerGetInfoTestWithParam.Success/* +{{OPT}}urSamplerGetInfoTest.InvalidSizePropSizeSmall/* +{{OPT}}urSamplerReleaseTest.Success/* +{{OPT}}urSamplerRetainTest.Success/* diff --git a/test/conformance/usm/usm_adapter_cuda.match b/test/conformance/usm/usm_adapter_cuda.match index a9f7c37b87..5460c48661 100644 --- a/test/conformance/usm/usm_adapter_cuda.match +++ b/test/conformance/usm/usm_adapter_cuda.match @@ -1,8 +1,4 @@ -{{NONDETERMINISTIC}} -{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMHostAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled -{{OPT}}urUSMHostAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled -{{OPT}}urUSMPoolCreateTest.SuccessWithFlag/NVIDIA_CUDA_BACKEND___{{.*}}_ -{{OPT}}urUSMSharedAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolEnabled 
-{{OPT}}urUSMSharedAllocTest.InvalidUSMSize/NVIDIA_CUDA_BACKEND___{{.*}}___UsePoolDisabled +{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/* +{{OPT}}urUSMHostAllocTest.InvalidUSMSize/* +{{OPT}}urUSMPoolCreateTest.SuccessWithFlag/* +{{OPT}}urUSMSharedAllocTest.InvalidUSMSize/* diff --git a/test/conformance/usm/usm_adapter_hip.match b/test/conformance/usm/usm_adapter_hip.match index 5a1be3c9d4..d1ae0952f4 100644 --- a/test/conformance/usm/usm_adapter_hip.match +++ b/test/conformance/usm/usm_adapter_hip.match @@ -1,85 +1,84 @@ -{{NONDETERMINISTIC}} -urUSMDeviceAllocTest.Success/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.SuccessWithDescriptors/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidNullPtrResult/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidUSMSize/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocTest.InvalidUSMSize/AMD_HIP_BACKEND___{{.*}}___UsePoolDisabled -urUSMDeviceAllocTest.InvalidValueAlignPowerOfTwo/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_8 
-urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_2048 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_8 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_512 -urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_2048 -urUSMGetMemAllocInfoTest.Success/AMD_HIP_BACKEND___{{.*}}___UR_USM_ALLOC_INFO_POOL -urUSMHostAllocTest.Success/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.SuccessWithDescriptors/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.InvalidNullPtrMem/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.InvalidUSMSize/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMHostAllocTest.InvalidUSMSize/AMD_HIP_BACKEND___{{.*}}___UsePoolDisabled -urUSMHostAllocTest.InvalidValueAlignPowerOfTwo/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_8 
-urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_2048 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_8 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_512 -urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_2048 -urUSMPoolCreateTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolCreateTest.SuccessWithFlag/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolGetInfoTestWithInfoParam.Success/AMD_HIP_BACKEND___{{.*}}___UR_USM_POOL_INFO_CONTEXT -urUSMPoolGetInfoTestWithInfoParam.Success/AMD_HIP_BACKEND___{{.*}}___UR_USM_POOL_INFO_REFERENCE_COUNT -urUSMPoolGetInfoTest.InvalidNullHandlePool/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolGetInfoTest.InvalidEnumerationProperty/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolGetInfoTest.InvalidSizeZero/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolGetInfoTest.InvalidSizeTooSmall/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolGetInfoTest.InvalidNullPointerPropValue/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolDestroyTest.Success/AMD_HIP_BACKEND___{{.*}}_ 
-urUSMPoolDestroyTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolRetainTest.Success/AMD_HIP_BACKEND___{{.*}}_ -urUSMPoolRetainTest.InvalidNullHandlePool/AMD_HIP_BACKEND___{{.*}}_ -urUSMSharedAllocTest.Success/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.SuccessWithDescriptors/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.SuccessWithMultipleAdvices/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.InvalidNullHandleContext/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.InvalidNullHandleDevice/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.InvalidNullPtrMem/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.InvalidUSMSize/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocTest.InvalidUSMSize/AMD_HIP_BACKEND___{{.*}}___UsePoolDisabled -urUSMSharedAllocTest.InvalidValueAlignPowerOfTwo/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_4_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_8_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_16_2048 
-urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_32_2048 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_8 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_512 -urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/AMD_HIP_BACKEND___{{.*}}___UsePoolEnabled_64_2048 +urUSMDeviceAllocTest.Success/*__UsePoolEnabled +urUSMDeviceAllocTest.SuccessWithDescriptors/*__UsePoolEnabled +urUSMDeviceAllocTest.InvalidNullHandleContext/*__UsePoolEnabled +urUSMDeviceAllocTest.InvalidNullHandleDevice/*__UsePoolEnabled +urUSMDeviceAllocTest.InvalidNullPtrResult/*__UsePoolEnabled +urUSMDeviceAllocTest.InvalidUSMSize/*__UsePoolEnabled +urUSMDeviceAllocTest.InvalidUSMSize/*__UsePoolDisabled +urUSMDeviceAllocTest.InvalidValueAlignPowerOfTwo/*__UsePoolEnabled +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_8 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_512 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_2048 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_8 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_512 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_2048 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_8 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_512 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_2048 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_8 
+urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_512 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_2048 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_8 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_512 +urUSMDeviceAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_2048 +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_POOL +urUSMHostAllocTest.Success/*__UsePoolEnabled +urUSMHostAllocTest.SuccessWithDescriptors/*__UsePoolEnabled +urUSMHostAllocTest.InvalidNullHandleContext/*__UsePoolEnabled +urUSMHostAllocTest.InvalidNullPtrMem/*__UsePoolEnabled +urUSMHostAllocTest.InvalidUSMSize/*__UsePoolEnabled +urUSMHostAllocTest.InvalidUSMSize/*__UsePoolDisabled +urUSMHostAllocTest.InvalidValueAlignPowerOfTwo/*__UsePoolEnabled +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_8 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_512 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_2048 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_8 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_512 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_2048 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_8 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_512 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_2048 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_8 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_512 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_2048 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_8 +urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_512 
+urUSMHostAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_2048 +urUSMPoolCreateTest.Success/* +urUSMPoolCreateTest.SuccessWithFlag/* +urUSMPoolGetInfoTestWithInfoParam.Success/*__UR_USM_POOL_INFO_CONTEXT +urUSMPoolGetInfoTestWithInfoParam.Success/*__UR_USM_POOL_INFO_REFERENCE_COUNT +urUSMPoolGetInfoTest.InvalidNullHandlePool/* +urUSMPoolGetInfoTest.InvalidEnumerationProperty/* +urUSMPoolGetInfoTest.InvalidSizeZero/* +urUSMPoolGetInfoTest.InvalidSizeTooSmall/* +urUSMPoolGetInfoTest.InvalidNullPointerPropValue/* +urUSMPoolGetInfoTest.InvalidNullPointerPropSizeRet/* +urUSMPoolDestroyTest.Success/* +urUSMPoolDestroyTest.InvalidNullHandleContext/* +urUSMPoolRetainTest.Success/* +urUSMPoolRetainTest.InvalidNullHandlePool/* +urUSMSharedAllocTest.Success/*__UsePoolEnabled +urUSMSharedAllocTest.SuccessWithDescriptors/*__UsePoolEnabled +urUSMSharedAllocTest.SuccessWithMultipleAdvices/*__UsePoolEnabled +urUSMSharedAllocTest.InvalidNullHandleContext/*__UsePoolEnabled +urUSMSharedAllocTest.InvalidNullHandleDevice/*__UsePoolEnabled +urUSMSharedAllocTest.InvalidNullPtrMem/*__UsePoolEnabled +urUSMSharedAllocTest.InvalidUSMSize/*__UsePoolEnabled +urUSMSharedAllocTest.InvalidUSMSize/*__UsePoolDisabled +urUSMSharedAllocTest.InvalidValueAlignPowerOfTwo/*__UsePoolEnabled +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_8 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_512 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_4_2048 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_8 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_512 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_8_2048 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_8 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_512 
+urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_16_2048 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_8 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_512 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_32_2048 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_8 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_512 +urUSMSharedAllocAlignmentTest.SuccessAlignedAllocations/*__UsePoolEnabled_64_2048 diff --git a/test/conformance/usm/usm_adapter_level_zero.match b/test/conformance/usm/usm_adapter_level_zero.match index 6f2d5ab1f9..201c351120 100644 --- a/test/conformance/usm/usm_adapter_level_zero.match +++ b/test/conformance/usm/usm_adapter_level_zero.match @@ -1,3 +1 @@ -{{NONDETERMINISTIC}} -{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolEnabled -{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UsePoolDisabled +{{OPT}}urUSMDeviceAllocTest.InvalidUSMSize/* diff --git a/test/conformance/usm/usm_adapter_level_zero_v2.match b/test/conformance/usm/usm_adapter_level_zero_v2.match index 85f9c4e5c0..6c9b1a07a0 100644 --- a/test/conformance/usm/usm_adapter_level_zero_v2.match +++ b/test/conformance/usm/usm_adapter_level_zero_v2.match @@ -1,8 +1,4 @@ -{{NONDETERMINISTIC}} -urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled -urUSMDeviceAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled -urUSMGetMemAllocInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UR_USM_ALLOC_INFO_POOL -urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled 
-urUSMHostAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled -urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolEnabled -urUSMSharedAllocTest.InvalidUSMSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}____UsePoolDisabled +urUSMDeviceAllocTest.InvalidUSMSize/* +urUSMGetMemAllocInfoTest.Success/*___UR_USM_ALLOC_INFO_POOL +urUSMHostAllocTest.InvalidUSMSize/* +urUSMSharedAllocTest.InvalidUSMSize/* diff --git a/test/conformance/usm/usm_adapter_native_cpu.match b/test/conformance/usm/usm_adapter_native_cpu.match index a0de04a27b..6ef26e2bdf 100644 --- a/test/conformance/usm/usm_adapter_native_cpu.match +++ b/test/conformance/usm/usm_adapter_native_cpu.match @@ -1,18 +1,17 @@ -{{NONDETERMINISTIC}} -urUSMDeviceAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMDeviceAllocTest.InvalidUSMSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMFreeTest.SuccessDeviceAlloc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urUSMFreeTest.SuccessHostAlloc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urUSMFreeTest.SuccessSharedAlloc/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urUSMGetMemAllocInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_ALLOC_INFO_TYPE -urUSMGetMemAllocInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_ALLOC_INFO_BASE_PTR -urUSMGetMemAllocInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_ALLOC_INFO_SIZE -urUSMGetMemAllocInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_ALLOC_INFO_DEVICE -urUSMGetMemAllocInfoTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UR_USM_ALLOC_INFO_POOL -urUSMGetMemAllocInfoNegativeTest.InvalidNullHandleContext/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urUSMGetMemAllocInfoNegativeTest.InvalidNullPointerMem/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} 
-urUSMGetMemAllocInfoNegativeTest.InvalidEnumeration/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urUSMGetMemAllocInfoNegativeTest.InvalidValuePropSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}} -urUSMHostAllocTest.Success/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMHostAllocTest.InvalidUSMSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled -urUSMSharedAllocTest.InvalidUSMSize/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}__UsePoolDisabled +urUSMDeviceAllocTest.Success/*__UsePoolDisabled +urUSMDeviceAllocTest.InvalidUSMSize/*__UsePoolDisabled +urUSMFreeTest.SuccessDeviceAlloc/* +urUSMFreeTest.SuccessHostAlloc/* +urUSMFreeTest.SuccessSharedAlloc/* +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_TYPE +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_BASE_PTR +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_SIZE +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_DEVICE +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_POOL +urUSMGetMemAllocInfoNegativeTest.InvalidNullHandleContext/* +urUSMGetMemAllocInfoNegativeTest.InvalidNullPointerMem/* +urUSMGetMemAllocInfoNegativeTest.InvalidEnumeration/* +urUSMGetMemAllocInfoNegativeTest.InvalidValuePropSize/* +urUSMHostAllocTest.Success/*__UsePoolDisabled +urUSMHostAllocTest.InvalidUSMSize/*__UsePoolDisabled +urUSMSharedAllocTest.InvalidUSMSize/*__UsePoolDisabled diff --git a/test/conformance/usm/usm_adapter_opencl.match b/test/conformance/usm/usm_adapter_opencl.match index 3868e5be1b..2fffa9b0ed 100644 --- a/test/conformance/usm/usm_adapter_opencl.match +++ b/test/conformance/usm/usm_adapter_opencl.match @@ -1,2 +1 @@ -{{NONDETERMINISTIC}} -urUSMGetMemAllocInfoTest.Success/Intel_R__OpenCL___{{.*}}___UR_USM_ALLOC_INFO_POOL +urUSMGetMemAllocInfoTest.Success/*__UR_USM_ALLOC_INFO_POOL diff --git a/test/conformance/virtual_memory/virtual_memory_adapter_level_zero.match b/test/conformance/virtual_memory/virtual_memory_adapter_level_zero.match index bf8c7ce279..627d8eaa78 100644 --- 
a/test/conformance/virtual_memory/virtual_memory_adapter_level_zero.match +++ b/test/conformance/virtual_memory/virtual_memory_adapter_level_zero.match @@ -1,11 +1,5 @@ -{{NONDETERMINISTIC}} -{{OPT}}urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -{{OPT}}urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 -{{OPT}}urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 +{{OPT}}urPhysicalMemCreateTest.Success/*__3 +{{OPT}}urPhysicalMemCreateTest.Success/*__7 +{{OPT}}urPhysicalMemCreateTest.Success/*__12 +urPhysicalMemCreateTest.Success/*__44 +urPhysicalMemCreateTest.InvalidSize/* diff --git a/test/conformance/virtual_memory/virtual_memory_adapter_level_zero_v2.match b/test/conformance/virtual_memory/virtual_memory_adapter_level_zero_v2.match index 1c83fd1e2a..ec7be06f7e 100644 --- a/test/conformance/virtual_memory/virtual_memory_adapter_level_zero_v2.match +++ b/test/conformance/virtual_memory/virtual_memory_adapter_level_zero_v2.match @@ -1,84 +1,34 @@ -{{NONDETERMINISTIC}} -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 
-urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.InvalidNullHandleDevice/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 
-urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___3 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___7 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___12 -urPhysicalMemCreateTest.InvalidSize/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___44 -urPhysicalMemReleaseTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urPhysicalMemReleaseTest.InvalidNullHandlePhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urPhysicalMemRetainTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urPhysicalMemRetainTest.InvalidNullHandlePhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemFreeTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemFreeTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemFreeTest.InvalidNullPointerStart/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemGetInfoTestWithParam.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_VIRTUAL_MEM_INFO_ACCESS_MODE 
-urVirtualMemGetInfoTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemGetInfoTest.InvalidNullPointerStart/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemGetInfoTest.InvalidEnumerationInfo/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemGranularityGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM -urVirtualMemGranularityGetInfoTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED -urVirtualMemGranularityGetInfoNegativeTest.InvalidSizePropSizeSmall/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemMapTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemMapTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemMapTest.InvalidNullHandlePhysicalMem/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemMapTest.InvalidNullPointerStart/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemMapTest.InvalidEnumerationFlags/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___8 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___16 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___32 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___64 
-urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___128 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___256 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___512 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2048 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___5000 -urVirtualMemReserveTestWithParam.SuccessNoStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___100000 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___4 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___8 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___16 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___32 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___64 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___128 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___256 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___512 
-urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___1024 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___2048 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___5000 -urVirtualMemReserveTestWithParam.SuccessWithStartPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}___100000 -urVirtualMemReserveTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemReserveTest.InvalidNullPointer/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemSetAccessTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemSetAccessTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemSetAccessTest.InvalidNullPointerStart/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemUnmapTest.Success/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemUnmapTest.InvalidNullHandleContext/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ -urVirtualMemUnmapTest.InvalidNullPointerStart/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ +urPhysicalMemCreateTest.Success/* +urPhysicalMemCreateTest.InvalidNullHandleContext/* +urPhysicalMemCreateTest.InvalidNullHandleDevice/* +urPhysicalMemCreateTest.InvalidNullPointerPhysicalMem/* +urPhysicalMemCreateTest.InvalidSize/* +urPhysicalMemReleaseTest.Success/* +urPhysicalMemReleaseTest.InvalidNullHandlePhysicalMem/* +urPhysicalMemRetainTest.Success/* +urPhysicalMemRetainTest.InvalidNullHandlePhysicalMem/* +urVirtualMemFreeTest.Success/* +urVirtualMemFreeTest.InvalidNullHandleContext/* +urVirtualMemFreeTest.InvalidNullPointerStart/* +urVirtualMemGetInfoTestWithParam.Success/*__UR_VIRTUAL_MEM_INFO_ACCESS_MODE +urVirtualMemGetInfoTest.InvalidNullHandleContext/* 
+urVirtualMemGetInfoTest.InvalidNullPointerStart/* +urVirtualMemGetInfoTest.InvalidEnumerationInfo/* +urVirtualMemGranularityGetInfoTest.Success/*__UR_VIRTUAL_MEM_GRANULARITY_INFO_MINIMUM +urVirtualMemGranularityGetInfoTest.Success/*__UR_VIRTUAL_MEM_GRANULARITY_INFO_RECOMMENDED +urVirtualMemGranularityGetInfoNegativeTest.InvalidSizePropSizeSmall/* +urVirtualMemMapTest.Success/* +urVirtualMemMapTest.InvalidNullHandleContext/* +urVirtualMemMapTest.InvalidNullHandlePhysicalMem/* +urVirtualMemMapTest.InvalidNullPointerStart/* +urVirtualMemMapTest.InvalidEnumerationFlags/* +urVirtualMemReserveTestWithParam.SuccessNoStartPointer/* +urVirtualMemReserveTestWithParam.SuccessWithStartPointer/* +urVirtualMemReserveTest.InvalidNullHandleContext/* +urVirtualMemReserveTest.InvalidNullPointer/* +urVirtualMemSetAccessTest.Success/* +urVirtualMemSetAccessTest.InvalidNullHandleContext/* +urVirtualMemSetAccessTest.InvalidNullPointerStart/* +urVirtualMemUnmapTest.Success/* +urVirtualMemUnmapTest.InvalidNullHandleContext/* +urVirtualMemUnmapTest.InvalidNullPointerStart/* From 6ccc003f1974cd4020821b84ac76fe5a31cb21e9 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Wed, 13 Nov 2024 17:54:32 +0100 Subject: [PATCH 37/37] add llama.cpp benchmark --- scripts/benchmarks/benches/base.py | 22 ++- scripts/benchmarks/benches/llamacpp.py | 196 +++++++++++++++++++++++++ scripts/benchmarks/benches/velocity.py | 4 +- scripts/benchmarks/main.py | 7 +- scripts/benchmarks/utils/utils.py | 25 +++- 5 files changed, 235 insertions(+), 19 deletions(-) create mode 100644 scripts/benchmarks/benches/llamacpp.py diff --git a/scripts/benchmarks/benches/base.py b/scripts/benchmarks/benches/base.py index 3871938bfd..84e1b8287c 100644 --- a/scripts/benchmarks/benches/base.py +++ b/scripts/benchmarks/benches/base.py @@ -8,7 +8,7 @@ from pathlib import Path from .result import Result from .options import options -from utils.utils import run +from utils.utils import download, run import urllib.request import tarfile 
@@ -26,7 +26,7 @@ def get_adapter_full_path(): assert False, \ f"could not find adapter file {adapter_path} (and in similar lib paths)" - def run_bench(self, command, env_vars): + def run_bench(self, command, env_vars, ld_library=[]): env_vars_with_forced_adapter = env_vars.copy() if options.ur is not None: env_vars_with_forced_adapter.update( @@ -36,7 +36,8 @@ def run_bench(self, command, env_vars): command=command, env_vars=env_vars_with_forced_adapter, add_sycl=True, - cwd=options.benchmark_cwd + cwd=options.benchmark_cwd, + ld_library=ld_library ).stdout.decode() def create_data_path(self, name): @@ -49,17 +50,9 @@ def create_data_path(self, name): return data_path - def download_untar(self, name, url, file): + def download(self, name, url, file, untar = False): self.data_path = self.create_data_path(name) - data_file = os.path.join(self.data_path, file) - if not Path(data_file).exists(): - print(f"{data_file} does not exist, downloading") - urllib.request.urlretrieve(url, data_file) - file = tarfile.open(data_file) - file.extractall(self.data_path) - file.close() - else: - print(f"{data_file} exists, skipping...") + return download(self.data_path, url, file, untar) def name(self): raise NotImplementedError() @@ -79,6 +72,9 @@ def run(self, env_vars) -> list[Result]: def teardown(self): raise NotImplementedError() + def ignore_iterations(self): + return False + class Suite: def benchmarks(self) -> list[Benchmark]: raise NotImplementedError() diff --git a/scripts/benchmarks/benches/llamacpp.py b/scripts/benchmarks/benches/llamacpp.py new file mode 100644 index 0000000000..3ff7963bd1 --- /dev/null +++ b/scripts/benchmarks/benches/llamacpp.py @@ -0,0 +1,196 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import csv +import io +from pathlib import Path +import re +import shutil +from utils.utils import download, git_clone +from .base import Benchmark, Suite +from .result import Result +from utils.utils import run, create_build_path +from .options import options +import os + +class OneAPI: + # random unique number for benchmark oneAPI installation + ONEAPI_BENCHMARK_INSTANCE_ID = 98765 + def __init__(self, directory): + self.oneapi_dir = os.path.join(directory, 'oneapi') + Path(self.oneapi_dir).mkdir(parents=True, exist_ok=True) + # delete if some option is set? + + # can we just hardcode these links? + self.install_package('dnnl', 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/87e117ab-039b-437d-9c80-dcd5c9e675d5/intel-onednn-2025.0.0.862_offline.sh') + self.install_package('mkl', 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940_offline.sh') + return + + def install_package(self, name, url): + package_path = os.path.join(self.oneapi_dir, name) + if Path(package_path).exists(): + print(f"{package_path} exists, skipping installing oneAPI package {name}...") + return + + package = download(self.oneapi_dir, url, f'package_{name}.sh') + try: + print(f"installing {name}") + run(f"sh {package} -a -s --eula accept --install-dir {self.oneapi_dir} --instance f{self.ONEAPI_BENCHMARK_INSTANCE_ID}") + except Exception: + print("oneAPI installation likely exists already") + return + print(f"{name} installation complete") + + def package_dir(self, package, dir): + return os.path.join(self.oneapi_dir, package, 'latest', dir) + + def package_cmake(self, package): + package_lib = self.package_dir(package, 'lib') + return os.path.join(package_lib, 'cmake', package) + + def mkl_lib(self): + return self.package_dir('mkl', 'lib') + + def mkl_include(self): + return self.package_dir('mkl', 'include') + + def mkl_cmake(self): +
return self.package_cmake('mkl') + + def dnn_lib(self): + return self.package_dir('dnnl', 'lib') + + def dnn_include(self): + return self.package_dir('dnnl', 'include') + + def dnn_cmake(self): + return self.package_cmake('dnnl') + + def tbb_lib(self): + return self.package_dir('tbb', 'lib') + + def tbb_cmake(self): + return self.package_cmake('tbb') + + def compiler_lib(self): + return self.package_dir('compiler', 'lib') + + def ld_libraries(self): + return [ + self.compiler_lib(), + self.mkl_lib(), + self.tbb_lib(), + self.dnn_lib() + ] + +class LlamaCppBench(Suite): + def __init__(self, directory): + if options.sycl is None: + return + + self.directory = directory + + def setup(self): + if options.sycl is None: + return + + repo_path = git_clone(self.directory, "llamacpp-repo", "https://github.com/ggerganov/llama.cpp", "1ee9eea094fe5846c7d8d770aa7caa749d246b23") + + self.models_dir = os.path.join(self.directory, 'models') + Path(self.models_dir).mkdir(parents=True, exist_ok=True) + + self.model = download(self.models_dir, "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf", "Phi-3-mini-4k-instruct-q4.gguf") + + self.oneapi = OneAPI(self.directory) + + self.build_path = create_build_path(self.directory, 'llamacpp-build') + + configure_command = [ + "cmake", + f"-B {self.build_path}", + f"-S {repo_path}", + f"-DCMAKE_BUILD_TYPE=Release", + f"-DGGML_SYCL=ON", + f"-DCMAKE_C_COMPILER=clang", + f"-DCMAKE_CXX_COMPILER=clang++", + f"-DDNNL_DIR={self.oneapi.dnn_cmake()}", + f"-DTBB_DIR={self.oneapi.tbb_cmake()}", + f'-DCMAKE_CXX_FLAGS=-I"{self.oneapi.mkl_include()}"', + f'-DCMAKE_SHARED_LINKER_FLAGS=-L{self.oneapi.compiler_lib()} -L{self.oneapi.mkl_lib()}' + ] + print(f"{self.__class__.__name__}: Run {configure_command}") + run(configure_command, add_sycl=True) + print(f"{self.__class__.__name__}: Run cmake --build {self.build_path} -j") + run(f"cmake --build {self.build_path} -j", add_sycl=True, 
ld_library=self.oneapi.ld_libraries()) + + def benchmarks(self) -> list[Benchmark]: + if options.sycl is None: + return [] + + return [ + LlamaBench(self) + ] + +class LlamaBench(Benchmark): + def __init__(self, bench): + self.bench = bench + super().__init__(bench.directory) + + def unit(self): + return "token/s" + + def setup(self): + self.benchmark_bin = os.path.join(self.bench.build_path, 'bin', 'llama-bench') + + def name(self): + return f"llama.cpp" + + def lower_is_better(self): + return False + + def ignore_iterations(self): + return True + + def run(self, env_vars) -> list[Result]: + command = [ + f"{self.benchmark_bin}", + "--output", "csv", + "-n", "128", + "-p", "512", + "-b", "128,256,512", + "--numa", "isolate", + "-t", "56", # TODO: use only as many threads as numa node 0 has cpus + "--model", f"{self.bench.model}", + ] + + result = self.run_bench(command, env_vars, ld_library=self.bench.oneapi.ld_libraries()) + parsed = self.parse_output(result) + results = [] + for r in parsed: + (extra_label, mean) = r + label = f"{self.name()} {extra_label}" + results.append(Result(label=label, value=mean, command=command, env=env_vars, stdout=result)) + return results + + def parse_output(self, output): + csv_file = io.StringIO(output) + reader = csv.DictReader(csv_file) + + results = [] + for row in reader: + try: + n_batch = row["n_batch"] + avg_ts = float(row["avg_ts"]) + n_prompt = int(row["n_prompt"]) + label = "Prompt Processing" if n_prompt != 0 else "Text Generation" + label += f" Batched {n_batch}" + results.append((label, avg_ts)) + except KeyError as e: + raise ValueError(f"Error parsing output: {e}") + + return results + + def teardown(self): + return diff --git a/scripts/benchmarks/benches/velocity.py b/scripts/benchmarks/benches/velocity.py index 38efa42f56..856fd993db 100644 --- a/scripts/benchmarks/benches/velocity.py +++ b/scripts/benchmarks/benches/velocity.py @@ -140,7 +140,7 @@ def __init__(self, vb: VelocityBench): 
super().__init__("sobel_filter", "sobel_filter", vb) def download_deps(self): - self.download_untar("sobel_filter", "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz") + self.download("sobel_filter", "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz", untar=True) return def name(self): @@ -203,7 +203,7 @@ def __init__(self, vb: VelocityBench): super().__init__("easywave", "easyWave_sycl", vb) def download_deps(self): - self.download_untar("easywave", "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz", "examples.tar.gz") + self.download("easywave", "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz", "examples.tar.gz", untar=True) def name(self): return "Velocity-Bench Easywave" diff --git a/scripts/benchmarks/main.py b/scripts/benchmarks/main.py index a31268a240..9dd77f14b2 100755 --- a/scripts/benchmarks/main.py +++ b/scripts/benchmarks/main.py @@ -8,6 +8,7 @@ from benches.compute import * from benches.velocity import VelocityBench from benches.syclbench import * +from benches.llamacpp import * from benches.test import TestSuite from benches.options import Compare, options from output_markdown import generate_markdown @@ -27,7 +28,8 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): suites = [ ComputeBench(directory), VelocityBench(directory), - SyclBench(directory) + SyclBench(directory), + LlamaCppBench(directory), #TestSuite() ] if not options.dry_run else [] @@ -64,7 +66,8 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): try: merged_env_vars = {**additional_env_vars} iteration_results = [] - for iter in range(options.iterations): + iterations = options.iterations if not benchmark.ignore_iterations() else 1 + for iter in range(iterations): print(f"running {benchmark.name()}, 
iteration {iter}... ", end='', flush=True) bench_results = benchmark.run(merged_env_vars) if bench_results is not None: diff --git a/scripts/benchmarks/utils/utils.py b/scripts/benchmarks/utils/utils.py index 0cd10b9513..d077184e5c 100644 --- a/scripts/benchmarks/utils/utils.py +++ b/scripts/benchmarks/utils/utils.py @@ -5,17 +5,24 @@ import os import shutil -import subprocess # nosec B404 +import subprocess # nosec B404 + +import tarfile +import urllib.request from benches.options import options from pathlib import Path -def run(command, env_vars={}, cwd=None, add_sycl=False): +def run(command, env_vars={}, cwd=None, add_sycl=False, ld_library=[]): try: if isinstance(command, str): command = command.split() env = os.environ.copy() + for ldlib in ld_library: + env['LD_LIBRARY_PATH'] = ldlib + os.pathsep + env.get('LD_LIBRARY_PATH', '') + + # order is important, we want provided sycl rt libraries to be first if add_sycl: sycl_bin_path = os.path.join(options.sycl, 'bin') env['PATH'] = sycl_bin_path + os.pathsep + env.get('PATH', '') @@ -23,6 +30,7 @@ def run(command, env_vars={}, cwd=None, add_sycl=False): env['LD_LIBRARY_PATH'] = sycl_lib_path + os.pathsep + env.get('LD_LIBRARY_PATH', '') env.update(env_vars) + result = subprocess.run(command, cwd=cwd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env, timeout=options.timeout) # nosec B603 if options.verbose: @@ -88,3 +96,16 @@ def create_build_path(directory, name): Path(build_path).mkdir(parents=True, exist_ok=True) return build_path + +def download(dir, url, file, untar = False): + data_file = os.path.join(dir, file) + if not Path(data_file).exists(): + print(f"{data_file} does not exist, downloading") + urllib.request.urlretrieve(url, data_file) + if untar: + file = tarfile.open(data_file) + file.extractall(dir) + file.close() + else: + print(f"{data_file} exists, skipping...") + return data_file