From 697d6f75faa078e9d6ac9bb35f879ef855d5b78d Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Mon, 30 Sep 2024 10:06:52 +0100 Subject: [PATCH] Add initial spec for async alloc entry points First basic work in progress spec. --- include/ur_api.h | 226 ++++++++++- include/ur_api_funcs.def | 4 + include/ur_ddi.h | 50 +++ include/ur_print.h | 48 +++ include/ur_print.hpp | 364 ++++++++++++++++++ scripts/core/EXP-ASYNC-ALLOC.rst | 78 ++++ scripts/core/exp-async-alloc.yml | 216 +++++++++++ scripts/core/registry.yml | 12 + source/adapters/cuda/usm.cpp | 120 ++++++ source/adapters/hip/usm.cpp | 120 ++++++ .../level_zero/ur_interface_loader.cpp | 4 + .../level_zero/ur_interface_loader.hpp | 20 + source/adapters/level_zero/usm.cpp | 120 ++++++ source/adapters/level_zero/v2/queue_api.cpp | 35 ++ source/adapters/level_zero/v2/queue_api.hpp | 18 + .../v2/queue_immediate_in_order.cpp | 56 +++ .../v2/queue_immediate_in_order.hpp | 19 + source/adapters/mock/ur_mockddi.cpp | 261 +++++++++++++ source/adapters/native_cpu/usm.cpp | 61 +++ source/adapters/opencl/usm.cpp | 45 +++ source/common/stype_map_helpers.def | 2 + source/loader/layers/tracing/ur_trcddi.cpp | 233 +++++++++++ source/loader/layers/validation/ur_valddi.cpp | 277 ++++++++++++- source/loader/loader.def.in | 10 + source/loader/loader.map.in | 10 + source/loader/ur_ldrddi.cpp | 284 ++++++++++++++ source/loader/ur_libapi.cpp | 188 ++++++++- source/loader/ur_print.cpp | 49 +++ source/ur_api.cpp | 153 +++++++- tools/urinfo/urinfo.hpp | 3 + 30 files changed, 3082 insertions(+), 4 deletions(-) create mode 100644 scripts/core/EXP-ASYNC-ALLOC.rst create mode 100644 scripts/core/exp-async-alloc.yml diff --git a/include/ur_api.h b/include/ur_api.h index 60d6fc2f70..854c3b70ec 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -230,6 +230,10 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_UPDATE_SIGNAL_EVENT_EXP = 243, ///< Enumerator for ::urCommandBufferUpdateSignalEventExp 
UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP = 244, ///< Enumerator for ::urCommandBufferUpdateWaitEventsExp UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245, ///< Enumerator for ::urBindlessImagesMapExternalLinearMemoryExp + UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP = 246, ///< Enumerator for ::urEnqueueUSMDeviceAllocExp + UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP = 247, ///< Enumerator for ::urEnqueueUSMSharedAllocExp + UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP = 248, ///< Enumerator for ::urEnqueueUSMHostAllocExp + UR_FUNCTION_ENQUEUE_USM_FREE_EXP = 249, ///< Enumerator for ::urEnqueueUSMFreeExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -288,6 +292,7 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES = 0x2006, ///< ::ur_exp_sampler_cubemap_properties_t UR_STRUCTURE_TYPE_EXP_IMAGE_COPY_REGION = 0x2007, ///< ::ur_exp_image_copy_region_t UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES = 0x3000, ///< ::ur_exp_enqueue_native_command_properties_t + UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES = 0x3001, ///< ::ur_exp_enqueue_usm_alloc_properties_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1702,6 +1707,8 @@ typedef enum ur_device_info_t { ///< backed 2D sampled image data. 
UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP = 0x2020, ///< [::ur_bool_t] returns true if the device supports enqueueing of native ///< work + UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP = 0x2021, ///< [::ur_bool_t] returns true if the device supports USM allocation + ///< enqueueing /// @cond UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1727,7 +1734,7 @@ typedef enum ur_device_info_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -5794,6 +5801,10 @@ typedef enum ur_command_t { UR_COMMAND_EXTERNAL_SEMAPHORE_SIGNAL_EXP = 0x2001, ///< Event created by ::urBindlessImagesSignalExternalSemaphoreExp UR_COMMAND_TIMESTAMP_RECORDING_EXP = 0x2002, ///< Event created by ::urEnqueueTimestampRecordingExp UR_COMMAND_ENQUEUE_NATIVE_EXP = 0x2004, ///< Event created by ::urEnqueueNativeCommandExp + UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP = 0x2005, ///< Event created by ::urEnqueueUSMDeviceAllocExp + UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP = 0x2006, ///< Event created by ::urEnqueueUSMSharedAllocExp + UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP = 0x2007, ///< Event created by ::urEnqueueUSMHostAllocExp + UR_COMMAND_ENQUEUE_USM_FREE_EXP = 0x2008, ///< Event created by ::urEnqueueUSMFreeExp /// @cond UR_COMMAND_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -7421,6 +7432,161 @@ urEnqueueWriteHostPipe( ///< an element of the phEventWaitList array. 
); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime Experimental API for enqueuing asynchronous USM allocations +#if !defined(__GNUC__) +#pragma region async_alloc_(experimental) +#endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue USM allocation flags +typedef uint32_t ur_exp_enqueue_usm_alloc_flags_t; +typedef enum ur_exp_enqueue_usm_alloc_flag_t { + UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD = UR_BIT(0), ///< reserved for future use. + /// @cond + UR_EXP_ENQUEUE_USM_ALLOC_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_enqueue_usm_alloc_flag_t; +/// @brief Bit Mask for validating ur_exp_enqueue_usm_alloc_flags_t +#define UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK 0xfffffffe + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue USM allocation properties +typedef struct ur_exp_enqueue_usm_alloc_properties_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES + void *pNext; ///< [in,out][optional] pointer to extension-specific structure + ur_exp_enqueue_usm_alloc_flags_t flags; ///< [in] enqueue USM allocation flags + +} ur_exp_enqueue_usm_alloc_properties_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM device allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +UR_APIEXPORT ur_result_t UR_APICALL 
+urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM device allocation +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM shared allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +UR_APIEXPORT ur_result_t UR_APICALL +urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, ///< [in][optional] 
pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM shared allocation +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM host allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +UR_APIEXPORT ur_result_t UR_APICALL +urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel 
execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM host allocation +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM deallocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +UR_APIEXPORT ur_result_t UR_APICALL +urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] handle of the USM memory pool + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM deallocation +); + #if !defined(__GNUC__) #pragma endregion #endif @@ -11450,6 +11616,64 @@ typedef struct ur_enqueue_kernel_launch_custom_exp_params_t { ur_event_handle_t **pphEvent; } ur_enqueue_kernel_launch_custom_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueUSMDeviceAllocExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_enqueue_usm_device_alloc_exp_params_t { + ur_queue_handle_t *phQueue; + ur_usm_pool_handle_t *ppPool; + const size_t *psize; + const ur_exp_enqueue_usm_alloc_properties_t **ppProperties; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + void ***pppMem; + ur_event_handle_t **pphEvent; +} ur_enqueue_usm_device_alloc_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueUSMSharedAllocExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_enqueue_usm_shared_alloc_exp_params_t { + ur_queue_handle_t *phQueue; + ur_usm_pool_handle_t *ppPool; + const size_t *psize; + const ur_exp_enqueue_usm_alloc_properties_t **ppProperties; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + void ***pppMem; + ur_event_handle_t **pphEvent; +} ur_enqueue_usm_shared_alloc_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueUSMHostAllocExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value 
+typedef struct ur_enqueue_usm_host_alloc_exp_params_t { + ur_queue_handle_t *phQueue; + ur_usm_pool_handle_t *ppPool; + const size_t *psize; + const ur_exp_enqueue_usm_alloc_properties_t **ppProperties; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + void ***pppMem; + ur_event_handle_t **pphEvent; +} ur_enqueue_usm_host_alloc_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueUSMFreeExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_enqueue_usm_free_exp_params_t { + ur_queue_handle_t *phQueue; + ur_usm_pool_handle_t *ppPool; + void **ppMem; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + ur_event_handle_t **pphEvent; +} ur_enqueue_usm_free_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urEnqueueCooperativeKernelLaunchExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_api_funcs.def b/include/ur_api_funcs.def index a7ca4d88a0..8da700c84d 100644 --- a/include/ur_api_funcs.def +++ b/include/ur_api_funcs.def @@ -127,6 +127,10 @@ _UR_API(urEnqueueDeviceGlobalVariableRead) _UR_API(urEnqueueReadHostPipe) _UR_API(urEnqueueWriteHostPipe) _UR_API(urEnqueueKernelLaunchCustomExp) +_UR_API(urEnqueueUSMDeviceAllocExp) +_UR_API(urEnqueueUSMSharedAllocExp) +_UR_API(urEnqueueUSMHostAllocExp) +_UR_API(urEnqueueUSMFreeExp) _UR_API(urEnqueueCooperativeKernelLaunchExp) _UR_API(urEnqueueTimestampRecordingExp) _UR_API(urEnqueueNativeCommandExp) diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 80a0003fca..f7b006b501 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1463,6 +1463,52 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueKernelLaunchCustomExp_t)( const 
ur_event_handle_t *, ur_event_handle_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueUSMDeviceAllocExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMDeviceAllocExp_t)( + ur_queue_handle_t, + ur_usm_pool_handle_t, + const size_t, + const ur_exp_enqueue_usm_alloc_properties_t *, + uint32_t, + const ur_event_handle_t *, + void **, + ur_event_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueUSMSharedAllocExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMSharedAllocExp_t)( + ur_queue_handle_t, + ur_usm_pool_handle_t, + const size_t, + const ur_exp_enqueue_usm_alloc_properties_t *, + uint32_t, + const ur_event_handle_t *, + void **, + ur_event_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueUSMHostAllocExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMHostAllocExp_t)( + ur_queue_handle_t, + ur_usm_pool_handle_t, + const size_t, + const ur_exp_enqueue_usm_alloc_properties_t *, + uint32_t, + const ur_event_handle_t *, + void **, + ur_event_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueUSMFreeExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMFreeExp_t)( + ur_queue_handle_t, + ur_usm_pool_handle_t, + void *, + uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urEnqueueCooperativeKernelLaunchExp typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCooperativeKernelLaunchExp_t)( @@ -1502,6 +1548,10 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueNativeCommandExp_t)( /// @brief Table of EnqueueExp functions pointers typedef struct ur_enqueue_exp_dditable_t { ur_pfnEnqueueKernelLaunchCustomExp_t 
pfnKernelLaunchCustomExp; + ur_pfnEnqueueUSMDeviceAllocExp_t pfnUSMDeviceAllocExp; + ur_pfnEnqueueUSMSharedAllocExp_t pfnUSMSharedAllocExp; + ur_pfnEnqueueUSMHostAllocExp_t pfnUSMHostAllocExp; + ur_pfnEnqueueUSMFreeExp_t pfnUSMFreeExp; ur_pfnEnqueueCooperativeKernelLaunchExp_t pfnCooperativeKernelLaunchExp; ur_pfnEnqueueTimestampRecordingExp_t pfnTimestampRecordingExp; ur_pfnEnqueueNativeCommandExp_t pfnNativeCommandExp; diff --git a/include/ur_print.h b/include/ur_print.h index 1dd874e5a5..c3de867728 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -874,6 +874,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintMapFlags(enum ur_map_flag_t value, ch /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmMigrationFlags(enum ur_usm_migration_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_enqueue_usm_alloc_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpEnqueueUsmAllocFlags(enum ur_exp_enqueue_usm_alloc_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_enqueue_usm_alloc_properties_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpEnqueueUsmAllocProperties(const struct ur_exp_enqueue_usm_alloc_properties_t params, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_image_copy_flag_t enum /// @returns @@ -2034,6 +2050,38 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueWriteHostPipeParams(const stru /// - `buff_size < out_size` UR_APIEXPORT 
ur_result_t UR_APICALL urPrintEnqueueKernelLaunchCustomExpParams(const struct ur_enqueue_kernel_launch_custom_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_usm_device_alloc_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueUsmDeviceAllocExpParams(const struct ur_enqueue_usm_device_alloc_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_usm_shared_alloc_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueUsmSharedAllocExpParams(const struct ur_enqueue_usm_shared_alloc_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_usm_host_alloc_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueUsmHostAllocExpParams(const struct ur_enqueue_usm_host_alloc_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_usm_free_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueUsmFreeExpParams(const struct ur_enqueue_usm_free_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + 
/////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_enqueue_cooperative_kernel_launch_exp_params_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 09431d4352..3847037612 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -194,6 +194,9 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); + template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); @@ -325,6 +328,8 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, enum ur_execution_info_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_map_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_usm_migration_flag_t value); +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_enqueue_usm_alloc_flag_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_enqueue_usm_alloc_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_image_copy_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_sampler_cubemap_filter_mode_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_external_mem_type_t value); @@ -954,6 +959,18 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP"; break; + case UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP: + os << "UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP"; + break; + case UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP: + os << "UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP"; + break; + case UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP: + os << 
"UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP"; + break; + case UR_FUNCTION_ENQUEUE_USM_FREE_EXP: + os << "UR_FUNCTION_ENQUEUE_USM_FREE_EXP"; + break; default: os << "unknown enumerator"; break; @@ -1113,6 +1130,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_structure_type_t value case UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES"; break; + case UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES: + os << "UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES"; + break; default: os << "unknown enumerator"; break; @@ -1374,6 +1394,11 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { const ur_exp_enqueue_native_command_properties_t *pstruct = (const ur_exp_enqueue_native_command_properties_t *)ptr; printPtr(os, pstruct); } break; + + case UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES: { + const ur_exp_enqueue_usm_alloc_properties_t *pstruct = (const ur_exp_enqueue_usm_alloc_properties_t *)ptr; + printPtr(os, pstruct); + } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -2646,6 +2671,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP: os << "UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP"; break; + case UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP: + os << "UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP"; + break; default: os << "unknown enumerator"; break; @@ -4440,6 +4468,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; default: os << 
"unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -8995,6 +9035,18 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_command_t value) { case UR_COMMAND_ENQUEUE_NATIVE_EXP: os << "UR_COMMAND_ENQUEUE_NATIVE_EXP"; break; + case UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP: + os << "UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP"; + break; + case UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP: + os << "UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP"; + break; + case UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP: + os << "UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP"; + break; + case UR_COMMAND_ENQUEUE_USM_FREE_EXP: + os << "UR_COMMAND_ENQUEUE_USM_FREE_EXP"; + break; default: os << "unknown enumerator"; break; @@ -9406,6 +9458,77 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t } } // namespace ur::details /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_enqueue_usm_alloc_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_enqueue_usm_alloc_flag_t value) { + switch (value) { + case UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD: + os << "UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_enqueue_usm_alloc_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD) == (uint32_t)UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD) { + val ^= (uint32_t)UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_ENQUEUE_USM_ALLOC_FLAG_TBD; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return 
UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_enqueue_usm_alloc_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_enqueue_usm_alloc_properties_t params) { + os << "(struct ur_exp_enqueue_usm_alloc_properties_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".flags = "; + + ur::details::printFlag(os, + (params.flags)); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_image_copy_flag_t type /// @returns /// std::ostream & @@ -14776,6 +14899,235 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_device_alloc_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_device_alloc_exp_params_t *params) { + + os << ".hQueue = "; + + ur::details::printPtr(os, + *(params->phQueue)); + + os << ", "; + os << ".pPool = "; + + ur::details::printPtr(os, + *(params->ppPool)); + + os << ", "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, + *(params->ppProperties)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + + os << 
", "; + os << ".ppMem = "; + + ur::details::printPtr(os, + *(params->pppMem)); + + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_shared_alloc_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_shared_alloc_exp_params_t *params) { + + os << ".hQueue = "; + + ur::details::printPtr(os, + *(params->phQueue)); + + os << ", "; + os << ".pPool = "; + + ur::details::printPtr(os, + *(params->ppPool)); + + os << ", "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, + *(params->ppProperties)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + + os << ", "; + os << ".ppMem = "; + + ur::details::printPtr(os, + *(params->pppMem)); + + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_host_alloc_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_host_alloc_exp_params_t *params) { + + os << ".hQueue = "; + + ur::details::printPtr(os, + *(params->phQueue)); + + os << ", "; + os << ".pPool = "; + + ur::details::printPtr(os, + *(params->ppPool)); + + os << ", "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << 
".pProperties = "; + + ur::details::printPtr(os, + *(params->ppProperties)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + + os << ", "; + os << ".ppMem = "; + + ur::details::printPtr(os, + *(params->pppMem)); + + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_free_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_free_exp_params_t *params) { + + os << ".hQueue = "; + + ur::details::printPtr(os, + *(params->phQueue)); + + os << ", "; + os << ".pPool = "; + + ur::details::printPtr(os, + *(params->ppPool)); + + os << ", "; + os << ".pMem = "; + + ur::details::printPtr(os, + *(params->ppMem)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_enqueue_cooperative_kernel_launch_exp_params_t type /// @returns @@ -18463,6 +18815,18 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case 
UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_CUSTOM_EXP: { os << (const struct ur_enqueue_kernel_launch_custom_exp_params_t *)params; } break; + case UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP: { + os << (const struct ur_enqueue_usm_device_alloc_exp_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP: { + os << (const struct ur_enqueue_usm_shared_alloc_exp_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP: { + os << (const struct ur_enqueue_usm_host_alloc_exp_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_USM_FREE_EXP: { + os << (const struct ur_enqueue_usm_free_exp_params_t *)params; + } break; case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: { os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-ASYNC-ALLOC.rst b/scripts/core/EXP-ASYNC-ALLOC.rst new file mode 100644 index 0000000000..74c7570cdc --- /dev/null +++ b/scripts/core/EXP-ASYNC-ALLOC.rst @@ -0,0 +1,78 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. _experimental-async-allocations: + +================================================================================ +Async Allocation Functions +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. + + +Motivation +-------------------------------------------------------------------------------- + +Asynchronous allocations can allow queues to allocate and free memory between +UR command enqueues without forcing synchronization points in the asynchronous +command DAG associated with a queue. This can allow applications to compose +memory allocation and command execution asynchronously, which can improve +performance.
+ +API +-------------------------------------------------------------------------------- + +Enums +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ${x}_device_info_t + * ${X}_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP +* ${x}_command_t + * ${X}_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP + * ${X}_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP + * ${X}_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP + * ${X}_COMMAND_ENQUEUE_USM_FREE_EXP +* ${x}_exp_enqueue_usm_alloc_flags_t + +Types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +${x}_exp_enqueue_usm_alloc_properties_t + +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}EnqueueUSMDeviceAllocExp +* ${x}EnqueueUSMSharedAllocExp +* ${x}EnqueueUSMHostAllocExp +* ${x}EnqueueUSMFreeExp + +Changelog +-------------------------------------------------------------------------------- + ++-----------+---------------------------+ +| Revision | Changes | ++===========+===========================+ +| 1.0 | Initial Draft | ++-----------+---------------------------+ + +Support +-------------------------------------------------------------------------------- + +Adapters which support this experimental feature *must* return true for the new +``${X}_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP`` device info query. + + +Contributors +-------------------------------------------------------------------------------- + +* Hugh Delaney `hugh.delaney@codeplay.com `_ diff --git a/scripts/core/exp-async-alloc.yml b/scripts/core/exp-async-alloc.yml new file mode 100644 index 0000000000..b999fa6d8e --- /dev/null +++ b/scripts/core/exp-async-alloc.yml @@ -0,0 +1,216 @@ +# +# Copyright (C) 2024 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental API for enqueuing asynchronous USM allocations" +ordinal: "99" + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +typed_etors: true +desc: "Extension enums to $x_device_info_t to support USM allocation enqueuing." +name: $x_device_info_t +etors: + - name: ENQUEUE_USM_ALLOCATIONS_EXP + value: "0x2021" + desc: "[$x_bool_t] returns true if the device supports USM allocation enqueueing" + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +desc: "Command Type experimental enumerations." +name: $x_command_t +etors: + - name: ENQUEUE_USM_DEVICE_ALLOC_EXP + value: "0x2005" + desc: Event created by $xEnqueueUSMDeviceAllocExp + - name: ENQUEUE_USM_SHARED_ALLOC_EXP + value: "0x2006" + desc: Event created by $xEnqueueUSMSharedAllocExp + - name: ENQUEUE_USM_HOST_ALLOC_EXP + value: "0x2007" + desc: Event created by $xEnqueueUSMHostAllocExp + - name: ENQUEUE_USM_FREE_EXP + value: "0x2008" + desc: Event created by $xEnqueueUSMFreeExp + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Enqueue USM allocation flags" +name: $x_exp_enqueue_usm_alloc_flags_t +etors: + - name: TBD + desc: "reserved for future use." 
+ +--- #-------------------------------------------------------------------------- +type: struct +desc: "Enqueue USM allocation properties" +name: $x_exp_enqueue_usm_alloc_properties_t +base: $x_base_properties_t +members: + - type: $x_exp_enqueue_usm_alloc_flags_t + name: flags + desc: "[in] enqueue USM allocation flags" + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +desc: "Structure type experimental enumerations" +name: $x_structure_type_t +etors: + - name: EXP_ENQUEUE_USM_ALLOC_PROPERTIES + desc: $x_exp_enqueue_usm_alloc_properties_t + value: "0x3001" + +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue an asynchronous USM device allocation" +class: $xEnqueue +name: USMDeviceAllocExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_usm_pool_handle_t + desc: "[in][optional] handle of the USM memory pool" + name: pPool + - type: const size_t + desc: "[in] minimum size in bytes of the USM memory object to be allocated" + name: size + - type: const $x_exp_enqueue_usm_alloc_properties_t* + name: pProperties + desc: "[in][optional] pointer to the enqueue asynchronous USM allocation properties" + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: const $x_event_handle_t* + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ - type: void** + name: ppMem + desc: "[out] pointer to USM memory object" + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that identifies the asynchronous USM device allocation" +returns: + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_INVALID_NULL_HANDLE + - $X_RESULT_ERROR_INVALID_NULL_POINTER + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue an asynchronous USM shared allocation" +class: $xEnqueue +name: USMSharedAllocExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_usm_pool_handle_t + desc: "[in][optional] handle of the USM memory pool" + name: pPool + - type: const size_t + desc: "[in] minimum size in bytes of the USM memory object to be allocated" + name: size + - type: const $x_exp_enqueue_usm_alloc_properties_t* + name: pProperties + desc: "[in][optional] pointer to the enqueue asynchronous USM allocation properties" + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: const $x_event_handle_t* + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ - type: void** + name: ppMem + desc: "[out] pointer to USM memory object" + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that identifies the asynchronous USM shared allocation" +returns: + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_INVALID_NULL_HANDLE + - $X_RESULT_ERROR_INVALID_NULL_POINTER + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue an asynchronous USM host allocation" +class: $xEnqueue +name: USMHostAllocExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_usm_pool_handle_t + desc: "[in][optional] handle of the USM memory pool" + name: pPool + - type: const size_t + desc: "[in] minimum size in bytes of the USM memory object to be allocated" + name: size + - type: const $x_exp_enqueue_usm_alloc_properties_t* + name: pProperties + desc: "[in][optional] pointer to the enqueue asynchronous USM allocation properties" + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: const $x_event_handle_t* + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ - type: void** + name: ppMem + desc: "[out] pointer to USM memory object" + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that identifies the asynchronous USM host allocation" +returns: + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_INVALID_NULL_HANDLE + - $X_RESULT_ERROR_INVALID_NULL_POINTER + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue an asynchronous USM deallocation" +class: $xEnqueue +name: USMFreeExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_usm_pool_handle_t + desc: "[in][optional] handle of the USM memory pool" + name: pPool + - type: void* + name: pMem + desc: "[in] pointer to USM memory object" + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: const $x_event_handle_t* + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events.
+ - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that identifies the asynchronous USM deallocation" +returns: + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_INVALID_NULL_HANDLE + - $X_RESULT_ERROR_INVALID_NULL_POINTER + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index f4ba983bfc..9566b81b7f 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -604,6 +604,18 @@ etors: - name: BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP desc: Enumerator for $xBindlessImagesMapExternalLinearMemoryExp value: '245' +- name: ENQUEUE_USM_DEVICE_ALLOC_EXP + desc: Enumerator for $xEnqueueUSMDeviceAllocExp + value: '246' +- name: ENQUEUE_USM_SHARED_ALLOC_EXP + desc: Enumerator for $xEnqueueUSMSharedAllocExp + value: '247' +- name: ENQUEUE_USM_HOST_ALLOC_EXP + desc: Enumerator for $xEnqueueUSMHostAllocExp + value: '248' +- name: ENQUEUE_USM_FREE_EXP + desc: Enumerator for $xEnqueueUSMFreeExp + value: '249' --- type: enum desc: Defines structure types diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index 8a6ac41b08..75d64ff9a3 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -503,3 +503,123 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( } } } + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, 
numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM device allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM shared allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM host allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events.
+ ur_event_handle_t *phEvent ///< [out][optional] return an event object that + ///< identifies the asynchronous USM deallocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = pMem; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index 5e28f3592d..129bcaaa23 100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -468,3 +468,123 @@ ur_result_t umfPoolMallocHelper(ur_usm_pool_handle_t hPool, void **ppMem, } return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM device allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM shared allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM host allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events.
+ ur_event_handle_t *phEvent ///< [out][optional] return an event object that + ///< identifies the asynchronous USM deallocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = pMem; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 1c2f68c07c..921168d0f2 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -213,6 +213,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnKernelLaunchCustomExp = ur::level_zero::urEnqueueKernelLaunchCustomExp; + pDdiTable->pfnUSMDeviceAllocExp = ur::level_zero::urEnqueueUSMDeviceAllocExp; + pDdiTable->pfnUSMSharedAllocExp = ur::level_zero::urEnqueueUSMSharedAllocExp; + pDdiTable->pfnUSMHostAllocExp = ur::level_zero::urEnqueueUSMHostAllocExp; + pDdiTable->pfnUSMFreeExp = ur::level_zero::urEnqueueUSMFreeExp; pDdiTable->pfnCooperativeKernelLaunchExp = ur::level_zero::urEnqueueCooperativeKernelLaunchExp; pDdiTable->pfnTimestampRecordingExp = diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index 1207f7776b..927ddbcd9f 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -461,6 +461,26 @@ ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent); +ur_result_t 
urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t hQueue, + ur_usm_pool_handle_t pPool, void *pMem, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp index 28bdf233e8..0f266fc13b 100644 --- a/source/adapters/level_zero/usm.cpp +++ b/source/adapters/level_zero/usm.cpp @@ -787,6 +787,126 @@ ur_result_t urUSMReleaseExp(ur_context_handle_t Context, void *HostPtr) { Context->getPlatform()->ZeDriverHandleExpTranslated, HostPtr); return UR_RESULT_SUCCESS; } + +ur_result_t urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. 
If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM device allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM shared allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t size, ///< [in] minimum size in bytes of the USM memory object + ///< to be allocated + const ur_exp_enqueue_usm_alloc_properties_t + *pProperties, ///< [in][optional] pointer to the enqueue asynchronous + ///< USM allocation properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM host allocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before the kernel execution. If nullptr, the + ///< numEventsInWaitList must be 0, indicating no wait + ///< events.
+ ur_event_handle_t *phEvent ///< [out][optional] return an event object that + ///< identifies the asynchronous USM deallocation +) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = pMem; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} } // namespace ur::level_zero static ur_result_t USMFreeImpl(ur_context_handle_t Context, void *Ptr) { diff --git a/source/adapters/level_zero/v2/queue_api.cpp b/source/adapters/level_zero/v2/queue_api.cpp index ea2e931bfe..4dddcc14ab 100644 --- a/source/adapters/level_zero/v2/queue_api.cpp +++ b/source/adapters/level_zero/v2/queue_api.cpp @@ -271,6 +271,41 @@ ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, size, numEventsInWaitList, phEventWaitList, phEvent); } +ur_result_t urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + return hQueue->enqueueUSMDeviceAllocExp(pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, + ppMem, phEvent); +} +ur_result_t urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + return hQueue->enqueueUSMSharedAllocExp(pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, + ppMem, phEvent); +} +ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + return 
hQueue->enqueueUSMHostAllocExp(pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, + ppMem, phEvent); +} +ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t hQueue, + ur_usm_pool_handle_t pPool, void *pMem, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueUSMFreeExp(pPool, pMem, numEventsInWaitList, + phEventWaitList, phEvent); +} ur_result_t urBindlessImagesImageCopyExp( ur_queue_handle_t hQueue, const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, diff --git a/source/adapters/level_zero/v2/queue_api.hpp b/source/adapters/level_zero/v2/queue_api.hpp index 577f6c5aba..a4bc4ae119 100644 --- a/source/adapters/level_zero/v2/queue_api.hpp +++ b/source/adapters/level_zero/v2/queue_api.hpp @@ -125,6 +125,24 @@ struct ur_queue_handle_t_ { bool, void *, size_t, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0; + virtual ur_result_t + enqueueUSMDeviceAllocExp(ur_usm_pool_handle_t, const size_t, + const ur_exp_enqueue_usm_alloc_properties_t *, + uint32_t, const ur_event_handle_t *, void **, + ur_event_handle_t *) = 0; + virtual ur_result_t + enqueueUSMSharedAllocExp(ur_usm_pool_handle_t, const size_t, + const ur_exp_enqueue_usm_alloc_properties_t *, + uint32_t, const ur_event_handle_t *, void **, + ur_event_handle_t *) = 0; + virtual ur_result_t + enqueueUSMHostAllocExp(ur_usm_pool_handle_t, const size_t, + const ur_exp_enqueue_usm_alloc_properties_t *, + uint32_t, const ur_event_handle_t *, void **, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueUSMFreeExp(ur_usm_pool_handle_t, void *, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; virtual ur_result_t bindlessImagesImageCopyExp( const void *, void *, const ur_image_desc_t *, const ur_image_desc_t *, const ur_image_format_t *, const ur_image_format_t *, diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp 
b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index 08fae0719f..844bf3068b 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -916,6 +916,62 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueWriteHostPipe( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +ur_result_t ur_queue_immediate_in_order_t::enqueueUSMDeviceAllocExp( + ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueUSMSharedAllocExp( + ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueUSMHostAllocExp( + ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t 
ur_queue_immediate_in_order_t::enqueueUSMFreeExp( + ur_usm_pool_handle_t pPool, void *pMem, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + std::ignore = pPool; + std::ignore = pMem; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + ur_result_t ur_queue_immediate_in_order_t::bindlessImagesImageCopyExp( const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp index 3fac90110a..5f545a48e0 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -228,6 +228,25 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMDeviceAllocExp( + ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMSharedAllocExp( + ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMHostAllocExp( + ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) override; + ur_result_t enqueueUSMFreeExp(ur_usm_pool_handle_t pPool, void *pMem, + uint32_t 
numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) override; ur_result_t bindlessImagesImageCopyExp( const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index dea28a4658..fb92d8426d 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -7106,6 +7106,259 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMDeviceAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM device allocation + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_usm_device_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urEnqueueUSMDeviceAllocExp")); + if (beforeCallback) { + result = beforeCallback(&params); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback( + "urEnqueueUSMDeviceAllocExp")); + if (replaceCallback) { + result = replaceCallback(&params); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urEnqueueUSMDeviceAllocExp")); + if (afterCallback) { + return afterCallback(&params); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMSharedAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM shared allocation + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_usm_shared_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urEnqueueUSMSharedAllocExp")); + if (beforeCallback) { + result = beforeCallback(&params); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback( + "urEnqueueUSMSharedAllocExp")); + if (replaceCallback) { + result = replaceCallback(&params); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + 
return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urEnqueueUSMSharedAllocExp")); + if (afterCallback) { + return afterCallback(&params); + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMHostAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM host allocation + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_usm_host_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urEnqueueUSMHostAllocExp")); + if (beforeCallback) { + result = beforeCallback(&params); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback("urEnqueueUSMHostAllocExp")); + if (replaceCallback) { + result = replaceCallback(&params); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urEnqueueUSMHostAllocExp")); + if (afterCallback) { + return afterCallback(&params); + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMFreeExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. 
+ ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM deallocation + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_usm_free_exp_params_t params = { + &hQueue, &pPool, &pMem, &numEventsInWaitList, + &phEventWaitList, &phEvent}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urEnqueueUSMFreeExp")); + if (beforeCallback) { + result = beforeCallback(&params); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback("urEnqueueUSMFreeExp")); + if (replaceCallback) { + result = replaceCallback(&params); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urEnqueueUSMFreeExp")); + if (afterCallback) { + return afterCallback(&params); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -11027,6 +11280,14 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnKernelLaunchCustomExp = driver::urEnqueueKernelLaunchCustomExp; + pDdiTable->pfnUSMDeviceAllocExp = driver::urEnqueueUSMDeviceAllocExp; + + pDdiTable->pfnUSMSharedAllocExp = driver::urEnqueueUSMSharedAllocExp; + + pDdiTable->pfnUSMHostAllocExp = driver::urEnqueueUSMHostAllocExp; + + pDdiTable->pfnUSMFreeExp = driver::urEnqueueUSMFreeExp; + pDdiTable->pfnCooperativeKernelLaunchExp = driver::urEnqueueCooperativeKernelLaunchExp; diff --git a/source/adapters/native_cpu/usm.cpp b/source/adapters/native_cpu/usm.cpp index 2fe0d551a8..2bea4c08a4 100644 --- a/source/adapters/native_cpu/usm.cpp +++ b/source/adapters/native_cpu/usm.cpp @@ -155,3 +155,64 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMReleaseExp(ur_context_handle_t Context, std::ignore = HostPtr; DIE_NO_IMPLEMENTATION; } + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + DIE_NO_IMPLEMENTATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, 
ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + DIE_NO_IMPLEMENTATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = size; + std::ignore = pProperties; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = ppMem; + std::ignore = phEvent; + DIE_NO_IMPLEMENTATION; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, void *pMem, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + std::ignore = hQueue; + std::ignore = pPool; + std::ignore = pMem; + std::ignore = numEventsInWaitList; + std::ignore = phEventWaitList; + std::ignore = phEvent; + DIE_NO_IMPLEMENTATION; +} diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index dfcc1dfafa..3c110d20c7 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -728,3 +728,48 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolGetInfo( [[maybe_unused]] size_t *pPropSizeRet) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] ur_usm_pool_handle_t pPool, + [[maybe_unused]] const size_t size, + [[maybe_unused]] const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + 
[[maybe_unused]] void **ppMem, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] ur_usm_pool_handle_t pPool, + [[maybe_unused]] const size_t size, + [[maybe_unused]] const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] void **ppMem, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] ur_usm_pool_handle_t pPool, + [[maybe_unused]] const size_t size, + [[maybe_unused]] const ur_exp_enqueue_usm_alloc_properties_t *pProperties, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] void **ppMem, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFreeExp( + [[maybe_unused]] ur_queue_handle_t hQueue, + [[maybe_unused]] ur_usm_pool_handle_t pPool, [[maybe_unused]] void *pMem, + [[maybe_unused]] uint32_t numEventsInWaitList, + [[maybe_unused]] const ur_event_handle_t *phEventWaitList, + [[maybe_unused]] ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/common/stype_map_helpers.def b/source/common/stype_map_helpers.def index c938ca6b95..df7f0e5d6b 100644 --- a/source/common/stype_map_helpers.def +++ b/source/common/stype_map_helpers.def @@ -99,4 +99,6 @@ template <> struct stype_map : stype_map_impl {}; template <> struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp 
b/source/loader/layers/tracing/ur_trcddi.cpp index 9cc18c66c4..c45395dfc9 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6034,6 +6034,225 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMDeviceAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM device allocation +) { + auto pfnUSMDeviceAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMDeviceAllocExp; + + if (nullptr == pfnUSMDeviceAllocExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_enqueue_usm_device_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + uint64_t instance = + getContext()->notify_begin(UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP, + "urEnqueueUSMDeviceAllocExp", &params); + + auto &logger = getContext()->logger; + logger.info(" ---> urEnqueueUSMDeviceAllocExp\n"); + + ur_result_t result = pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + getContext()->notify_end(UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP, + "urEnqueueUSMDeviceAllocExp", &params, &result, + instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP, &params); + logger.info(" <--- urEnqueueUSMDeviceAllocExp({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMSharedAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait 
list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM shared allocation +) { + auto pfnUSMSharedAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMSharedAllocExp; + + if (nullptr == pfnUSMSharedAllocExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_enqueue_usm_shared_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + uint64_t instance = + getContext()->notify_begin(UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP, + "urEnqueueUSMSharedAllocExp", &params); + + auto &logger = getContext()->logger; + logger.info(" ---> urEnqueueUSMSharedAllocExp\n"); + + ur_result_t result = pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + getContext()->notify_end(UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP, + "urEnqueueUSMSharedAllocExp", &params, &result, + instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP, &params); + logger.info(" <--- urEnqueueUSMSharedAllocExp({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMHostAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in 
bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM host allocation +) { + auto pfnUSMHostAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMHostAllocExp; + + if (nullptr == pfnUSMHostAllocExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_enqueue_usm_host_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + uint64_t instance = + getContext()->notify_begin(UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP, + "urEnqueueUSMHostAllocExp", &params); + + auto &logger = getContext()->logger; + logger.info(" ---> urEnqueueUSMHostAllocExp\n"); + + ur_result_t result = pfnUSMHostAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + getContext()->notify_end(UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP, + "urEnqueueUSMHostAllocExp", &params, &result, + instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP, &params); + logger.info(" <--- urEnqueueUSMHostAllocExp({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMFreeExp +__urdlllocal ur_result_t 
UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM deallocation +) { + auto pfnUSMFreeExp = getContext()->urDdiTable.EnqueueExp.pfnUSMFreeExp; + + if (nullptr == pfnUSMFreeExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_enqueue_usm_free_exp_params_t params = { + &hQueue, &pPool, &pMem, &numEventsInWaitList, + &phEventWaitList, &phEvent}; + uint64_t instance = getContext()->notify_begin( + UR_FUNCTION_ENQUEUE_USM_FREE_EXP, "urEnqueueUSMFreeExp", &params); + + auto &logger = getContext()->logger; + logger.info(" ---> urEnqueueUSMFreeExp\n"); + + ur_result_t result = pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList, + phEventWaitList, phEvent); + + getContext()->notify_end(UR_FUNCTION_ENQUEUE_USM_FREE_EXP, + "urEnqueueUSMFreeExp", &params, &result, instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_ENQUEUE_USM_FREE_EXP, &params); + logger.info(" <--- urEnqueueUSMFreeExp({}) -> {};\n", args_str.str(), + result); + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urUSMPitchedAllocExp +__urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -9643,6 +9862,20 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( 
pDdiTable->pfnKernelLaunchCustomExp = ur_tracing_layer::urEnqueueKernelLaunchCustomExp; + dditable.pfnUSMDeviceAllocExp = pDdiTable->pfnUSMDeviceAllocExp; + pDdiTable->pfnUSMDeviceAllocExp = + ur_tracing_layer::urEnqueueUSMDeviceAllocExp; + + dditable.pfnUSMSharedAllocExp = pDdiTable->pfnUSMSharedAllocExp; + pDdiTable->pfnUSMSharedAllocExp = + ur_tracing_layer::urEnqueueUSMSharedAllocExp; + + dditable.pfnUSMHostAllocExp = pDdiTable->pfnUSMHostAllocExp; + pDdiTable->pfnUSMHostAllocExp = ur_tracing_layer::urEnqueueUSMHostAllocExp; + + dditable.pfnUSMFreeExp = pDdiTable->pfnUSMFreeExp; + pDdiTable->pfnUSMFreeExp = ur_tracing_layer::urEnqueueUSMFreeExp; + dditable.pfnCooperativeKernelLaunchExp = pDdiTable->pfnCooperativeKernelLaunchExp; pDdiTable->pfnCooperativeKernelLaunchExp = diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index fdfce7951b..a171a9c60e 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -518,7 +518,7 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetInfo( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName) { + if (UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP < propName) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } @@ -6884,6 +6884,266 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMDeviceAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue 
asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM device allocation +) { + auto pfnUSMDeviceAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMDeviceAllocExp; + + if (nullptr == pfnUSMDeviceAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == ppMem) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL != pProperties && + UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + getContext()->refCountContext->logInvalidReference(hQueue); + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(pPool)) { + getContext()->refCountContext->logInvalidReference(pPool); + } + + ur_result_t result = pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMSharedAllocExp +__urdlllocal 
ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM shared allocation +) { + auto pfnUSMSharedAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMSharedAllocExp; + + if (nullptr == pfnUSMSharedAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == ppMem) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL != pProperties && + UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + getContext()->refCountContext->logInvalidReference(hQueue); + } + + if (getContext()->enableLifetimeValidation && + 
!getContext()->refCountContext->isReferenceValid(pPool)) { + getContext()->refCountContext->logInvalidReference(pPool); + } + + ur_result_t result = pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMHostAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM host allocation +) { + auto pfnUSMHostAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMHostAllocExp; + + if (nullptr == pfnUSMHostAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == ppMem) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL != pProperties && + UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + getContext()->refCountContext->logInvalidReference(hQueue); + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(pPool)) { + getContext()->refCountContext->logInvalidReference(pPool); + } + + ur_result_t result = pfnUSMHostAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMFreeExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, 
numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM deallocation +) { + auto pfnUSMFreeExp = getContext()->urDdiTable.EnqueueExp.pfnUSMFreeExp; + + if (nullptr == pfnUSMFreeExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pMem) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + getContext()->refCountContext->logInvalidReference(hQueue); + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(pPool)) { + getContext()->refCountContext->logInvalidReference(pPool); + } + + ur_result_t result = pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList, + phEventWaitList, phEvent); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -10696,6 +10956,21 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnKernelLaunchCustomExp = ur_validation_layer::urEnqueueKernelLaunchCustomExp; + dditable.pfnUSMDeviceAllocExp = pDdiTable->pfnUSMDeviceAllocExp; + pDdiTable->pfnUSMDeviceAllocExp = + ur_validation_layer::urEnqueueUSMDeviceAllocExp; + + dditable.pfnUSMSharedAllocExp = pDdiTable->pfnUSMSharedAllocExp; + 
pDdiTable->pfnUSMSharedAllocExp = + ur_validation_layer::urEnqueueUSMSharedAllocExp; + + dditable.pfnUSMHostAllocExp = pDdiTable->pfnUSMHostAllocExp; + pDdiTable->pfnUSMHostAllocExp = + ur_validation_layer::urEnqueueUSMHostAllocExp; + + dditable.pfnUSMFreeExp = pDdiTable->pfnUSMFreeExp; + pDdiTable->pfnUSMFreeExp = ur_validation_layer::urEnqueueUSMFreeExp; + dditable.pfnCooperativeKernelLaunchExp = pDdiTable->pfnCooperativeKernelLaunchExp; pDdiTable->pfnCooperativeKernelLaunchExp = diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index c34bde6fd2..b7272a8ff3 100644 --- a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -87,11 +87,15 @@ EXPORTS urEnqueueReadHostPipe urEnqueueTimestampRecordingExp urEnqueueUSMAdvise + urEnqueueUSMDeviceAllocExp urEnqueueUSMFill urEnqueueUSMFill2D + urEnqueueUSMFreeExp + urEnqueueUSMHostAllocExp urEnqueueUSMMemcpy urEnqueueUSMMemcpy2D urEnqueueUSMPrefetch + urEnqueueUSMSharedAllocExp urEnqueueWriteHostPipe urEventCreateWithNativeHandle urEventGetInfo @@ -285,11 +289,15 @@ EXPORTS urPrintEnqueueReadHostPipeParams urPrintEnqueueTimestampRecordingExpParams urPrintEnqueueUsmAdviseParams + urPrintEnqueueUsmDeviceAllocExpParams urPrintEnqueueUsmFillParams urPrintEnqueueUsmFill_2dParams + urPrintEnqueueUsmFreeExpParams + urPrintEnqueueUsmHostAllocExpParams urPrintEnqueueUsmMemcpyParams urPrintEnqueueUsmMemcpy_2dParams urPrintEnqueueUsmPrefetchParams + urPrintEnqueueUsmSharedAllocExpParams urPrintEnqueueWriteHostPipeParams urPrintEventCreateWithNativeHandleParams urPrintEventGetInfoParams @@ -312,6 +320,8 @@ EXPORTS urPrintExpCommandBufferUpdateValueArgDesc urPrintExpEnqueueNativeCommandFlags urPrintExpEnqueueNativeCommandProperties + urPrintExpEnqueueUsmAllocFlags + urPrintExpEnqueueUsmAllocProperties urPrintExpExternalMemDesc urPrintExpExternalMemType urPrintExpExternalSemaphoreDesc diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index 8333ee2fa4..bacd598df9 100644 --- 
a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -87,11 +87,15 @@ urEnqueueReadHostPipe; urEnqueueTimestampRecordingExp; urEnqueueUSMAdvise; + urEnqueueUSMDeviceAllocExp; urEnqueueUSMFill; urEnqueueUSMFill2D; + urEnqueueUSMFreeExp; + urEnqueueUSMHostAllocExp; urEnqueueUSMMemcpy; urEnqueueUSMMemcpy2D; urEnqueueUSMPrefetch; + urEnqueueUSMSharedAllocExp; urEnqueueWriteHostPipe; urEventCreateWithNativeHandle; urEventGetInfo; @@ -285,11 +289,15 @@ urPrintEnqueueReadHostPipeParams; urPrintEnqueueTimestampRecordingExpParams; urPrintEnqueueUsmAdviseParams; + urPrintEnqueueUsmDeviceAllocExpParams; urPrintEnqueueUsmFillParams; urPrintEnqueueUsmFill_2dParams; + urPrintEnqueueUsmFreeExpParams; + urPrintEnqueueUsmHostAllocExpParams; urPrintEnqueueUsmMemcpyParams; urPrintEnqueueUsmMemcpy_2dParams; urPrintEnqueueUsmPrefetchParams; + urPrintEnqueueUsmSharedAllocExpParams; urPrintEnqueueWriteHostPipeParams; urPrintEventCreateWithNativeHandleParams; urPrintEventGetInfoParams; @@ -312,6 +320,8 @@ urPrintExpCommandBufferUpdateValueArgDesc; urPrintExpEnqueueNativeCommandFlags; urPrintExpEnqueueNativeCommandProperties; + urPrintExpEnqueueUsmAllocFlags; + urPrintExpEnqueueUsmAllocProperties; urPrintExpExternalMemDesc; urPrintExpExternalMemType; urPrintExpExternalSemaphoreDesc; diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index a67879a9eb..95a5c8ae1f 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -6127,6 +6127,284 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMDeviceAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory 
object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM device allocation +) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnUSMDeviceAllocExp = dditable->ur.EnqueueExp.pfnUSMDeviceAllocExp; + if (nullptr == pfnUSMDeviceAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + pPool = (pPool) ? reinterpret_cast(pPool)->handle + : nullptr; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitListLocal.data(), ppMem, phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any output handles below. 
+ if (UR_RESULT_SUCCESS != result && + UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) { + return result; + } + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMSharedAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM shared allocation +) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnUSMSharedAllocExp = dditable->ur.EnqueueExp.pfnUSMSharedAllocExp; + if (nullptr == pfnUSMSharedAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + pPool = (pPool) ? reinterpret_cast(pPool)->handle + : nullptr; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitListLocal.data(), ppMem, phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any output handles below. 
+ if (UR_RESULT_SUCCESS != result && + UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) { + return result; + } + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMHostAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM host allocation +) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnUSMHostAllocExp = dditable->ur.EnqueueExp.pfnUSMHostAllocExp; + if (nullptr == pfnUSMHostAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + pPool = (pPool) ? reinterpret_cast(pPool)->handle + : nullptr; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = pfnUSMHostAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitListLocal.data(), ppMem, phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any output handles below. 
+ if (UR_RESULT_SUCCESS != result && + UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) { + return result; + } + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMFreeExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM deallocation +) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnUSMFreeExp = dditable->ur.EnqueueExp.pfnUSMFreeExp; + if (nullptr == pfnUSMFreeExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + pPool = (pPool) ? 
reinterpret_cast(pPool)->handle + : nullptr; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList, + phEventWaitListLocal.data(), phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any output handles below. + if (UR_RESULT_SUCCESS != result && + UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) { + return result; + } + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -9772,6 +10050,12 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( // return pointers to loader's DDIs pDdiTable->pfnKernelLaunchCustomExp = ur_loader::urEnqueueKernelLaunchCustomExp; + pDdiTable->pfnUSMDeviceAllocExp = + ur_loader::urEnqueueUSMDeviceAllocExp; + pDdiTable->pfnUSMSharedAllocExp = + ur_loader::urEnqueueUSMSharedAllocExp; + pDdiTable->pfnUSMHostAllocExp = ur_loader::urEnqueueUSMHostAllocExp; + pDdiTable->pfnUSMFreeExp = ur_loader::urEnqueueUSMFreeExp; pDdiTable->pfnCooperativeKernelLaunchExp = ur_loader::urEnqueueCooperativeKernelLaunchExp; pDdiTable->pfnTimestampRecordingExp = diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 9a8e4c2e12..85da1add11 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -870,7 +870,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( 
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -6556,6 +6556,192 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM device allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM device allocation + ) try { + auto pfnUSMDeviceAllocExp = + ur_lib::getContext()->urDdiTable.EnqueueExp.pfnUSMDeviceAllocExp; + if (nullptr == pfnUSMDeviceAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, ppMem, + phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM shared allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. 
+ ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM shared allocation + ) try { + auto pfnUSMSharedAllocExp = + ur_lib::getContext()->urDdiTable.EnqueueExp.pfnUSMSharedAllocExp; + if (nullptr == pfnUSMSharedAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, ppMem, + phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM host allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer 
to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM host allocation + ) try { + auto pfnUSMHostAllocExp = + ur_lib::getContext()->urDdiTable.EnqueueExp.pfnUSMHostAllocExp; + if (nullptr == pfnUSMHostAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUSMHostAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, ppMem, + phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM deallocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM deallocation + ) try { + auto pfnUSMFreeExp = + ur_lib::getContext()->urDdiTable.EnqueueExp.pfnUSMFreeExp; + if (nullptr == pfnUSMFreeExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList, + phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief USM allocate pitched memory /// diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 3a14d9a9de..bbcac79175 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -879,6 +879,23 @@ ur_result_t urPrintUsmMigrationFlags(enum ur_usm_migration_flag_t value, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t +urPrintExpEnqueueUsmAllocFlags(enum ur_exp_enqueue_usm_alloc_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpEnqueueUsmAllocProperties( + const struct ur_exp_enqueue_usm_alloc_properties_t params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpImageCopyFlags(enum ur_exp_image_copy_flag_t value, char *buffer, const size_t buff_size, size_t *out_size) { @@ -1746,6 +1763,38 @@ ur_result_t urPrintEnqueueKernelLaunchCustomExpParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintEnqueueUsmDeviceAllocExpParams( + const struct ur_enqueue_usm_device_alloc_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t 
urPrintEnqueueUsmSharedAllocExpParams( + const struct ur_enqueue_usm_shared_alloc_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintEnqueueUsmHostAllocExpParams( + const struct ur_enqueue_usm_host_alloc_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintEnqueueUsmFreeExpParams( + const struct ur_enqueue_usm_free_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintEnqueueCooperativeKernelLaunchExpParams( const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 92b02b7176..c3ee528136 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -771,7 +771,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -5604,6 +5604,157 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM device allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the allocation is performed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events.
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM device allocation +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM shared allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the allocation is performed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events.
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM shared allocation +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM host allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ENQUEUE_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_enqueue_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue asynchronous USM allocation + ///< properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the allocation is performed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events.
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM host allocation +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an asynchronous USM deallocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t + pPool, ///< [in][optional] handle of the USM memory pool + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the deallocation is performed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events.
+ ur_event_handle_t + *phEvent ///< [out][optional] return an event object that identifies the + ///< asynchronous USM deallocation +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief USM allocate pitched memory /// diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index d949b1f5df..2eb086b1b6 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -418,5 +418,8 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo( hDevice, UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_ENQUEUE_USM_ALLOCATIONS_EXP); } } // namespace urinfo