From 1912b88ce0128f48dd04003a9091081042b4b525 Mon Sep 17 00:00:00 2001 From: omarahmed1111 Date: Tue, 5 Nov 2024 12:22:28 +0000 Subject: [PATCH] Add some fixes after merging latest main --- source/adapters/opencl/adapter.hpp | 3 ++ source/adapters/opencl/device.cpp | 10 ++-- source/adapters/opencl/enqueue.cpp | 14 ++---- source/adapters/opencl/platform.cpp | 5 +- source/adapters/opencl/program.cpp | 76 +++++++++++------------------ source/adapters/opencl/program.hpp | 10 +++- source/adapters/opencl/usm.cpp | 39 +++++++-------- 7 files changed, 72 insertions(+), 85 deletions(-) diff --git a/source/adapters/opencl/adapter.hpp b/source/adapters/opencl/adapter.hpp index 36919c682d..2b17762de7 100644 --- a/source/adapters/opencl/adapter.hpp +++ b/source/adapters/opencl/adapter.hpp @@ -20,6 +20,9 @@ struct ur_adapter_handle_t_ { std::mutex Mutex; logger::Logger &log = logger::get_logger("opencl"); + std::vector> URPlatforms; + uint32_t NumPlatforms = 0; + // Function pointers to core OpenCL entry points which may not exist in older // versions of the OpenCL-ICD-Loader are tracked here and initialized by // dynamically loading the symbol by name. diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 71abc70df0..80204690ca 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -826,15 +826,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_bool_t */ oclv::OpenCLVersion DevVer; - CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - cl_adapter::cast(hDevice), DevVer)); + CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); /* Independent forward progress query is only supported as of OpenCL 2.1 * if version is older we return a default false. */ if (DevVer >= oclv::V2_1) { cl_bool CLValue; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_bool), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName, + sizeof(cl_bool), &CLValue, nullptr)); /* cl_bool is uint32_t and ur_bool_t is bool */ return ReturnValue(static_cast(CLValue)); @@ -911,7 +909,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; } case UR_DEVICE_INFO_IP_VERSION: { - bool Supported; + bool Supported = false; UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_intel_device_attribute_query"}, Supported)); if (!Supported) { diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 1a9fd3d0a6..b6effaee6e 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -64,15 +64,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( if (!pLocalWorkSize) { cl_device_id device = nullptr; CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_DEVICE, - sizeof(device), &device, nullptr)); + hQueue->CLQueue, CL_QUEUE_DEVICE, sizeof(device), &device, nullptr)); // This query always returns size_t[3], if nothing was specified it returns // all zeroes. size_t queriedLocalWorkSize[3] = {0, 0, 0}; CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( - cl_adapter::cast(hKernel), device, - CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(size_t[3]), - queriedLocalWorkSize, nullptr)); + hKernel->CLKernel, device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, + sizeof(size_t[3]), queriedLocalWorkSize, nullptr)); if (queriedLocalWorkSize[0] != 0) { for (uint32_t i = 0; i < workDim; i++) { compiledLocalWorksize.push_back(queriedLocalWorkSize[i]); @@ -84,13 +82,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( std::vector CLWaitEvents(numEventsInWaitList); MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); CL_RETURN_ON_FAILURE(clEnqueueNDRangeKernel( - hQueue->CLQueue, - hKernel->CLKernel, workDim, pGlobalWorkOffset, + hQueue->CLQueue, hKernel->CLKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, compiledLocalWorksize.empty() ? pLocalWorkSize : compiledLocalWorksize.data(), - numEventsInWaitList, CLWaitEvents.data(), - &Event)); + numEventsInWaitList, CLWaitEvents.data(), &Event)); UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent)); return UR_RESULT_SUCCESS; diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index c58a25c8c3..341830b1c2 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -85,7 +85,8 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, return UR_RESULT_SUCCESS; } } - /* INVALID_VALUE is returned when the size is invalid, special case it here */ + /* INVALID_VALUE is returned when the size is invalid, special case it here + */ if (Res == CL_INVALID_VALUE && phPlatforms != nullptr && NumEntries == 0) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -114,7 +115,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, } } - return mapCLErrorToUR(Result); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle( diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index ff6fca76f5..6c9b51e1a0 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -31,8 +31,8 @@ ur_result_t ur_program_handle_t_::makeWithNative(native_type NativeProg, if (Context->CLContext != CLContext) { return UR_RESULT_ERROR_INVALID_CONTEXT; } - auto URProgram = - std::make_unique(NativeProg, Context); + auto URProgram = std::make_unique( + NativeProg, Context, Context->DeviceCount, Context->Devices.data()); Program = URProgram.release(); } catch (std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; @@ -47,9 +47,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( ur_context_handle_t hContext, const void *pIL, size_t length, const ur_program_properties_t *, ur_program_handle_t *phProgram) { - if (!hContext->DeviceCount || !hContext->Devices[0]->Platform) { - return UR_RESULT_ERROR_INVALID_CONTEXT; - } ur_platform_handle_t CurPlatform = hContext->Devices[0]->Platform; oclv::OpenCLVersion PlatVer; @@ -57,6 +54,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( phProgram); cl_int Err = CL_SUCCESS; + cl_program Program; if (PlatVer >= oclv::V2_1) { /* Make sure all devices support CL 2.1 or newer as well. */ @@ -81,18 +79,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( } } - cl_program Program = - clCreateProgramWithIL(hContext->CLContext, pIL, length, &Err); - CL_RETURN_ON_FAILURE(Err); - try { - auto URProgram = - std::make_unique(Program, hContext); - *phProgram = URProgram.release(); - } catch (std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } + Program = clCreateProgramWithIL(hContext->CLContext, pIL, length, &Err); } else { /* If none of the devices conform with CL 2.1 or newer make sure they all * support the cl_khr_il_program extension. @@ -116,17 +103,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( assert(FuncPtr != nullptr); - try { - cl_program Program = FuncPtr(hContext->CLContext, pIL, length, &Err); - CL_RETURN_ON_FAILURE(Err); - auto URProgram = - std::make_unique(Program, hContext); - *phProgram = URProgram.release(); - } catch (std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } + Program = FuncPtr(hContext->CLContext, pIL, length, &Err); } // INVALID_VALUE is only returned in three circumstances according to the cl @@ -148,6 +125,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( CL_RETURN_ON_FAILURE(Err); } + try { + auto URProgram = std::make_unique( + Program, hContext, hContext->DeviceCount, hContext->Devices.data()); + *phProgram = URProgram.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; } @@ -155,17 +142,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( ur_context_handle_t hContext, uint32_t numDevices, ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries, const ur_program_properties_t *, ur_program_handle_t *phProgram) { - std::vector Devices(numDevices); + std::vector CLDevices(numDevices); for (uint32_t i = 0; i < numDevices; ++i) - Devices[i] = phDevices[i]->CLDevice; + CLDevices[i] = phDevices[i]->CLDevice; std::vector BinaryStatus(numDevices); cl_int CLResult; cl_program Program = clCreateProgramWithBinary( - hContext->CLContext, - static_cast(numDevices), Devices.data(), pLengths, - ppBinaries, BinaryStatus.data(), &CLResult); + hContext->CLContext, static_cast(numDevices), CLDevices.data(), + pLengths, ppBinaries, BinaryStatus.data(), &CLResult); CL_RETURN_ON_FAILURE(CLResult); - auto URProgram = std::make_unique(Program, hContext); + auto URProgram = std::make_unique( + Program, hContext, numDevices, phDevices); *phProgram = URProgram.release(); for (uint32_t i = 0; i < numDevices; ++i) { CL_RETURN_ON_FAILURE(BinaryStatus[i]); @@ -179,10 +166,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { - uint32_t DeviceCount = hProgram->Context->DeviceCount; + uint32_t DeviceCount = hProgram->NumDevices; std::vector CLDevicesInProgram(DeviceCount); for (uint32_t i = 0; i < DeviceCount; i++) { - CLDevicesInProgram[i] = hProgram->Context->Devices[i]->CLDevice; + CLDevicesInProgram[i] = hProgram->Devices[i]->CLDevice; } CL_RETURN_ON_FAILURE(clCompileProgram(hProgram->CLProgram, DeviceCount, @@ -230,15 +217,11 @@ urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, return ReturnValue(hProgram->Context); } case UR_PROGRAM_INFO_NUM_DEVICES: { - if (!hProgram->Context || !hProgram->Context->DeviceCount) { - return UR_RESULT_ERROR_INVALID_PROGRAM; - } - cl_uint DeviceCount = hProgram->Context->DeviceCount; + cl_uint DeviceCount = hProgram->NumDevices; return ReturnValue(DeviceCount); } case UR_PROGRAM_INFO_DEVICES: { - return ReturnValue(&hProgram->Context->Devices[0], - hProgram->Context->DeviceCount); + return ReturnValue(hProgram->Devices.data(), hProgram->NumDevices); } case UR_PROGRAM_INFO_REFERENCE_COUNT: { return ReturnValue(hProgram->getReferenceCount()); @@ -264,10 +247,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild([[maybe_unused]] ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { - uint32_t DeviceCount = hProgram->Context->DeviceCount; + uint32_t DeviceCount = hProgram->NumDevices; std::vector CLDevicesInProgram(DeviceCount); for (uint32_t i = 0; i < DeviceCount; i++) { - CLDevicesInProgram[i] = hProgram->Context->Devices[i]->CLDevice; + CLDevicesInProgram[i] = hProgram->Devices[i]->CLDevice; } CL_RETURN_ON_FAILURE( @@ -297,7 +280,8 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, } CL_RETURN_ON_FAILURE(CLResult); try { - auto URProgram = std::make_unique(Program, hContext); + auto URProgram = std::make_unique( + Program, hContext, hContext->DeviceCount, hContext->Devices.data()); *phProgram = URProgram.release(); } catch (std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; @@ -445,8 +429,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants( return UR_RESULT_ERROR_INVALID_CONTEXT; } - ur_platform_handle_t CurPlatform = Ctx->Devices[0]->Platform; - if (ur::cl::getAdapter()->clSetProgramSpecializationConstant) { for (uint32_t i = 0; i < count; ++i) { CL_RETURN_ON_FAILURE( diff --git a/source/adapters/opencl/program.hpp b/source/adapters/opencl/program.hpp index b97a2feb0f..4bdbad5249 100644 --- a/source/adapters/opencl/program.hpp +++ b/source/adapters/opencl/program.hpp @@ -20,11 +20,17 @@ struct ur_program_handle_t_ { ur_context_handle_t Context; std::atomic RefCount = 0; bool IsNativeHandleOwned = true; + uint32_t NumDevices = 0; + std::vector Devices; - ur_program_handle_t_(native_type Prog, ur_context_handle_t Ctx) - : CLProgram(Prog), Context(Ctx) { + ur_program_handle_t_(native_type Prog, ur_context_handle_t Ctx, + uint32_t NumDevices, ur_device_handle_t *Devs) + : CLProgram(Prog), Context(Ctx), NumDevices(NumDevices) { RefCount = 1; urContextRetain(Context); + for (uint32_t i = 0; i < NumDevices; i++) { + Devices.push_back(Devs[i]); + } } ~ur_program_handle_t_() { diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 1a93217771..297b84f6a9 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -375,6 +375,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( // Have to look up the context from the kernel cl_context CLContext = hQueue->Context->CLContext; + cl_int CLErr = CL_SUCCESS; clGetMemAllocInfoINTEL_fn GetMemAllocInfo = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, @@ -405,9 +406,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( // We need a queue associated with each device, so first figure out which // one we weren't given. cl_device_id QueueDevice = nullptr; - CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - hQueue->CLQueue, CL_QUEUE_DEVICE, - sizeof(QueueDevice), &QueueDevice, nullptr)); + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(hQueue->CLQueue, CL_QUEUE_DEVICE, + sizeof(QueueDevice), + &QueueDevice, nullptr)); cl_command_queue MissingQueue = nullptr, SrcQueue = nullptr, DstQueue = nullptr; @@ -451,9 +452,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( for (uint32_t i = 0; i < numEventsInWaitList; i++) { CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } - UR_RETURN_ON_FAILURE(checkCLErr(USMMemcpy( - SrcQueue, blocking, HostAlloc, pSrc, size, numEventsInWaitList, - CLWaitEvents.data(), &HostCopyEvent))); + UR_RETURN_ON_FAILURE(checkCLErr( + USMMemcpy(SrcQueue, blocking, HostAlloc, pSrc, size, + numEventsInWaitList, CLWaitEvents.data(), &HostCopyEvent))); UR_RETURN_ON_FAILURE( checkCLErr(USMMemcpy(DstQueue, blocking, pDst, HostAlloc, size, 1, @@ -517,19 +518,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( for (uint32_t i = 0; i < numEventsInWaitList; i++) { CLWaitEvents[i] = phEventWaitList[i]->CLEvent; } - CL_RETURN_ON_FAILURE( - USMMemcpy(hQueue->CLQueue, blocking, pDst, - pSrc, size, numEventsInWaitList, - CLWaitEvents.data(), - &Event)); - try { - auto UREvent = std::make_unique( - Event, hQueue->Context, hQueue); - *phEvent = UREvent.release(); - } catch (std::bad_alloc &) { - return UR_RESULT_ERROR_OUT_OF_RESOURCES; - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; + CL_RETURN_ON_FAILURE(USMMemcpy(hQueue->CLQueue, blocking, pDst, pSrc, size, + numEventsInWaitList, CLWaitEvents.data(), + &Event)); + if (phEvent) { + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } } }