Skip to content

Commit

Permalink
Add some fixes after merging latest main
Browse files Browse the repository at this point in the history
  • Loading branch information
omarahmed1111 committed Nov 7, 2024
1 parent 18d3063 commit 1912b88
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 85 deletions.
3 changes: 3 additions & 0 deletions source/adapters/opencl/adapter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ struct ur_adapter_handle_t_ {
std::mutex Mutex;
logger::Logger &log = logger::get_logger("opencl");

std::vector<std::unique_ptr<ur_platform_handle_t_>> URPlatforms;
uint32_t NumPlatforms = 0;

// Function pointers to core OpenCL entry points which may not exist in older
// versions of the OpenCL-ICD-Loader are tracked here and initialized by
// dynamically loading the symbol by name.
Expand Down
10 changes: 4 additions & 6 deletions source/adapters/opencl/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -826,15 +826,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
* UR type: ur_bool_t */

oclv::OpenCLVersion DevVer;
CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(
cl_adapter::cast<cl_device_id>(hDevice), DevVer));
CL_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer));
/* Independent forward progress query is only supported as of OpenCL 2.1
* if version is older we return a default false. */
if (DevVer >= oclv::V2_1) {
cl_bool CLValue;
CL_RETURN_ON_FAILURE(
clGetDeviceInfo(cl_adapter::cast<cl_device_id>(hDevice), CLPropName,
sizeof(cl_bool), &CLValue, nullptr));
CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->CLDevice, CLPropName,
sizeof(cl_bool), &CLValue, nullptr));

/* cl_bool is uint32_t and ur_bool_t is bool */
return ReturnValue(static_cast<ur_bool_t>(CLValue));
Expand Down Expand Up @@ -911,7 +909,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
return UR_RESULT_SUCCESS;
}
case UR_DEVICE_INFO_IP_VERSION: {
bool Supported;
bool Supported = false;
UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions(
{"cl_intel_device_attribute_query"}, Supported));
if (!Supported) {
Expand Down
14 changes: 5 additions & 9 deletions source/adapters/opencl/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
if (!pLocalWorkSize) {
cl_device_id device = nullptr;
CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(
cl_adapter::cast<cl_command_queue>(hQueue), CL_QUEUE_DEVICE,
sizeof(device), &device, nullptr));
hQueue->CLQueue, CL_QUEUE_DEVICE, sizeof(device), &device, nullptr));
// This query always returns size_t[3], if nothing was specified it returns
// all zeroes.
size_t queriedLocalWorkSize[3] = {0, 0, 0};
CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo(
cl_adapter::cast<cl_kernel>(hKernel), device,
CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(size_t[3]),
queriedLocalWorkSize, nullptr));
hKernel->CLKernel, device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
sizeof(size_t[3]), queriedLocalWorkSize, nullptr));
if (queriedLocalWorkSize[0] != 0) {
for (uint32_t i = 0; i < workDim; i++) {
compiledLocalWorksize.push_back(queriedLocalWorkSize[i]);
Expand All @@ -84,13 +82,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
std::vector<cl_event> CLWaitEvents(numEventsInWaitList);
MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents);
CL_RETURN_ON_FAILURE(clEnqueueNDRangeKernel(
hQueue->CLQueue,
hKernel->CLKernel, workDim, pGlobalWorkOffset,
hQueue->CLQueue, hKernel->CLKernel, workDim, pGlobalWorkOffset,
pGlobalWorkSize,
compiledLocalWorksize.empty() ? pLocalWorkSize
: compiledLocalWorksize.data(),
numEventsInWaitList, CLWaitEvents.data(),
&Event));
numEventsInWaitList, CLWaitEvents.data(), &Event));

UR_RETURN_ON_FAILURE(createUREvent(Event, hQueue->Context, hQueue, phEvent));
return UR_RESULT_SUCCESS;
Expand Down
5 changes: 3 additions & 2 deletions source/adapters/opencl/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries,
return UR_RESULT_SUCCESS;
}
}
/* INVALID_VALUE is returned when the size is invalid, special case it here */
/* INVALID_VALUE is returned when the size is invalid, special case it here
*/
if (Res == CL_INVALID_VALUE && phPlatforms != nullptr && NumEntries == 0) {
return UR_RESULT_ERROR_INVALID_SIZE;
}
Expand Down Expand Up @@ -114,7 +115,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries,
}
}

return mapCLErrorToUR(Result);
return UR_RESULT_SUCCESS;
}

UR_APIEXPORT ur_result_t UR_APICALL urPlatformGetNativeHandle(
Expand Down
76 changes: 29 additions & 47 deletions source/adapters/opencl/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ ur_result_t ur_program_handle_t_::makeWithNative(native_type NativeProg,
if (Context->CLContext != CLContext) {
return UR_RESULT_ERROR_INVALID_CONTEXT;
}
auto URProgram =
std::make_unique<ur_program_handle_t_>(NativeProg, Context);
auto URProgram = std::make_unique<ur_program_handle_t_>(
NativeProg, Context, Context->DeviceCount, Context->Devices.data());
Program = URProgram.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
Expand All @@ -47,16 +47,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL(
ur_context_handle_t hContext, const void *pIL, size_t length,
const ur_program_properties_t *, ur_program_handle_t *phProgram) {

if (!hContext->DeviceCount || !hContext->Devices[0]->Platform) {
return UR_RESULT_ERROR_INVALID_CONTEXT;
}
ur_platform_handle_t CurPlatform = hContext->Devices[0]->Platform;

oclv::OpenCLVersion PlatVer;
CL_RETURN_ON_FAILURE_AND_SET_NULL(CurPlatform->getPlatformVersion(PlatVer),
phProgram);

cl_int Err = CL_SUCCESS;
cl_program Program;
if (PlatVer >= oclv::V2_1) {

/* Make sure all devices support CL 2.1 or newer as well. */
Expand All @@ -81,18 +79,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL(
}
}

cl_program Program =
clCreateProgramWithIL(hContext->CLContext, pIL, length, &Err);
CL_RETURN_ON_FAILURE(Err);
try {
auto URProgram =
std::make_unique<ur_program_handle_t_>(Program, hContext);
*phProgram = URProgram.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}
Program = clCreateProgramWithIL(hContext->CLContext, pIL, length, &Err);
} else {
/* If none of the devices conform with CL 2.1 or newer make sure they all
* support the cl_khr_il_program extension.
Expand All @@ -116,17 +103,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL(

assert(FuncPtr != nullptr);

try {
cl_program Program = FuncPtr(hContext->CLContext, pIL, length, &Err);
CL_RETURN_ON_FAILURE(Err);
auto URProgram =
std::make_unique<ur_program_handle_t_>(Program, hContext);
*phProgram = URProgram.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}
Program = FuncPtr(hContext->CLContext, pIL, length, &Err);
}

// INVALID_VALUE is only returned in three circumstances according to the cl
Expand All @@ -148,24 +125,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL(
CL_RETURN_ON_FAILURE(Err);
}

try {
auto URProgram = std::make_unique<ur_program_handle_t_>(
Program, hContext, hContext->DeviceCount, hContext->Devices.data());
*phProgram = URProgram.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}

return UR_RESULT_SUCCESS;
}

UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
ur_context_handle_t hContext, uint32_t numDevices,
ur_device_handle_t *phDevices, size_t *pLengths, const uint8_t **ppBinaries,
const ur_program_properties_t *, ur_program_handle_t *phProgram) {
std::vector<cl_device_id> Devices(numDevices);
std::vector<cl_device_id> CLDevices(numDevices);
for (uint32_t i = 0; i < numDevices; ++i)
Devices[i] = phDevices[i]->CLDevice;
CLDevices[i] = phDevices[i]->CLDevice;
std::vector<cl_int> BinaryStatus(numDevices);
cl_int CLResult;
cl_program Program = clCreateProgramWithBinary(
hContext->CLContext,
static_cast<cl_uint>(numDevices), Devices.data(), pLengths,
ppBinaries, BinaryStatus.data(), &CLResult);
hContext->CLContext, static_cast<cl_uint>(numDevices), CLDevices.data(),
pLengths, ppBinaries, BinaryStatus.data(), &CLResult);
CL_RETURN_ON_FAILURE(CLResult);
auto URProgram = std::make_unique<ur_program_handle_t_>(Program, hContext);
auto URProgram = std::make_unique<ur_program_handle_t_>(
Program, hContext, numDevices, phDevices);
*phProgram = URProgram.release();
for (uint32_t i = 0; i < numDevices; ++i) {
CL_RETURN_ON_FAILURE(BinaryStatus[i]);
Expand All @@ -179,10 +166,10 @@ UR_APIEXPORT ur_result_t UR_APICALL
urProgramCompile([[maybe_unused]] ur_context_handle_t hContext,
ur_program_handle_t hProgram, const char *pOptions) {

uint32_t DeviceCount = hProgram->Context->DeviceCount;
uint32_t DeviceCount = hProgram->NumDevices;
std::vector<cl_device_id> CLDevicesInProgram(DeviceCount);
for (uint32_t i = 0; i < DeviceCount; i++) {
CLDevicesInProgram[i] = hProgram->Context->Devices[i]->CLDevice;
CLDevicesInProgram[i] = hProgram->Devices[i]->CLDevice;
}

CL_RETURN_ON_FAILURE(clCompileProgram(hProgram->CLProgram, DeviceCount,
Expand Down Expand Up @@ -230,15 +217,11 @@ urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName,
return ReturnValue(hProgram->Context);
}
case UR_PROGRAM_INFO_NUM_DEVICES: {
if (!hProgram->Context || !hProgram->Context->DeviceCount) {
return UR_RESULT_ERROR_INVALID_PROGRAM;
}
cl_uint DeviceCount = hProgram->Context->DeviceCount;
cl_uint DeviceCount = hProgram->NumDevices;
return ReturnValue(DeviceCount);
}
case UR_PROGRAM_INFO_DEVICES: {
return ReturnValue(&hProgram->Context->Devices[0],
hProgram->Context->DeviceCount);
return ReturnValue(hProgram->Devices.data(), hProgram->NumDevices);
}
case UR_PROGRAM_INFO_REFERENCE_COUNT: {
return ReturnValue(hProgram->getReferenceCount());
Expand All @@ -264,10 +247,10 @@ UR_APIEXPORT ur_result_t UR_APICALL
urProgramBuild([[maybe_unused]] ur_context_handle_t hContext,
ur_program_handle_t hProgram, const char *pOptions) {

uint32_t DeviceCount = hProgram->Context->DeviceCount;
uint32_t DeviceCount = hProgram->NumDevices;
std::vector<cl_device_id> CLDevicesInProgram(DeviceCount);
for (uint32_t i = 0; i < DeviceCount; i++) {
CLDevicesInProgram[i] = hProgram->Context->Devices[i]->CLDevice;
CLDevicesInProgram[i] = hProgram->Devices[i]->CLDevice;
}

CL_RETURN_ON_FAILURE(
Expand Down Expand Up @@ -297,7 +280,8 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count,
}
CL_RETURN_ON_FAILURE(CLResult);
try {
auto URProgram = std::make_unique<ur_program_handle_t_>(Program, hContext);
auto URProgram = std::make_unique<ur_program_handle_t_>(
Program, hContext, hContext->DeviceCount, hContext->Devices.data());
*phProgram = URProgram.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
Expand Down Expand Up @@ -445,8 +429,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramSetSpecializationConstants(
return UR_RESULT_ERROR_INVALID_CONTEXT;
}

ur_platform_handle_t CurPlatform = Ctx->Devices[0]->Platform;

if (ur::cl::getAdapter()->clSetProgramSpecializationConstant) {
for (uint32_t i = 0; i < count; ++i) {
CL_RETURN_ON_FAILURE(
Expand Down
10 changes: 8 additions & 2 deletions source/adapters/opencl/program.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,17 @@ struct ur_program_handle_t_ {
ur_context_handle_t Context;
std::atomic<uint32_t> RefCount = 0;
bool IsNativeHandleOwned = true;
uint32_t NumDevices = 0;
std::vector<ur_device_handle_t> Devices;

ur_program_handle_t_(native_type Prog, ur_context_handle_t Ctx)
: CLProgram(Prog), Context(Ctx) {
ur_program_handle_t_(native_type Prog, ur_context_handle_t Ctx,
uint32_t NumDevices, ur_device_handle_t *Devs)
: CLProgram(Prog), Context(Ctx), NumDevices(NumDevices) {
RefCount = 1;
urContextRetain(Context);
for (uint32_t i = 0; i < NumDevices; i++) {
Devices.push_back(Devs[i]);
}
}

~ur_program_handle_t_() {
Expand Down
39 changes: 20 additions & 19 deletions source/adapters/opencl/usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
// Have to look up the context from the kernel
cl_context CLContext = hQueue->Context->CLContext;

cl_int CLErr = CL_SUCCESS;
clGetMemAllocInfoINTEL_fn GetMemAllocInfo = nullptr;
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext<clGetMemAllocInfoINTEL_fn>(
CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache,
Expand Down Expand Up @@ -405,9 +406,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
// We need a queue associated with each device, so first figure out which
// one we weren't given.
cl_device_id QueueDevice = nullptr;
CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(
hQueue->CLQueue, CL_QUEUE_DEVICE,
sizeof(QueueDevice), &QueueDevice, nullptr));
CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(hQueue->CLQueue, CL_QUEUE_DEVICE,
sizeof(QueueDevice),
&QueueDevice, nullptr));

cl_command_queue MissingQueue = nullptr, SrcQueue = nullptr,
DstQueue = nullptr;
Expand Down Expand Up @@ -451,9 +452,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
for (uint32_t i = 0; i < numEventsInWaitList; i++) {
CLWaitEvents[i] = phEventWaitList[i]->CLEvent;
}
UR_RETURN_ON_FAILURE(checkCLErr(USMMemcpy(
SrcQueue, blocking, HostAlloc, pSrc, size, numEventsInWaitList,
CLWaitEvents.data(), &HostCopyEvent)));
UR_RETURN_ON_FAILURE(checkCLErr(
USMMemcpy(SrcQueue, blocking, HostAlloc, pSrc, size,
numEventsInWaitList, CLWaitEvents.data(), &HostCopyEvent)));

UR_RETURN_ON_FAILURE(
checkCLErr(USMMemcpy(DstQueue, blocking, pDst, HostAlloc, size, 1,
Expand Down Expand Up @@ -517,19 +518,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
for (uint32_t i = 0; i < numEventsInWaitList; i++) {
CLWaitEvents[i] = phEventWaitList[i]->CLEvent;
}
CL_RETURN_ON_FAILURE(
USMMemcpy(hQueue->CLQueue, blocking, pDst,
pSrc, size, numEventsInWaitList,
CLWaitEvents.data(),
&Event));
try {
auto UREvent = std::make_unique<ur_event_handle_t_>(
Event, hQueue->Context, hQueue);
*phEvent = UREvent.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
CL_RETURN_ON_FAILURE(USMMemcpy(hQueue->CLQueue, blocking, pDst, pSrc, size,
numEventsInWaitList, CLWaitEvents.data(),
&Event));
if (phEvent) {
try {
auto UREvent = std::make_unique<ur_event_handle_t_>(
Event, hQueue->Context, hQueue);
*phEvent = UREvent.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}
}
}

Expand Down

0 comments on commit 1912b88

Please sign in to comment.