Skip to content
This repository has been archived by the owner on Jan 20, 2024. It is now read-only.

[OpenMP] Add OpenMP v6.0 API Routines omp_target_memset() and omp_target_memset_async() #239

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions openmp/libomptarget/include/omptarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ int omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
const size_t *DstDimensions,
const size_t *SrcDimensions, int DstDevice,
int SrcDevice);
void *omp_target_memset(void *Ptr, int C, size_t N, int DeviceNum);
int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
size_t Size, size_t DeviceOffset, int DeviceNum);
int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum);
Expand Down
133 changes: 109 additions & 24 deletions openmp/libomptarget/src/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
}

// The helper function that calls omp_target_memcpy or omp_target_memcpy_rect
static int libomp_target_memcpy_async_helper(kmp_int32 Gtid, kmp_task_t *Task) {
static int libomp_target_memcpy_async_task(kmp_int32 Gtid, kmp_task_t *Task) {
if (Task == nullptr)
return OFFLOAD_FAIL;

Expand Down Expand Up @@ -241,47 +241,130 @@ static int libomp_target_memcpy_async_helper(kmp_int32 Gtid, kmp_task_t *Task) {
return Rc;
}

// Allocate and launch helper task
static int libomp_helper_task_creation(TargetMemcpyArgsTy *Args,
int DepObjCount,
omp_depend_t *DepObjList) {
/// Task entry point used for the asynchronous memset.
///
/// Reads the TargetMemsetArgsTy stored in the task's `shareds` slot, forwards
/// its fields to omp_target_memset(), and then deletes the argument object.
///
/// \param Gtid global thread id supplied by the caller; not used here.
/// \param Task task whose `shareds` field carries the memset arguments.
/// \returns OFFLOAD_FAIL if \p Task or its arguments are null, otherwise
///          OFFLOAD_SUCCESS.
static int libomp_target_memset_async_task(kmp_int32 Gtid, kmp_task_t *Task) {
  // A missing task and a task without arguments are rejected the same way.
  TargetMemsetArgsTy *MemsetArgs =
      Task ? reinterpret_cast<TargetMemsetArgsTy *>(Task->shareds) : nullptr;
  if (MemsetArgs == nullptr)
    return OFFLOAD_FAIL;

  // The returned pointer carries no error information, so it is dropped.
  omp_target_memset(MemsetArgs->Ptr, MemsetArgs->C, MemsetArgs->N,
                    MemsetArgs->DeviceNum);

  // This function releases the argument object once the fill has been issued.
  delete MemsetArgs;
  return OFFLOAD_SUCCESS;
}

/// Appends to \p Vec a copy of the kmp_depend_info_t that each of the first
/// \p DepObjCount handles in \p DepObjList is cast to.
static inline void
ConvertDepObjVector(llvm::SmallVector<kmp_depend_info_t> &Vec, int DepObjCount,
                    omp_depend_t *DepObjList) {
  for (int Idx = 0; Idx < DepObjCount; ++Idx) {
    // Each handle is reinterpreted as a pointer to the runtime's record.
    auto *Info = (kmp_depend_info_t *)DepObjList[Idx];
    Vec.push_back(*Info);
  }
}

// Allocates a task with the hidden_helper flag set that runs Fn, stores Args
// in the task's `shareds` slot, converts the depend objects and launches the
// task through __kmpc_omp_task_with_deps.
//
// Args        - argument object; deleted here if task allocation fails.
// Fn          - task entry point passed to __kmpc_omp_target_task_alloc.
// DepObjCount - number of entries read from DepObjList.
// DepObjList  - depend objects converted to kmp_depend_info_t.
//
// Returns OFFLOAD_FAIL if the task could not be allocated, otherwise the
// value returned by __kmpc_omp_task_with_deps.
//
// NOTE(review): this span appears to interleave removed and added diff lines
// (both the `Ptr` and the `Task` variants are present, and `Fn` is declared
// again in the body) - confirm against the real file before relying on it.
template <class T>
static inline int
libomp_helper_task_creation(T *Args, int (*Fn)(kmp_int32, kmp_task_t *),
int DepObjCount, omp_depend_t *DepObjList) {
// Create global thread ID
int Gtid = __kmpc_global_thread_num(nullptr);
int (*Fn)(kmp_int32, kmp_task_t *) = &libomp_target_memcpy_async_helper;

// Setup the hidden helper flags;
// Setup the hidden helper flags
kmp_int32 Flags = 0;
kmp_tasking_flags_t *InputFlags = (kmp_tasking_flags_t *)&Flags;
InputFlags->hidden_helper = 1;

// Alloc helper task
kmp_task_t *Ptr = __kmpc_omp_target_task_alloc(nullptr, Gtid, Flags,
sizeof(kmp_task_t), 0, Fn, -1);

if (Ptr == nullptr) {
// Task allocation failed, delete the argument object
// Alloc the helper task
kmp_task_t *Task = __kmpc_omp_target_task_alloc(
nullptr, Gtid, Flags, sizeof(kmp_task_t), 0, Fn, -1);
if (!Task) {
delete Args;

return OFFLOAD_FAIL;
}

// Setup the arguments passed to helper task
Ptr->shareds = Args;
// Setup the arguments for the helper task
Task->shareds = Args;

// Convert the type of depend objects
// Convert types of depend objects
llvm::SmallVector<kmp_depend_info_t> DepObjs;
for (int i = 0; i < DepObjCount; i++) {
omp_depend_t DepObj = DepObjList[i];
DepObjs.push_back(*((kmp_depend_info_t *)DepObj));
}
ConvertDepObjVector(DepObjs, DepObjCount, DepObjList);

// Launch the helper task
int Rc = __kmpc_omp_task_with_deps(nullptr, Gtid, Ptr, DepObjCount,
int Rc = __kmpc_omp_task_with_deps(nullptr, Gtid, Task, DepObjCount,
DepObjs.data(), 0, nullptr);

return Rc;
}

/// Fills the first \p NumBytes bytes of the memory at \p Ptr, which belongs to
/// device \p DeviceNum, with the byte value \p ByteVal.
///
/// \param Ptr       start of the memory to fill.
/// \param ByteVal   fill value, handed to memset().
/// \param NumBytes  number of bytes to fill.
/// \param DeviceNum device that owns \p Ptr; the initial device selects a
///                  plain host memset().
/// \returns \p Ptr in every case. The routine has no error return, so
///          failures are only reported through debug output.
EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes,
                               int DeviceNum) {
  TIMESCOPE();
  DP("Call to omp_target_memset, device %d, device pointer %p, size %zu\n",
     DeviceNum, Ptr, NumBytes);

  // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation
  // of unspecified behavior, see OpenMP spec).
  if (!Ptr || NumBytes == 0) {
    return Ptr;
  }

  // Query the initial device once; both paths below need it.
  const int InitialDevice = omp_get_initial_device();

  if (DeviceNum == InitialDevice) {
    DP("filling memory on host via memset\n");
    memset(Ptr, ByteVal, NumBytes); // ignore return value, memset() cannot fail
  } else {
    // TODO: replace the omp_target_memset() slow path with the fast path.
    // That will require the ability to execute a kernel from within
    // libomptarget.so (which we do not have at the moment).

    // This is a very slow path: create a filled array on the host and upload
    // it to the GPU device.
    void *Shadow = omp_target_alloc(NumBytes, InitialDevice);
    if (Shadow) {
      (void)memset(Shadow, ByteVal, NumBytes);
      // The copy can fail; report it instead of dropping the result silently.
      if (omp_target_memcpy(Ptr, Shadow, NumBytes, 0, 0, DeviceNum,
                            InitialDevice) != OFFLOAD_SUCCESS) {
        DP("omp_target_memset failed to fill memory due to error with "
           "omp_target_memcpy\n");
      }
      (void)omp_target_free(Shadow, InitialDevice);
    } else {
      // If the omp_target_alloc has failed, let's just not do anything.
      // omp_target_memset does not have any good way to fail, so we
      // simply avoid a catastrophic failure of the process for now.
      DP("omp_target_memset failed to fill memory due to error with "
         "omp_target_alloc\n");
    }
  }

  DP("omp_target_memset returns %p\n", Ptr);
  return Ptr;
}

/// Asynchronous variant of omp_target_memset(): packs the arguments into a
/// TargetMemsetArgsTy and hands them to libomp_helper_task_creation() together
/// with the memset task entry point and the caller's depend objects.
///
/// \param Ptr         start of the memory to fill.
/// \param ByteVal     fill value.
/// \param NumBytes    number of bytes to fill.
/// \param DeviceNum   device that owns \p Ptr.
/// \param DepObjCount number of entries in \p DepObjList.
/// \param DepObjList  depend objects forwarded to the helper task.
/// \returns \p Ptr in every case; the routine has no error return.
EXTERN void *omp_target_memset_async(void *Ptr, int ByteVal, size_t NumBytes,
                                     int DeviceNum, int DepObjCount,
                                     omp_depend_t *DepObjList) {
  DP("Call to omp_target_memset_async, device %d, device pointer %p, size "
     "%zu\n",
     DeviceNum, Ptr, NumBytes);

  // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation
  // of unspecified behavior, see OpenMP spec).
  if (!Ptr || NumBytes == 0)
    return Ptr;

  // Create the task object to deal with the async invocation. It is deleted
  // either by the helper when task allocation fails or by the task itself.
  auto *Args = new TargetMemsetArgsTy{Ptr, ByteVal, NumBytes, DeviceNum};

  // omp_target_memset_async() cannot fail via a return code, so ignore the
  // return code of the helper function
  (void)libomp_helper_task_creation(Args, &libomp_target_memset_async_task,
                                    DepObjCount, DepObjList);

  DP("omp_target_memset_async returns %p\n", Ptr);
  return Ptr;
}

EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length,
size_t DstOffset, size_t SrcOffset,
int DstDevice, int SrcDevice,
Expand All @@ -302,7 +385,8 @@ EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length,
Dst, Src, Length, DstOffset, SrcOffset, DstDevice, SrcDevice);

// Create and launch helper task
int Rc = libomp_helper_task_creation(Args, DepObjCount, DepObjList);
int Rc = libomp_helper_task_creation(Args, &libomp_target_memcpy_async_task,
DepObjCount, DepObjList);

DP("omp_target_memcpy_async returns %d\n", Rc);
return Rc;
Expand Down Expand Up @@ -399,7 +483,8 @@ EXTERN int omp_target_memcpy_rect_async(
DstDimensions, SrcDimensions, DstDevice, SrcDevice);

// Create and launch helper task
int Rc = libomp_helper_task_creation(Args, DepObjCount, DepObjList);
int Rc = libomp_helper_task_creation(Args, &libomp_target_memcpy_async_task,
DepObjCount, DepObjList);

DP("omp_target_memcpy_rect_async returns %d\n", Rc);
return Rc;
Expand Down
2 changes: 2 additions & 0 deletions openmp/libomptarget/src/exports
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ VERS1.0 {
omp_target_memcpy_rect;
omp_target_memcpy_async;
omp_target_memcpy_rect_async;
omp_target_memset;
omp_target_memset_async;
omp_target_associate_ptr;
omp_target_disassociate_ptr;
llvm_omp_target_alloc_host;
Expand Down
13 changes: 13 additions & 0 deletions openmp/libomptarget/src/private.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,19 @@ struct TargetMemcpyArgsTy {
DstOffsets(DstOffsets), SrcOffsets(SrcOffsets),
DstDimensions(DstDimensions), SrcDimensions(SrcDimensions){};
};

// Argument bundle for the asynchronous memset task. The task entry point
// forwards the fields, in declaration order, to
// omp_target_memset(Ptr, C, N, DeviceNum). The struct is aggregate-initialised
// by its creator, so the field order must not change.
struct TargetMemsetArgsTy {
/**
* Common attributes of a memset operation
*/
// Start of the memory to fill.
void *Ptr;
// Fill value.
int C;
// Number of bytes to fill.
size_t N;
// Device number the fill is issued for.
int DeviceNum;

// no constructors defined, because this is a PoD
};

// Invalid GTID as defined by libomp; keep in sync
#define KMP_GTID_DNE (-2)
#ifdef __cplusplus
Expand Down
45 changes: 45 additions & 0 deletions openmp/libomptarget/test/api/omp_target_memset.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// RUN: %libomptarget-compile-and-run-generic

#include "stdio.h"
#include <omp.h>
#include <stdlib.h>

// Allocates 130 ints on the default device (falling back to the initial
// device when the default device number is out of range), zero-fills them
// with omp_target_memset(), and checks that
//   - the call returns the pointer it was given, and
//   - 128 ints copied back, starting at a source offset of sizeof(int), are
//     all zero.
// The host buffer is pre-filled with non-zero values so that a copy or fill
// that did not happen is detected. The test is skipped (returns 0) if the
// allocation fails.
int main() {
  int d = omp_get_default_device();
  int id = omp_get_initial_device();
  int i;
  void *p;
  void *result;

  if (d < 0 || d >= omp_get_num_devices())
    d = id;

  p = omp_target_alloc(130 * sizeof(int), d);
  if (p == NULL)
    return 0;

  result = omp_target_memset(p, 0, 130 * sizeof(int), d);
  if (result != p) {
    abort();
  }

  int q2[128];
  for (i = 0; i < 128; ++i)
    q2[i] = i;
  if (omp_target_memcpy_async(q2, p, 128 * sizeof(int), 0, sizeof(int), id, d,
                              0, NULL))
    abort();

  // Wait for the asynchronous copy before inspecting the host buffer.
#pragma omp taskwait

  for (i = 0; i < 128; ++i)
    if (q2[i] != 0)
      abort();

  omp_target_free(p, d);

  return 0;
}
2 changes: 2 additions & 0 deletions openmp/runtime/src/dllexports
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,8 @@ kmp_set_warnings_off 780
omp_target_memcpy_rect 887
omp_target_associate_ptr 888
omp_target_disassociate_ptr 889
omp_target_memset 3000
omp_target_memset_async 3001
%endif

kmp_set_disp_num_buffers 890
Expand Down
5 changes: 5 additions & 0 deletions openmp/runtime/src/include/omp.h.var
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,11 @@
extern int __KAI_KMPC_CONVENTION omp_target_memcpy_rect_async(void *, const void *, size_t, int, const size_t *,
const size_t *, const size_t *, const size_t *, const size_t *, int, int,
int, omp_depend_t *);

/* OpenMP 6.0 device memory routines */
extern void * __KAI_KMPC_CONVENTION omp_target_memset(void *, int, size_t, int);
extern void * __KAI_KMPC_CONVENTION omp_target_memset_async(void *, int, size_t, int, int, omp_depend_t *);

/*!
* The `omp_get_mapped_ptr` routine returns the device pointer that is associated with a host pointer for a given device.
*/
Expand Down
22 changes: 22 additions & 0 deletions openmp/runtime/src/include/omp_lib.f90.var
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,28 @@
integer (omp_depend_kind), optional :: depobj_list(*)
end function omp_target_memcpy_rect_async

! C-interoperable interface to omp_target_memset: takes a C pointer, a fill
! value, a byte count and a device number by value; the result is a C pointer.
function omp_target_memset(ptr, val, count, device_num) bind(c)
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
  type(c_ptr), value :: ptr
  integer(c_int), value :: val, device_num
  integer(c_size_t), value :: count
  type(c_ptr) :: omp_target_memset
end function omp_target_memset

! C-interoperable interface to omp_target_memset_async: the arguments of
! omp_target_memset followed by a depend-object count and an optional
! assumed-size list of depend objects; the result is a C pointer.
function omp_target_memset_async(ptr, val, count, device_num, &
    depobj_count, depobj_list) bind(c)
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
  use omp_lib_kinds
  type(c_ptr), value :: ptr
  integer(c_int), value :: val, device_num, depobj_count
  integer(c_size_t), value :: count
  integer(omp_depend_kind), optional :: depobj_list(*)
  type(c_ptr) :: omp_target_memset_async
end function omp_target_memset_async

function omp_target_associate_ptr(host_ptr, device_ptr, size, &
device_offset, device_num) bind(c)
use omp_lib_kinds
Expand Down
22 changes: 22 additions & 0 deletions openmp/runtime/src/include/omp_lib.h.var
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,28 @@
integer(omp_depend_kind), optional :: depobj_list(*)
end function omp_target_memcpy_rect_async

! C-interoperable interface to omp_target_memset: takes a C pointer, a fill
! value, a byte count and a device number by value; the result is a C pointer.
function omp_target_memset(ptr, val, count, device_num) bind(c)
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
  type(c_ptr), value :: ptr
  integer(c_int), value :: val, device_num
  integer(c_size_t), value :: count
  type(c_ptr) :: omp_target_memset
end function omp_target_memset

! C-interoperable interface to omp_target_memset_async: the arguments of
! omp_target_memset followed by a depend-object count and an optional
! assumed-size list of depend objects; the result is a C pointer.
! The continuation line carries a leading `&` as well as the trailing one on
! the line before, matching the neighbouring interfaces in this include file.
! NOTE(review): confirm that omp_lib_kinds is visible wherever this include
! file is used.
function omp_target_memset_async(ptr, val, count, device_num, &
& depobj_count, depobj_list) bind(c)
  use, intrinsic :: iso_c_binding, only : c_ptr, c_int, c_size_t
  use omp_lib_kinds
  type(c_ptr) :: omp_target_memset_async
  type(c_ptr), value :: ptr
  integer(c_int), value :: val
  integer(c_size_t), value :: count
  integer(c_int), value :: device_num
  integer(c_int), value :: depobj_count
  integer(omp_depend_kind), optional :: depobj_list(*)
end function omp_target_memset_async

function omp_target_associate_ptr(host_ptr, device_ptr, size, &
& device_offset, device_num) bind(c)
use, intrinsic :: iso_c_binding, only : c_ptr, c_size_t, c_int
Expand Down
2 changes: 2 additions & 0 deletions openmp/runtime/src/kmp_ftn_os.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@
#define FTN_TARGET_IS_PRESENT omp_target_is_present
#define FTN_TARGET_MEMCPY omp_target_memcpy
#define FTN_TARGET_MEMCPY_RECT omp_target_memcpy_rect
#define FTN_TARGET_MEMSET omp_target_memset
#define FTN_TARGET_MEMSET_ASYNC omp_target_memset_async
#define FTN_TARGET_ASSOCIATE_PTR omp_target_associate_ptr
#define FTN_TARGET_DISASSOCIATE_PTR omp_target_disassociate_ptr
#endif
Expand Down
Loading