-
Notifications
You must be signed in to change notification settings - Fork 102
/
solution.cpp
129 lines (101 loc) · 3.46 KB
/
solution.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
/*
SYCL Academy (c)
SYCL Academy is licensed under a Creative Commons
Attribution-ShareAlike 4.0 International License.
You should have received a copy of the license along with this
work. If not, see <http://creativecommons.org/licenses/by-sa/4.0/>.
*/
#include "../helpers.hpp"
#include <memory>
#include <sycl/sycl.hpp>
// Kernel name types. Each one is only forward-declared and is used as the
// template argument of exactly one parallel_for call in this file:
// kernel_a_1 / kernel_b_1 in test_buffer, kernel_a_2 / kernel_b_2 in test_usm.
class kernel_a_1;
class kernel_b_1;
class kernel_a_2;
class kernel_b_2;
// Device-selector callable handed to the sycl::queue constructor in test_usm.
//
// Returns the score for `dev`:
//   -1  when the device lacks aspect::usm_device_allocations,
//    2  when it has that aspect and is also a GPU,
//    1  when it has that aspect but is not a GPU.
// Under SYCL 2020 device selection the highest score wins and a negative
// score excludes the device.
int usm_selector(const sycl::device& dev) {
  // Guard clause: without device USM allocations the device is unusable here.
  if (!dev.has(sycl::aspect::usm_device_allocations)) {
    return -1;
  }

  // Among usable devices, rank GPUs above everything else.
  return dev.has(sycl::aspect::gpu) ? 2 : 1;
}
void test_buffer() {
constexpr size_t dataSize = 1024;
float in[dataSize], out[dataSize];
for (int i = 0; i < dataSize; ++i) {
in[i] = static_cast<float>(i);
out[i] = 0.0f;
}
try {
auto gpuQueue = sycl::queue { sycl::gpu_selector_v };
auto bufIn = sycl::buffer { in, sycl::range { dataSize } };
auto bufInt = sycl::buffer<float> { sycl::range { dataSize } };
auto bufOut = sycl::buffer<float> { sycl::range { dataSize } };
bufIn.set_final_data(nullptr);
bufOut.set_final_data(out);
gpuQueue.submit([&](sycl::handler& cgh) {
sycl::accessor accIn { bufIn, cgh, sycl::read_only };
sycl::accessor accOut { bufInt, cgh, sycl::write_only };
cgh.parallel_for<kernel_a_1>(
sycl::range { dataSize },
[=](sycl::id<1> idx) { accOut[idx] = accIn[idx] * 8.0f; });
});
gpuQueue.submit([&](sycl::handler& cgh) {
sycl::accessor accIn { bufInt, cgh, sycl::read_only };
sycl::accessor accOut { bufOut, cgh, sycl::write_only };
cgh.parallel_for<kernel_b_1>(
sycl::range { dataSize },
[=](sycl::id<1> idx) { accOut[idx] = accIn[idx] / 2.0f; });
});
gpuQueue.wait_and_throw();
} catch (const sycl::exception& e) {
std::cout << "Exception caught: " << e.what() << std::endl;
}
for (int i = 0; i < dataSize; ++i) {
SYCLACADEMY_ASSERT(out[i] == i * 4.0f);
}
}
void test_usm() {
constexpr size_t dataSize = 1024;
float in[dataSize], out[dataSize];
for (int i = 0; i < dataSize; ++i) {
in[i] = static_cast<float>(i);
out[i] = 0.0f;
}
try {
auto usmQueue = sycl::queue { usm_selector };
auto devicePtrIn = sycl::malloc_device<float>(dataSize, usmQueue);
auto devicePtrInt = sycl::malloc_device<float>(dataSize, usmQueue);
auto devicePtrOut = sycl::malloc_device<float>(dataSize, usmQueue);
auto e1 = usmQueue.memcpy(devicePtrIn, in, sizeof(float) * dataSize);
auto e2 = usmQueue.parallel_for<kernel_a_2>(
sycl::range { dataSize }, e1, [=](sycl::id<1> idx) {
auto globalId = idx[0];
devicePtrInt[globalId] = devicePtrIn[globalId] * 8.0f;
});
auto e3 = usmQueue.parallel_for<kernel_b_2>(
sycl::range { dataSize }, e2, [=](sycl::id<1> idx) {
auto globalId = idx[0];
devicePtrOut[globalId] = devicePtrInt[globalId] / 2.0f;
});
usmQueue
.submit([&](sycl::handler& cgh) {
cgh.depends_on(e3);
cgh.memcpy(out, devicePtrOut, sizeof(float) * dataSize);
})
.wait();
sycl::free(devicePtrIn, usmQueue);
sycl::free(devicePtrInt, usmQueue);
sycl::free(devicePtrOut, usmQueue);
usmQueue.throw_asynchronous();
} catch (const sycl::exception& e) {
std::cout << "Exception caught: " << e.what() << std::endl;
}
for (int i = 0; i < dataSize; ++i) {
SYCLACADEMY_ASSERT(out[i] == i * 4.0f);
}
}
// Program entry point: runs the USM variant first, then the buffer variant.
int main() {
  test_usm();
  test_buffer();
  return 0;
}