forked from ville-k/sycl_starter
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main_sycl.cc
80 lines (66 loc) · 2.6 KB
/
main_sycl.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#include <iostream>
#include <CL/sycl.hpp>
using namespace std;
using namespace cl::sycl;
/// Cyclic sequence generator.
///
/// Successive generate() calls yield start, start + 1, ..., start + length - 1
/// (each converted to GeneratedType) and then wrap around to start again.
template<typename GeneratedType>
struct Generator {
  /// @param start   first value of the cycle
  /// @param length  number of distinct values produced before wrapping; a length
  ///                of 0 is treated as a degenerate cycle that always yields
  ///                `start`
  Generator(size_t start, size_t length)
    : offset_(0), start_(start), length_(length) {
  }

  /// Returns the current value of the cycle and advances to the next one.
  GeneratedType generate() {
    GeneratedType generated(start_ + offset_);
    // Guard the modulo: `x % 0` is undefined behaviour, so a zero-length
    // generator simply stays at offset 0 instead of dividing by zero.
    offset_ = (length_ == 0) ? 0 : (offset_ + 1) % length_;
    return generated;
  }

  size_t offset_;  ///< distance of the next value from start_
  size_t start_;   ///< first value of the cycle
  size_t length_;  ///< number of values per cycle (0 => always start_)
};
int main(int argc, char **argv) {
default_selector selector;
// Use host implementation for easy debugging
//host_selector selector;
queue queue(selector);
const size_t ROWS = 4;
const size_t COLS = 4;
float data[ROWS * COLS] = {
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0
};
Generator<float> host_generator(0, 4);
for (size_t i = 0; i < 8; ++i) {
cout << host_generator.generate() << " " << std::endl;
}
buffer<float, 2> buffer(data, range<2>(ROWS, COLS));
queue.submit([&](handler &command_group_handler) {
auto access = buffer.get_access<access::mode::read_write>(command_group_handler);
range<2> work_groups(2, 2);
command_group_handler.parallel_for_work_group<class matrix_map_g>(
work_groups, range<2>(2,2),
[=](group<2> group_id) {
auto group_row = group_id.get(0);
auto group_col = group_id.get(1);
Generator<float> device_generator(0, 4);
parallel_for_work_item(group_id, [&](item<2> item_id) {
for (size_t row_offset = 0; row_offset < group_id.get_group_range()[0]; ++row_offset) {
for (size_t col_offset = 0; col_offset < group_id.get_group_range()[1]; ++col_offset) {
size_t matrix_row = group_row * group_id.get_group_range()[0] + row_offset;
size_t matrix_col = group_col * group_id.get_group_range()[1] + col_offset;
access[matrix_row][matrix_col] = device_generator.generate();
}
}
});
});
});
cout << "Waiting for kernel to finish..." << std::endl;
queue.wait();
accessor<float, 2, access::mode::read, access::target::host_buffer> host_accessor(buffer);
for (size_t i = 0; i < ROWS; i++) {
for (size_t j = 0; j < COLS; j++) {
cout << " " << host_accessor[i][j];
}
cout << std::endl;
}
}