diff --git a/sycl/test-e2e/NewOffloadDriver/diamond_shape.cpp b/sycl/test-e2e/NewOffloadDriver/diamond_shape.cpp deleted file mode 100644 index d3fb670b6bb7..000000000000 --- a/sycl/test-e2e/NewOffloadDriver/diamond_shape.cpp +++ /dev/null @@ -1,109 +0,0 @@ -// REQUIRES: fusion - -// RUN: %{build} %{embed-ir} -O2 --offload-new-driver -o %t.out -// RUN: %{run} %t.out - -// Test complete fusion with private internalization specified on the -// accessors for a combination of four kernels, forming a diamond-like shape and -// repeating one of the kernels. This test uses the new offloading model for -// linking device objects. - -#include -#include -#include - -using namespace sycl; - -struct AddKernel { - accessor accIn1; - accessor accIn2; - accessor accOut; - - void operator()(id<1> i) const { accOut[i] = accIn1[i] + accIn2[i]; } -}; - -int main() { - constexpr size_t dataSize = 512; - int in1[dataSize], in2[dataSize], in3[dataSize], tmp1[dataSize], - tmp2[dataSize], tmp3[dataSize], out[dataSize]; - - for (size_t i = 0; i < dataSize; ++i) { - in1[i] = i * 2; - in2[i] = i * 3; - in3[i] = i * 4; - tmp1[i] = -1; - tmp2[i] = -1; - tmp3[i] = -1; - out[i] = -1; - } - - queue q{ext::codeplay::experimental::property::queue::enable_fusion{}}; - - { - buffer bIn1{in1, range{dataSize}}; - buffer bIn2{in2, range{dataSize}}; - buffer bIn3{in3, range{dataSize}}; - buffer bTmp1{tmp1, range{dataSize}}; - buffer bTmp2{tmp2, range{dataSize}}; - buffer bTmp3{tmp3, range{dataSize}}; - buffer bOut{out, range{dataSize}}; - - ext::codeplay::experimental::fusion_wrapper fw{q}; - fw.start_fusion(); - - assert(fw.is_in_fusion_mode() && "Queue should be in fusion mode"); - - q.submit([&](handler &cgh) { - auto accIn1 = bIn1.get_access(cgh); - auto accIn2 = bIn2.get_access(cgh); - auto accTmp1 = bTmp1.get_access( - cgh, sycl::ext::codeplay::experimental::property::promote_private{}); - cgh.parallel_for(dataSize, AddKernel{accIn1, accIn2, accTmp1}); - }); - - q.submit([&](handler &cgh) { - auto accTmp1 = bTmp1.get_access( - cgh, sycl::ext::codeplay::experimental::property::promote_private{}); - auto accIn3 = bIn3.get_access(cgh); - auto accTmp2 = bTmp2.get_access( - cgh, sycl::ext::codeplay::experimental::property::promote_private{}); - cgh.parallel_for( - dataSize, [=](id<1> i) { accTmp2[i] = accTmp1[i] * accIn3[i]; }); - }); - - q.submit([&](handler &cgh) { - auto accTmp1 = bTmp1.get_access( - cgh, sycl::ext::codeplay::experimental::property::promote_private{}); - auto accTmp3 = bTmp3.get_access( - cgh, sycl::ext::codeplay::experimental::property::promote_private{}); - cgh.parallel_for( - dataSize, [=](id<1> i) { accTmp3[i] = accTmp1[i] * 5; }); - }); - - q.submit([&](handler &cgh) { - auto accTmp2 = bTmp2.get_access( - cgh, sycl::ext::codeplay::experimental::property::promote_private{}); - auto accTmp3 = bTmp3.get_access( - cgh, sycl::ext::codeplay::experimental::property::promote_private{}); - auto accOut = bOut.get_access(cgh); - cgh.parallel_for(dataSize, - AddKernel{accTmp2, accTmp3, accOut}); - }); - - fw.complete_fusion({ext::codeplay::experimental::property::no_barriers{}}); - - assert(!fw.is_in_fusion_mode() && - "Queue should not be in fusion mode anymore"); - } - - // Check the results - for (size_t i = 0; i < dataSize; ++i) { - assert(out[i] == (20 * i * i + i * 25) && "Computation error"); - assert(tmp1[i] == -1 && "tmp1 not internalized"); - assert(tmp2[i] == -1 && "tmp2 not internalized"); - assert(tmp3[i] == -1 && "tmp3 not internalized"); - } - - return 0; -} - diff --git a/sycl/test-e2e/README.md b/sycl/test-e2e/README.md index e04ebdc6d957..15d09b0d9012 100644 --- a/sycl/test-e2e/README.md +++ b/sycl/test-e2e/README.md @@ -229,7 +229,6 @@ environment: * **dump_ir**: - compiler can / cannot dump IR; * **llvm-spirv** - llvm-spirv tool availability; * **llvm-link** - llvm-link tool availability; - * **fusion**: - Runtime supports kernel fusion; * **aspect-\**: - SYCL aspects supported by a device; * **arch-\** - [SYCL architecture](https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc) of a device (e.g. `arch-intel_gpu_pvc`, the name matches what you can pass into `-fsycl-targets` compiler flag);