From e9c87fa6ab1262189ec3d1b47e55ab7c69497b67 Mon Sep 17 00:00:00 2001 From: "Tang, Jiajun" Date: Wed, 6 Mar 2024 15:53:47 +0800 Subject: [PATCH 1/2] [SYCLomatic] Refine bindless image helper functions. Signed-off-by: Tang, Jiajun jiajun.tang@intel.com --- clang/lib/DPCT/ASTTraversal.cpp | 2 +- .../dpct-rt/include/dpct/bindless_images.hpp | 63 ++++++++++--------- .../texture/texture_object_bindless_image.cu | 43 +++++++------ .../texture_reference_bindless_image.cu | 10 +++ 4 files changed, 69 insertions(+), 49 deletions(-) diff --git a/clang/lib/DPCT/ASTTraversal.cpp b/clang/lib/DPCT/ASTTraversal.cpp index 615a1d8121ce..9ac561b6722e 100644 --- a/clang/lib/DPCT/ASTTraversal.cpp +++ b/clang/lib/DPCT/ASTTraversal.cpp @@ -13261,7 +13261,7 @@ bool TextureRule::tryMerge(const MemberExpr *ME, const Expr *BO) { void TextureRule::replaceTextureMember(const MemberExpr *ME, ASTContext &Context, SourceManager &SM) { auto AssignedBO = getParentAsAssignedBO(ME, Context); - if (tryMerge(ME, AssignedBO)) + if (!DpctGlobalInfo::useExtBindlessImages() && tryMerge(ME, AssignedBO)) return; auto Field = ME->getMemberNameInfo().getAsString(); diff --git a/clang/runtime/dpct-rt/include/dpct/bindless_images.hpp b/clang/runtime/dpct-rt/include/dpct/bindless_images.hpp index f6907db1e119..bc7a8acf3457 100644 --- a/clang/runtime/dpct-rt/include/dpct/bindless_images.hpp +++ b/clang/runtime/dpct-rt/include/dpct/bindless_images.hpp @@ -50,7 +50,7 @@ class image_mem_wrapper { /// memory. template image_mem_wrapper(image_channel channel, Args... size) - : image_mem_wrapper(channel, sycl::range{size...}) {} + : image_mem_wrapper(channel, sycl::range{(size_t)size...}) {} image_mem_wrapper(const image_mem_wrapper &) = delete; image_mem_wrapper &operator=(const image_mem_wrapper &) = delete; /// Destroy bindless image memory wrapper. @@ -218,7 +218,8 @@ dpct_memcpy(sycl::ext::oneapi::experimental::image_mem_handle src, } static inline sycl::event -dpct_memcpy(void *src, sycl::ext::oneapi::experimental::image_mem_handle dest, +dpct_memcpy(const void *src, + sycl::ext::oneapi::experimental::image_mem_handle dest, const sycl::ext::oneapi::experimental::image_descriptor &desc_dest, size_t w_offset_dest, size_t h_offset_dest, size_t p, size_t w, size_t h, sycl::queue q) { @@ -228,12 +229,13 @@ dpct_memcpy(void *src, sycl::ext::oneapi::experimental::image_mem_handle dest, const auto dest_offset = sycl::range<3>(w_offset_dest / ele_size, h_offset_dest, 0); const auto copy_extend = sycl::range<3>(w / ele_size, h, 0); - return q.ext_oneapi_copy(src, src_offset, src_extend, dest, dest_offset, - desc_dest, copy_extend); + return q.ext_oneapi_copy(const_cast(src), src_offset, src_extend, + dest, dest_offset, desc_dest, copy_extend); } static inline std::vector -dpct_memcpy(void *src, sycl::ext::oneapi::experimental::image_mem_handle dest, +dpct_memcpy(const void *src, + sycl::ext::oneapi::experimental::image_mem_handle dest, const sycl::ext::oneapi::experimental::image_descriptor &desc_dest, size_t w_offset_dest, size_t h_offset_dest, size_t s, sycl::queue q = get_default_queue()) { @@ -248,9 +250,9 @@ dpct_memcpy(void *src, sycl::ext::oneapi::experimental::image_mem_handle dest, sycl::range<3>(w_offset_dest / ele_size, h_offset_dest, 0); const auto copy_extend = sycl::range<3>((w - w_offset_dest) / ele_size, 1, 0); - event_list.push_back(q.ext_oneapi_copy(src, src_offset, src_extend, dest, - dest_offset, desc_dest, - copy_extend)); + event_list.push_back(q.ext_oneapi_copy(const_cast(src), src_offset, + src_extend, dest, dest_offset, + desc_dest, copy_extend)); offset_src += w - w_offset_dest; w_offset_dest = 0; ++h_offset_dest; @@ -261,8 +263,9 @@ dpct_memcpy(void *src, sycl::ext::oneapi::experimental::image_mem_handle dest, sycl::range<3>(w_offset_dest / ele_size, h_offset_dest, 0); const auto copy_extend = sycl::range<3>((s - offset_src - w_offset_dest) / ele_size, 1, 0); - event_list.push_back(q.ext_oneapi_copy(src, src_offset, src_extend, dest, - dest_offset, desc_dest, copy_extend)); + event_list.push_back(q.ext_oneapi_copy(const_cast(src), src_offset, + src_extend, dest, dest_offset, + desc_dest, copy_extend)); return event_list; } @@ -595,7 +598,7 @@ template class bindless_image_wrapper { /// \param [in] w The width of matrix to be copied. /// \param [in] h The height of matrix to be copied. /// \param [in] q The queue to execute the copy task. -static inline void async_dpct_memcpy(image_mem_wrapper *src, +static inline void async_dpct_memcpy(const image_mem_wrapper *src, size_t w_offset_src, size_t h_offset_src, void *dest, size_t p, size_t w, size_t h, sycl::queue q = get_default_queue()) { @@ -613,9 +616,9 @@ static inline void async_dpct_memcpy(image_mem_wrapper *src, /// \param [in] w The width of matrix to be copied. /// \param [in] h The height of matrix to be copied. /// \param [in] q The queue to execute the copy task. -static inline void dpct_memcpy(image_mem_wrapper *src, size_t w_offset_src, - size_t h_offset_src, void *dest, size_t p, - size_t w, size_t h, +static inline void dpct_memcpy(const image_mem_wrapper *src, + size_t w_offset_src, size_t h_offset_src, + void *dest, size_t p, size_t w, size_t h, sycl::queue q = get_default_queue()) { detail::dpct_memcpy(src->get_handle(), src->get_desc(), w_offset_src, h_offset_src, dest, p, w, h, q) @@ -631,7 +634,7 @@ static inline void dpct_memcpy(image_mem_wrapper *src, size_t w_offset_src, /// \param [in] dest The destination memory address. /// \param [in] s The size to be copied. /// \param [in] q The queue to execute the copy task. -static inline void async_dpct_memcpy(image_mem_wrapper *src, +static inline void async_dpct_memcpy(const image_mem_wrapper *src, size_t w_offset_src, size_t h_offset_src, void *dest, size_t s, sycl::queue q = get_default_queue()) { @@ -647,8 +650,9 @@ static inline void async_dpct_memcpy(image_mem_wrapper *src, /// \param [in] dest The destination memory address. /// \param [in] s The size to be copied. /// \param [in] q The queue to execute the copy task. -static inline void dpct_memcpy(image_mem_wrapper *src, size_t w_offset_src, - size_t h_offset_src, void *dest, size_t s, +static inline void dpct_memcpy(const image_mem_wrapper *src, + size_t w_offset_src, size_t h_offset_src, + void *dest, size_t s, sycl::queue q = get_default_queue()) { sycl::event::wait(detail::dpct_memcpy(src->get_handle(), src->get_desc(), w_offset_src, h_offset_src, dest, s, @@ -665,7 +669,7 @@ static inline void dpct_memcpy(image_mem_wrapper *src, size_t w_offset_src, /// \param [in] w The width of matrix to be copied. /// \param [in] h The height of matrix to be copied. /// \param [in] q The queue to execute the copy task. -static inline void async_dpct_memcpy(void *src, image_mem_wrapper *dest, +static inline void async_dpct_memcpy(const void *src, image_mem_wrapper *dest, size_t w_offset_dest, size_t h_offset_dest, size_t p, size_t w, size_t h, sycl::queue q = get_default_queue()) { @@ -683,7 +687,7 @@ static inline void async_dpct_memcpy(void *src, image_mem_wrapper *dest, /// \param [in] w The width of matrix to be copied. /// \param [in] h The height of matrix to be copied. /// \param [in] q The queue to execute the copy task. -static inline void dpct_memcpy(void *src, image_mem_wrapper *dest, +static inline void dpct_memcpy(const void *src, image_mem_wrapper *dest, size_t w_offset_dest, size_t h_offset_dest, size_t p, size_t w, size_t h, sycl::queue q = get_default_queue()) { @@ -700,7 +704,7 @@ static inline void dpct_memcpy(void *src, image_mem_wrapper *dest, /// \param [in] h_offset_dest The y offset of destination image memory. /// \param [in] s The size to be copied. /// \param [in] q The queue to execute the copy task. -static inline void async_dpct_memcpy(void *src, image_mem_wrapper *dest, +static inline void async_dpct_memcpy(const void *src, image_mem_wrapper *dest, size_t w_offset_dest, size_t h_offset_dest, size_t s, sycl::queue q = get_default_queue()) { @@ -716,7 +720,7 @@ static inline void async_dpct_memcpy(void *src, image_mem_wrapper *dest, /// \param [in] h_offset_dest The y offset of destination image memory. /// \param [in] s The size to be copied. /// \param [in] q The queue to execute the copy task. -static inline void dpct_memcpy(void *src, image_mem_wrapper *dest, +static inline void dpct_memcpy(const void *src, image_mem_wrapper *dest, size_t w_offset_dest, size_t h_offset_dest, size_t s, sycl::queue q = get_default_queue()) { sycl::event::wait(detail::dpct_memcpy(src, dest->get_handle(), @@ -735,10 +739,10 @@ static inline void dpct_memcpy(void *src, image_mem_wrapper *dest, /// \param [in] w The width of matrix to be copied. /// \param [in] h The height of matrix to be copied. /// \param [in] q The queue to execute the copy task. -static inline void dpct_memcpy(image_mem_wrapper *src, size_t w_offset_src, - size_t h_offset_src, image_mem_wrapper *dest, - size_t w_offset_dest, size_t h_offset_dest, - size_t w, size_t h, +static inline void dpct_memcpy(const image_mem_wrapper *src, + size_t w_offset_src, size_t h_offset_src, + image_mem_wrapper *dest, size_t w_offset_dest, + size_t h_offset_dest, size_t w, size_t h, sycl::queue q = get_default_queue()) { auto temp = (void *)sycl::malloc_device(w * h, q); // TODO: Need change logic when sycl support image_mem to image_mem copy. @@ -757,10 +761,11 @@ static inline void dpct_memcpy(image_mem_wrapper *src, size_t w_offset_src, /// \param [in] h_offset_dest The y offset of destination image memory. /// \param [in] s The size to be copied. /// \param [in] q The queue to execute the copy task. -static inline void dpct_memcpy(image_mem_wrapper *src, size_t w_offset_src, - size_t h_offset_src, image_mem_wrapper *dest, - size_t w_offset_dest, size_t h_offset_dest, - size_t s, sycl::queue q = get_default_queue()) { +static inline void dpct_memcpy(const image_mem_wrapper *src, + size_t w_offset_src, size_t h_offset_src, + image_mem_wrapper *dest, size_t w_offset_dest, + size_t h_offset_dest, size_t s, + sycl::queue q = get_default_queue()) { auto temp = (void *)sycl::malloc_device(s, q); // TODO: Need change logic when sycl support image_mem to image_mem copy. dpct_memcpy(src, w_offset_src, h_offset_src, temp, s, q); diff --git a/clang/test/dpct/texture/texture_object_bindless_image.cu b/clang/test/dpct/texture/texture_object_bindless_image.cu index c0c44507b2dc..96cea96bc831 100644 --- a/clang/test/dpct/texture/texture_object_bindless_image.cu +++ b/clang/test/dpct/texture/texture_object_bindless_image.cu @@ -69,12 +69,15 @@ void driver() { } int main() { - void *input; + const void *input; + void *output; size_t w, h, sizeInBytes, w_offest_src, h_offest_src, w_offest_dest, h_offest_dest; unsigned int flag, l; cudaExtent e; - // CHECK: dpct::experimental::image_mem_wrapper_ptr pArr, pArr_src; - cudaArray_t pArr, pArr_src; + // CHECK: dpct::experimental::image_mem_wrapper_ptr pArr; + cudaArray_t pArr; + // CHECK: const dpct::experimental::image_mem_wrapper *pArr_src; + const cudaArray *pArr_src; // CHECK: dpct::experimental::image_mem_wrapper_ptr pMipMapArr; cudaMipmappedArray_t pMipMapArr; // CHECK: dpct::image_channel desc; @@ -83,6 +86,8 @@ int main() { cudaMalloc3DArray(&pArr, &desc, e); // CHECK: pArr = new dpct::experimental::image_mem_wrapper(desc, w, h); cudaMallocArray(&pArr, &desc, w, h); + // CHECK: pArr = new dpct::experimental::image_mem_wrapper(desc, 1, 0.1); + cudaMallocArray(&pArr, &desc, 1, 0.1); // CHECK: pMipMapArr = new dpct::experimental::image_mem_wrapper(desc, e, sycl::ext::oneapi::experimental::image_type::mipmap, l); cudaMallocMipmappedArray(&pMipMapArr, &desc, e, l, flag); // CHECK: pArr = pMipMapArr->get_mip_level(0); @@ -95,11 +100,11 @@ int main() { cudaMemcpy2DArrayToArray(pArr, w_offest_dest, h_offest_dest, pArr_src, w_offest_src, h_offest_src, w, h, cudaMemcpyHostToDevice); - // CHECK: dpct::experimental::dpct_memcpy(pArr, w_offest_src, h_offest_src, input, w, w, h, q_ct1); - cudaMemcpy2DFromArray(input, w, pArr, w_offest_src, h_offest_src, w, h, + // CHECK: dpct::experimental::dpct_memcpy(pArr_src, w_offest_src, h_offest_src, output, w, w, h, q_ct1); + cudaMemcpy2DFromArray(output, w, pArr_src, w_offest_src, h_offest_src, w, h, cudaMemcpyHostToDevice); - // CHECK: dpct::experimental::async_dpct_memcpy(pArr, w_offest_src, h_offest_src, input, w, w, h, q_ct1); - cudaMemcpy2DFromArrayAsync(input, w, pArr, w_offest_src, h_offest_src, w, h, + // CHECK: dpct::experimental::async_dpct_memcpy(pArr_src, w_offest_src, h_offest_src, output, w, w, h, q_ct1); + cudaMemcpy2DFromArrayAsync(output, w, pArr_src, w_offest_src, h_offest_src, w, h, cudaMemcpyHostToDevice); // CHECK: dpct::experimental::dpct_memcpy(input, pArr, w_offest_dest, h_offest_dest, w, w, h, q_ct1); cudaMemcpy2DToArray(pArr, w_offest_dest, h_offest_dest, input, w, w, h, @@ -111,11 +116,11 @@ int main() { cudaMemcpyArrayToArray(pArr, w_offest_dest, h_offest_dest, pArr_src, w_offest_src, h_offest_src, w * h, cudaMemcpyHostToDevice); - // CHECK: dpct::experimental::dpct_memcpy(pArr, w_offest_src, h_offest_src, input, w * h, q_ct1); - cudaMemcpyFromArray(input, pArr, w_offest_src, h_offest_src, w * h, + // CHECK: dpct::experimental::dpct_memcpy(pArr_src, w_offest_src, h_offest_src, output, w * h, q_ct1); + cudaMemcpyFromArray(output, pArr_src, w_offest_src, h_offest_src, w * h, cudaMemcpyHostToDevice); - // CHECK: dpct::experimental::async_dpct_memcpy(pArr, w_offest_src, h_offest_src, input, w * h, q_ct1); - cudaMemcpyFromArrayAsync(input, pArr, w_offest_src, h_offest_src, w * h, + // CHECK: dpct::experimental::async_dpct_memcpy(pArr_src, w_offest_src, h_offest_src, output, w * h, q_ct1); + cudaMemcpyFromArrayAsync(output, pArr_src, w_offest_src, h_offest_src, w * h, cudaMemcpyHostToDevice); // CHECK: dpct::experimental::dpct_memcpy(input, pArr, w_offest_dest, h_offest_dest, w * h, q_ct1); cudaMemcpyToArray(pArr, w_offest_dest, h_offest_dest, input, w * h, @@ -215,16 +220,16 @@ int main() { resDesc2.res.mipmap.mipmap = pMipMapArr; // CHECK: resDesc0.set_data_type(dpct::image_data_type::linear); resDesc0.resType = cudaResourceTypeLinear; - // CHECK: resDesc3.set_data_ptr(input); - resDesc3.res.linear.devPtr = input; + // CHECK: resDesc3.set_data_ptr(output); + resDesc3.res.linear.devPtr = output; // CHECK: resDesc3.set_channel(desc); resDesc3.res.linear.desc = desc; // CHECK: resDesc3.set_x(sizeInBytes); resDesc3.res.linear.sizeInBytes = sizeInBytes; // CHECK: resDesc0.set_data_type(dpct::image_data_type::pitch); resDesc0.resType = cudaResourceTypePitch2D; - // CHECK: resDesc4.set_data_ptr(input); - resDesc4.res.pitch2D.devPtr = input; + // CHECK: resDesc4.set_data_ptr(output); + resDesc4.res.pitch2D.devPtr = output; // CHECK: resDesc4.set_channel(desc); resDesc4.res.pitch2D.desc = desc; // CHECK: resDesc4.set_x(w); @@ -251,18 +256,18 @@ int main() { { // CHECK: dpct::image_data resDesc; cudaResourceDesc resDesc; - // CHECK: resDesc.set_data(input, sizeInBytes, desc); + // CHECK: resDesc.set_data(output, sizeInBytes, desc); resDesc.resType = cudaResourceTypeLinear; - resDesc.res.linear.devPtr = input; + resDesc.res.linear.devPtr = output; resDesc.res.linear.desc = desc; resDesc.res.linear.sizeInBytes = sizeInBytes; } { // CHECK: dpct::image_data resDesc; cudaResourceDesc resDesc; - // CHECK: resDesc.set_data(input, w, h, sizeInBytes, desc); + // CHECK: resDesc.set_data(output, w, h, sizeInBytes, desc); resDesc.resType = cudaResourceTypePitch2D; - resDesc.res.pitch2D.devPtr = input; + resDesc.res.pitch2D.devPtr = output; resDesc.res.pitch2D.desc = desc; resDesc.res.pitch2D.width = w; resDesc.res.pitch2D.height = h; diff --git a/clang/test/dpct/texture/texture_reference_bindless_image.cu b/clang/test/dpct/texture/texture_reference_bindless_image.cu index 064030012dcd..b994ef35ecdf 100644 --- a/clang/test/dpct/texture/texture_reference_bindless_image.cu +++ b/clang/test/dpct/texture/texture_reference_bindless_image.cu @@ -67,6 +67,16 @@ int main() { tex2.normalized = 0; // CHECK: i = tex2.is_coordinate_normalized(); i = tex2.normalized; + { + // CHECK: tex3.set(sycl::coordinate_normalization_mode::normalized); + tex3.normalized = true; + // CHECK: tex3.set(sycl::addressing_mode::clamp_to_edge); + tex3.addressMode[0] = cudaAddressModeClamp; + // CHECK: tex3.set(sycl::addressing_mode::clamp_to_edge); + tex3.addressMode[1] = cudaAddressModeClamp; + // CHECK: tex3.set(sycl::filtering_mode::linear); + tex3.filterMode = cudaFilterModeLinear; + } void *dataPtr; const size_t w = 4; From d84c4d845989a77277824e84793eb131a15b0c09 Mon Sep 17 00:00:00 2001 From: "Tang, Jiajun" Date: Mon, 6 May 2024 10:31:07 +0800 Subject: [PATCH 2/2] reorder memcpy param. --- clang/lib/DPCT/ASTTraversal.cpp | 84 ++-------------- .../dpct-rt/include/dpct/bindless_images.hpp | 98 ++++++++++--------- .../texture/texture_object_bindless_image.cu | 61 ++++++++---- 3 files changed, 103 insertions(+), 140 deletions(-) diff --git a/clang/lib/DPCT/ASTTraversal.cpp b/clang/lib/DPCT/ASTTraversal.cpp index 9ac561b6722e..e7979d845139 100644 --- a/clang/lib/DPCT/ASTTraversal.cpp +++ b/clang/lib/DPCT/ASTTraversal.cpp @@ -10440,7 +10440,8 @@ void MemoryMigrationRule::arrayMigration( auto Begin = getArgEndLocation(C, EndPos - 2, SM), End = getArgEndLocation(C, EndPos, SM); llvm::raw_string_ostream OS(Str); - OS << ", " << MapNames::getDpctNamespace() << "automatic"; + if (!DpctGlobalInfo::useExtBindlessImages()) + OS << ", " << MapNames::getDpctNamespace() << "automatic"; OS << ", "; DerefExpr(StreamExpr, C).print(OS); emplaceTransformation(replaceText(Begin, End, std::move(Str), SM)); @@ -10457,20 +10458,7 @@ void MemoryMigrationRule::arrayMigration( if (NameRef == "cudaMemcpy2DArrayToArray") { if (DpctGlobalInfo::useExtBindlessImages()) { - std::string Replacement; - llvm::raw_string_ostream OS(Replacement); - OS << ReplaceStr << "(" << ExprAnalysis::ref(C->getArg(3)) << ", " - << ExprAnalysis::ref(C->getArg(4)) << ", " - << ExprAnalysis::ref(C->getArg(5)) << ", " - << ExprAnalysis::ref(C->getArg(0)) << ", " - << ExprAnalysis::ref(C->getArg(1)) << ", " - << ExprAnalysis::ref(C->getArg(2)) << ", " - << ExprAnalysis::ref(C->getArg(6)) << ", " - << ExprAnalysis::ref(C->getArg(7)) << ", "; - int Index = DpctGlobalInfo::getHelperFuncReplInfoIndexThenInc(); - buildTempVariableMap(Index, C, HelperFuncType::HFT_DefaultQueue); - OS << "{{NEEDREPLACEQ" + std::to_string(Index) + "}})"; - emplaceTransformation(new ReplaceStmt(C, Replacement)); + emplaceTransformation(new ReplaceCalleeName(C, std::move(ReplaceStr))); } else { insertToPitchedData(C, 0); aggregate3DVectorClassCtor(C, "id", 1, "0", SM); @@ -10480,19 +10468,7 @@ void MemoryMigrationRule::arrayMigration( } } else if (NameRef == "cudaMemcpy2DFromArray") { if (DpctGlobalInfo::useExtBindlessImages()) { - std::string Replacement; - llvm::raw_string_ostream OS(Replacement); - OS << ReplaceStr << "(" << ExprAnalysis::ref(C->getArg(2)) << ", " - << ExprAnalysis::ref(C->getArg(3)) << ", " - << ExprAnalysis::ref(C->getArg(4)) << ", " - << ExprAnalysis::ref(C->getArg(0)) << ", " - << ExprAnalysis::ref(C->getArg(1)) << ", " - << ExprAnalysis::ref(C->getArg(5)) << ", " - << ExprAnalysis::ref(C->getArg(6)) << ", "; - int Index = DpctGlobalInfo::getHelperFuncReplInfoIndexThenInc(); - buildTempVariableMap(Index, C, HelperFuncType::HFT_DefaultQueue); - OS << "{{NEEDREPLACEQ" + std::to_string(Index) + "}})"; - emplaceTransformation(new ReplaceStmt(C, Replacement)); + emplaceTransformation(new ReplaceCalleeName(C, std::move(ReplaceStr))); } else { aggregatePitchedData(C, 0, 1, SM); insertZeroOffset(C, 2); @@ -10502,19 +10478,7 @@ void MemoryMigrationRule::arrayMigration( } } else if (NameRef == "cudaMemcpy2DToArray") { if (DpctGlobalInfo::useExtBindlessImages()) { - std::string Replacement; - llvm::raw_string_ostream OS(Replacement); - OS << ReplaceStr << "(" << ExprAnalysis::ref(C->getArg(3)) << ", " - << ExprAnalysis::ref(C->getArg(0)) << ", " - << ExprAnalysis::ref(C->getArg(1)) << ", " - << ExprAnalysis::ref(C->getArg(2)) << ", " - << ExprAnalysis::ref(C->getArg(4)) << ", " - << ExprAnalysis::ref(C->getArg(5)) << ", " - << ExprAnalysis::ref(C->getArg(6)) << ", "; - int Index = DpctGlobalInfo::getHelperFuncReplInfoIndexThenInc(); - buildTempVariableMap(Index, C, HelperFuncType::HFT_DefaultQueue); - OS << "{{NEEDREPLACEQ" + std::to_string(Index) + "}})"; - emplaceTransformation(new ReplaceStmt(C, Replacement)); + emplaceTransformation(new ReplaceCalleeName(C, std::move(ReplaceStr))); } else { insertToPitchedData(C, 0); aggregate3DVectorClassCtor(C, "id", 1, "0", SM); @@ -10524,19 +10488,7 @@ void MemoryMigrationRule::arrayMigration( } } else if (NameRef == "cudaMemcpyArrayToArray") { if (DpctGlobalInfo::useExtBindlessImages()) { - std::string Replacement; - llvm::raw_string_ostream OS(Replacement); - OS << ReplaceStr << "(" << ExprAnalysis::ref(C->getArg(3)) << ", " - << ExprAnalysis::ref(C->getArg(4)) << ", " - << ExprAnalysis::ref(C->getArg(5)) << ", " - << ExprAnalysis::ref(C->getArg(0)) << ", " - << ExprAnalysis::ref(C->getArg(1)) << ", " - << ExprAnalysis::ref(C->getArg(2)) << ", " - << ExprAnalysis::ref(C->getArg(6)) << ", "; - int Index = DpctGlobalInfo::getHelperFuncReplInfoIndexThenInc(); - buildTempVariableMap(Index, C, HelperFuncType::HFT_DefaultQueue); - OS << "{{NEEDREPLACEQ" + std::to_string(Index) + "}})"; - emplaceTransformation(new ReplaceStmt(C, Replacement)); + emplaceTransformation(new ReplaceCalleeName(C, std::move(ReplaceStr))); } else { insertToPitchedData(C, 0); aggregate3DVectorClassCtor(C, "id", 1, "0", SM); @@ -10546,17 +10498,7 @@ void MemoryMigrationRule::arrayMigration( } } else if (NameRef == "cudaMemcpyFromArray") { if (DpctGlobalInfo::useExtBindlessImages()) { - std::string Replacement; - llvm::raw_string_ostream OS(Replacement); - OS << ReplaceStr << "(" << ExprAnalysis::ref(C->getArg(1)) << ", " - << ExprAnalysis::ref(C->getArg(2)) << ", " - << ExprAnalysis::ref(C->getArg(3)) << ", " - << ExprAnalysis::ref(C->getArg(0)) << ", " - << ExprAnalysis::ref(C->getArg(4)) << ", "; - int Index = DpctGlobalInfo::getHelperFuncReplInfoIndexThenInc(); - buildTempVariableMap(Index, C, HelperFuncType::HFT_DefaultQueue); - OS << "{{NEEDREPLACEQ" + std::to_string(Index) + "}})"; - emplaceTransformation(new ReplaceStmt(C, Replacement)); + emplaceTransformation(new ReplaceCalleeName(C, std::move(ReplaceStr))); } else { aggregatePitchedData(C, 0, 4, SM, true); insertZeroOffset(C, 1); @@ -10566,17 +10508,7 @@ void MemoryMigrationRule::arrayMigration( } } else if (NameRef == "cudaMemcpyToArray") { if (DpctGlobalInfo::useExtBindlessImages()) { - std::string Replacement; - llvm::raw_string_ostream OS(Replacement); - OS << ReplaceStr << "(" << ExprAnalysis::ref(C->getArg(3)) << ", " - << ExprAnalysis::ref(C->getArg(0)) << ", " - << ExprAnalysis::ref(C->getArg(1)) << ", " - << ExprAnalysis::ref(C->getArg(2)) << ", " - << ExprAnalysis::ref(C->getArg(4)) << ", "; - int Index = DpctGlobalInfo::getHelperFuncReplInfoIndexThenInc(); - buildTempVariableMap(Index, C, HelperFuncType::HFT_DefaultQueue); - OS << "{{NEEDREPLACEQ" + std::to_string(Index) + "}})"; - emplaceTransformation(new ReplaceStmt(C, Replacement)); + emplaceTransformation(new ReplaceCalleeName(C, std::move(ReplaceStr))); } else { insertToPitchedData(C, 0); aggregate3DVectorClassCtor(C, "id", 1, "0", SM); diff --git a/clang/runtime/dpct-rt/include/dpct/bindless_images.hpp b/clang/runtime/dpct-rt/include/dpct/bindless_images.hpp index bc7a8acf3457..ab56f03182ce 100644 --- a/clang/runtime/dpct-rt/include/dpct/bindless_images.hpp +++ b/clang/runtime/dpct-rt/include/dpct/bindless_images.hpp @@ -229,6 +229,7 @@ dpct_memcpy(const void *src, const auto dest_offset = sycl::range<3>(w_offset_dest / ele_size, h_offset_dest, 0); const auto copy_extend = sycl::range<3>(w / ele_size, h, 0); + // TODO: Remove const_cast after refining the signature of ext_oneapi_copy. return q.ext_oneapi_copy(const_cast(src), src_offset, src_extend, dest, dest_offset, desc_dest, copy_extend); } @@ -250,6 +251,7 @@ dpct_memcpy(const void *src, sycl::range<3>(w_offset_dest / ele_size, h_offset_dest, 0); const auto copy_extend = sycl::range<3>((w - w_offset_dest) / ele_size, 1, 0); + // TODO: Remove const_cast after refining the signature of ext_oneapi_copy. event_list.push_back(q.ext_oneapi_copy(const_cast(src), src_offset, src_extend, dest, dest_offset, desc_dest, copy_extend)); @@ -263,6 +265,7 @@ dpct_memcpy(const void *src, sycl::range<3>(w_offset_dest / ele_size, h_offset_dest, 0); const auto copy_extend = sycl::range<3>((s - offset_src - w_offset_dest) / ele_size, 1, 0); + // TODO: Remove const_cast after refining the signature of ext_oneapi_copy. event_list.push_back(q.ext_oneapi_copy(const_cast(src), src_offset, src_extend, dest, dest_offset, desc_dest, copy_extend)); @@ -590,17 +593,18 @@ template class bindless_image_wrapper { /// Asynchronously copies from the image memory to memory specified by a /// pointer, The return of the function does NOT guarantee the copy is /// completed. +/// \param [in] dest The destination memory address. +/// \param [in] p The pitch of destination memory. /// \param [in] src The source image memory. /// \param [in] w_offset_src The x offset of source image memory. /// \param [in] h_offset_src The y offset of source image memory. -/// \param [in] dest The destination memory address. -/// \param [in] p The pitch of destination memory. /// \param [in] w The width of matrix to be copied. /// \param [in] h The height of matrix to be copied. /// \param [in] q The queue to execute the copy task. -static inline void async_dpct_memcpy(const image_mem_wrapper *src, +static inline void async_dpct_memcpy(void *dest, size_t p, + const image_mem_wrapper *src, size_t w_offset_src, size_t h_offset_src, - void *dest, size_t p, size_t w, size_t h, + size_t w, size_t h, sycl::queue q = get_default_queue()) { detail::dpct_memcpy(src->get_handle(), src->get_desc(), w_offset_src, h_offset_src, dest, p, w, h, q); @@ -608,17 +612,18 @@ static inline void async_dpct_memcpy(const image_mem_wrapper *src, /// Synchronously copies from the image memory to memory specified by a /// pointer, The function will return after the copy is completed. +/// \param [in] dest The destination memory address. +/// \param [in] p The pitch of destination memory. /// \param [in] src The source image memory. /// \param [in] w_offset_src The x offset of source image memory. /// \param [in] h_offset_src The y offset of source image memory. -/// \param [in] dest The destination memory address. -/// \param [in] p The pitch of destination memory. /// \param [in] w The width of matrix to be copied. /// \param [in] h The height of matrix to be copied. /// \param [in] q The queue to execute the copy task. -static inline void dpct_memcpy(const image_mem_wrapper *src, +static inline void dpct_memcpy(void *dest, size_t p, + const image_mem_wrapper *src, size_t w_offset_src, size_t h_offset_src, - void *dest, size_t p, size_t w, size_t h, + size_t w, size_t h, sycl::queue q = get_default_queue()) { detail::dpct_memcpy(src->get_handle(), src->get_desc(), w_offset_src, h_offset_src, dest, p, w, h, q) @@ -628,15 +633,15 @@ static inline void dpct_memcpy(const image_mem_wrapper *src, /// Asynchronously copies from the image memory to memory specified by a /// pointer, The return of the function does NOT guarantee the copy is /// completed. +/// \param [in] dest The destination memory address. /// \param [in] src The source image memory. /// \param [in] w_offset_src The x offset of source image memory. /// \param [in] h_offset_src The y offset of source image memory. -/// \param [in] dest The destination memory address. /// \param [in] s The size to be copied. /// \param [in] q The queue to execute the copy task. -static inline void async_dpct_memcpy(const image_mem_wrapper *src, +static inline void async_dpct_memcpy(void *dest, const image_mem_wrapper *src, size_t w_offset_src, size_t h_offset_src, - void *dest, size_t s, + size_t s, sycl::queue q = get_default_queue()) { detail::dpct_memcpy(src->get_handle(), src->get_desc(), w_offset_src, h_offset_src, dest, s, q); @@ -644,16 +649,15 @@ static inline void async_dpct_memcpy(const image_mem_wrapper *src, /// Synchronously copies from the image memory to memory specified by a /// pointer, The function will return after the copy is completed. +/// \param [in] dest The destination memory address. /// \param [in] src The source image memory. /// \param [in] w_offset_src The x offset of source image memory. /// \param [in] h_offset_src The y offset of source image memory. -/// \param [in] dest The destination memory address. /// \param [in] s The size to be copied. /// \param [in] q The queue to execute the copy task. -static inline void dpct_memcpy(const image_mem_wrapper *src, +static inline void dpct_memcpy(void *dest, const image_mem_wrapper *src, size_t w_offset_src, size_t h_offset_src, - void *dest, size_t s, - sycl::queue q = get_default_queue()) { + size_t s, sycl::queue q = get_default_queue()) { sycl::event::wait(detail::dpct_memcpy(src->get_handle(), src->get_desc(), w_offset_src, h_offset_src, dest, s, q)); @@ -661,17 +665,18 @@ static inline void dpct_memcpy(const image_mem_wrapper *src, /// Asynchronously copies from memory specified by a pointer to the image /// memory, The return of the function does NOT guarantee the copy is completed. -/// \param [in] src The source memory address. /// \param [in] dest The destination image memory. /// \param [in] w_offset_dest The x offset of destination image memory. /// \param [in] h_offset_dest The y offset of destination image memory. +/// \param [in] src The source memory address. /// \param [in] p The pitch of source memory. /// \param [in] w The width of matrix to be copied. /// \param [in] h The height of matrix to be copied. /// \param [in] q The queue to execute the copy task. -static inline void async_dpct_memcpy(const void *src, image_mem_wrapper *dest, +static inline void async_dpct_memcpy(image_mem_wrapper *dest, size_t w_offset_dest, size_t h_offset_dest, - size_t p, size_t w, size_t h, + const void *src, size_t p, size_t w, + size_t h, sycl::queue q = get_default_queue()) { detail::dpct_memcpy(src, dest->get_handle(), dest->get_desc(), w_offset_dest, h_offset_dest, p, w, h, q); @@ -679,17 +684,17 @@ static inline void async_dpct_memcpy(const void *src, image_mem_wrapper *dest, /// Synchronously copies from memory specified by a pointer to the image /// memory, The function will return after the copy is completed. -/// \param [in] src The source memory address. /// \param [in] dest The destination image memory. /// \param [in] w_offset_dest The x offset of destination image memory. /// \param [in] h_offset_dest The y offset of destination image memory. +/// \param [in] src The source memory address. /// \param [in] p The pitch of source memory. /// \param [in] w The width of matrix to be copied. /// \param [in] h The height of matrix to be copied. /// \param [in] q The queue to execute the copy task. -static inline void dpct_memcpy(const void *src, image_mem_wrapper *dest, - size_t w_offset_dest, size_t h_offset_dest, - size_t p, size_t w, size_t h, +static inline void dpct_memcpy(image_mem_wrapper *dest, size_t w_offset_dest, + size_t h_offset_dest, const void *src, size_t p, + size_t w, size_t h, sycl::queue q = get_default_queue()) { detail::dpct_memcpy(src, dest->get_handle(), dest->get_desc(), w_offset_dest, h_offset_dest, p, w, h, q) @@ -698,15 +703,15 @@ static inline void dpct_memcpy(const void *src, image_mem_wrapper *dest, /// Asynchronously copies from memory specified by a pointer to the image /// memory, The return of the function does NOT guarantee the copy is completed. -/// \param [in] src The source memory address. /// \param [in] dest The destination image memory. /// \param [in] w_offset_dest The x offset of destination image memory. /// \param [in] h_offset_dest The y offset of destination image memory. +/// \param [in] src The source memory address. /// \param [in] s The size to be copied. /// \param [in] q The queue to execute the copy task. -static inline void async_dpct_memcpy(const void *src, image_mem_wrapper *dest, +static inline void async_dpct_memcpy(image_mem_wrapper *dest, size_t w_offset_dest, size_t h_offset_dest, - size_t s, + const void *src, size_t s, sycl::queue q = get_default_queue()) { detail::dpct_memcpy(src, dest->get_handle(), dest->get_desc(), w_offset_dest, h_offset_dest, s, q); @@ -714,15 +719,15 @@ static inline void async_dpct_memcpy(const void *src, image_mem_wrapper *dest, /// Synchronously copies from memory specified by a pointer to the image /// memory, The function will return after the copy is completed. -/// \param [in] src The source memory address. /// \param [in] dest The destination image memory. /// \param [in] w_offset_dest The x offset of destination image memory. /// \param [in] h_offset_dest The y offset of destination image memory. +/// \param [in] src The source memory address. /// \param [in] s The size to be copied. /// \param [in] q The queue to execute the copy task. -static inline void dpct_memcpy(const void *src, image_mem_wrapper *dest, - size_t w_offset_dest, size_t h_offset_dest, - size_t s, sycl::queue q = get_default_queue()) { +static inline void dpct_memcpy(image_mem_wrapper *dest, size_t w_offset_dest, + size_t h_offset_dest, const void *src, size_t s, + sycl::queue q = get_default_queue()) { sycl::event::wait(detail::dpct_memcpy(src, dest->get_handle(), dest->get_desc(), w_offset_dest, h_offset_dest, s, q)); @@ -730,46 +735,47 @@ static inline void dpct_memcpy(const void *src, image_mem_wrapper *dest, /// Synchronously copies from image memory to the image memory, The function /// will return after the copy is completed. -/// \param [in] src The source image memory. -/// \param [in] w_offset_src The x offset of source image memory. -/// \param [in] h_offset_src The y offset of source image memory. /// \param [in] dest The destination image memory. /// \param [in] w_offset_dest The x offset of destination image memory. /// \param [in] h_offset_dest The y offset of destination image memory. +/// \param [in] src The source image memory. +/// \param [in] w_offset_src The x offset of source image memory. +/// \param [in] h_offset_src The y offset of source image memory. /// \param [in] w The width of matrix to be copied. /// \param [in] h The height of matrix to be copied. /// \param [in] q The queue to execute the copy task. -static inline void dpct_memcpy(const image_mem_wrapper *src, +static inline void dpct_memcpy(image_mem_wrapper *dest, size_t w_offset_dest, + size_t h_offset_dest, + const image_mem_wrapper *src, size_t w_offset_src, size_t h_offset_src, - image_mem_wrapper *dest, size_t w_offset_dest, - size_t h_offset_dest, size_t w, size_t h, + size_t w, size_t h, sycl::queue q = get_default_queue()) { auto temp = (void *)sycl::malloc_device(w * h, q); // TODO: Need change logic when sycl support image_mem to image_mem copy. - dpct_memcpy(src, w_offset_src, h_offset_src, temp, w, w, h, q); - dpct_memcpy(temp, dest, w_offset_dest, h_offset_dest, w, w, h, q); + dpct_memcpy(temp, w, src, w_offset_src, h_offset_src, w, h, q); + dpct_memcpy(dest, w_offset_dest, h_offset_dest, temp, w, w, h, q); sycl::free(temp, q); } /// Synchronously copies from image memory to the image memory, The function /// will return after the copy is completed. -/// \param [in] src The source image memory. -/// \param [in] w_offset_src The x offset of source image memory. -/// \param [in] h_offset_src The y offset of source image memory. /// \param [in] dest The destination image memory. /// \param [in] w_offset_dest The x offset of destination image memory. /// \param [in] h_offset_dest The y offset of destination image memory. +/// \param [in] src The source image memory. +/// \param [in] w_offset_src The x offset of source image memory. +/// \param [in] h_offset_src The y offset of source image memory. /// \param [in] s The size to be copied. /// \param [in] q The queue to execute the copy task. -static inline void dpct_memcpy(const image_mem_wrapper *src, +static inline void dpct_memcpy(image_mem_wrapper *dest, size_t w_offset_dest, + size_t h_offset_dest, + const image_mem_wrapper *src, size_t w_offset_src, size_t h_offset_src, - image_mem_wrapper *dest, size_t w_offset_dest, - size_t h_offset_dest, size_t s, - sycl::queue q = get_default_queue()) { + size_t s, sycl::queue q = get_default_queue()) { auto temp = (void *)sycl::malloc_device(s, q); // TODO: Need change logic when sycl support image_mem to image_mem copy. - dpct_memcpy(src, w_offset_src, h_offset_src, temp, s, q); - dpct_memcpy(temp, dest, w_offset_dest, h_offset_dest, s, q); + dpct_memcpy(temp, src, w_offset_src, h_offset_src, s, q); + dpct_memcpy(dest, w_offset_dest, h_offset_dest, temp, s, q); sycl::free(temp, q); } diff --git a/clang/test/dpct/texture/texture_object_bindless_image.cu b/clang/test/dpct/texture/texture_object_bindless_image.cu index 96cea96bc831..a3ba2b27d89c 100644 --- a/clang/test/dpct/texture/texture_object_bindless_image.cu +++ b/clang/test/dpct/texture/texture_object_bindless_image.cu @@ -74,6 +74,7 @@ int main() { size_t w, h, sizeInBytes, w_offest_src, h_offest_src, w_offest_dest, h_offest_dest; unsigned int flag, l; cudaExtent e; + cudaStream_t s; // CHECK: dpct::experimental::image_mem_wrapper_ptr pArr; cudaArray_t pArr; // CHECK: const dpct::experimental::image_mem_wrapper *pArr_src; @@ -96,38 +97,62 @@ int main() { // CHECK-NEXT: e = pArr->get_range(); // CHECK-NEXT: flag = 0; cudaArrayGetInfo(&desc, &e, &flag, pArr); - // CHECK: dpct::experimental::dpct_memcpy(pArr_src, w_offest_src, h_offest_src, pArr, w_offest_dest, h_offest_dest, w, h, q_ct1); + // CHECK: dpct::experimental::dpct_memcpy(pArr, w_offest_dest, h_offest_dest, pArr_src, + // CHECK-NEXT: w_offest_src, h_offest_src, w, h); cudaMemcpy2DArrayToArray(pArr, w_offest_dest, h_offest_dest, pArr_src, w_offest_src, h_offest_src, w, h, cudaMemcpyHostToDevice); - // CHECK: dpct::experimental::dpct_memcpy(pArr_src, w_offest_src, h_offest_src, output, w, w, h, q_ct1); + // CHECK: dpct::experimental::dpct_memcpy(pArr, w_offest_dest, h_offest_dest, pArr_src, + // CHECK-NEXT: w_offest_src, h_offest_src, w, h); + cudaMemcpy2DArrayToArray(pArr, w_offest_dest, h_offest_dest, pArr_src, + w_offest_src, h_offest_src, w, h); + // CHECK: dpct::experimental::dpct_memcpy(output, w, pArr_src, w_offest_src, h_offest_src, w, h); cudaMemcpy2DFromArray(output, w, pArr_src, w_offest_src, h_offest_src, w, h, cudaMemcpyHostToDevice); - // CHECK: dpct::experimental::async_dpct_memcpy(pArr_src, w_offest_src, h_offest_src, output, w, w, h, q_ct1); - cudaMemcpy2DFromArrayAsync(output, w, pArr_src, w_offest_src, h_offest_src, w, h, - cudaMemcpyHostToDevice); - // CHECK: dpct::experimental::dpct_memcpy(input, pArr, w_offest_dest, h_offest_dest, w, w, h, q_ct1); + // CHECK: dpct::experimental::async_dpct_memcpy(output, w, pArr_src, w_offest_src, h_offest_src, w, + // CHECK-NEXT: h); + cudaMemcpy2DFromArrayAsync(output, w, pArr_src, w_offest_src, h_offest_src, w, + h, cudaMemcpyHostToDevice); + // CHECK: dpct::experimental::async_dpct_memcpy(output, w, pArr_src, w_offest_src, h_offest_src, w, + // CHECK-NEXT: h, *s); + cudaMemcpy2DFromArrayAsync(output, w, pArr_src, w_offest_src, h_offest_src, w, + h, cudaMemcpyHostToDevice, s); + // CHECK: dpct::experimental::dpct_memcpy(pArr, w_offest_dest, h_offest_dest, input, w, w, h); cudaMemcpy2DToArray(pArr, w_offest_dest, h_offest_dest, input, w, w, h, cudaMemcpyHostToDevice); - // CHECK: dpct::experimental::async_dpct_memcpy(input, pArr, w_offest_dest, h_offest_dest, w, w, h, q_ct1); + // CHECK: dpct::experimental::async_dpct_memcpy(pArr, w_offest_dest, h_offest_dest, input, w, w, h); cudaMemcpy2DToArrayAsync(pArr, w_offest_dest, h_offest_dest, input, w, w, h, cudaMemcpyHostToDevice); - // CHECK: dpct::experimental::dpct_memcpy(pArr_src, w_offest_src, h_offest_src, pArr, w_offest_dest, h_offest_dest, w * h, q_ct1); + // CHECK: dpct::experimental::async_dpct_memcpy(pArr, w_offest_dest, h_offest_dest, input, w, w, h, *s); + cudaMemcpy2DToArrayAsync(pArr, w_offest_dest, h_offest_dest, input, w, w, h, + cudaMemcpyHostToDevice, s); + // CHECK: dpct::experimental::dpct_memcpy(pArr, w_offest_dest, h_offest_dest, pArr_src, + // w_offest_src, h_offest_src, w * h); cudaMemcpyArrayToArray(pArr, w_offest_dest, h_offest_dest, pArr_src, w_offest_src, h_offest_src, w * h, cudaMemcpyHostToDevice); - // CHECK: dpct::experimental::dpct_memcpy(pArr_src, w_offest_src, h_offest_src, output, w * h, q_ct1); + // CHECK: dpct::experimental::dpct_memcpy(pArr, w_offest_dest, h_offest_dest, pArr_src, + // w_offest_src, h_offest_src, w * h); + cudaMemcpyArrayToArray(pArr, w_offest_dest, h_offest_dest, pArr_src, + w_offest_src, h_offest_src, w * h); + // CHECK: dpct::experimental::dpct_memcpy(output, pArr_src, w_offest_src, h_offest_src, w * h); cudaMemcpyFromArray(output, pArr_src, w_offest_src, h_offest_src, w * h, cudaMemcpyHostToDevice); - // CHECK: dpct::experimental::async_dpct_memcpy(pArr_src, w_offest_src, h_offest_src, output, w * h, q_ct1); + // CHECK: dpct::experimental::async_dpct_memcpy(output, pArr_src, w_offest_src, h_offest_src, w * h); cudaMemcpyFromArrayAsync(output, pArr_src, w_offest_src, h_offest_src, w * h, cudaMemcpyHostToDevice); - // CHECK: dpct::experimental::dpct_memcpy(input, pArr, w_offest_dest, h_offest_dest, w * h, q_ct1); + // CHECK: dpct::experimental::async_dpct_memcpy(output, pArr_src, w_offest_src, h_offest_src, w * h, *s); + cudaMemcpyFromArrayAsync(output, pArr_src, w_offest_src, h_offest_src, w * h, + cudaMemcpyHostToDevice, s); + // CHECK: dpct::experimental::dpct_memcpy(pArr, w_offest_dest, h_offest_dest, input, w * h); cudaMemcpyToArray(pArr, w_offest_dest, h_offest_dest, input, w * h, cudaMemcpyHostToDevice); - // CHECK: dpct::experimental::async_dpct_memcpy(input, pArr, w_offest_dest, h_offest_dest, w * h, q_ct1); + // CHECK: dpct::experimental::async_dpct_memcpy(pArr, w_offest_dest, h_offest_dest, input, w * h); cudaMemcpyToArrayAsync(pArr, w_offest_dest, h_offest_dest, input, w * h, cudaMemcpyHostToDevice); + // CHECK: dpct::experimental::async_dpct_memcpy(pArr, w_offest_dest, h_offest_dest, input, w * h, *s); + cudaMemcpyToArrayAsync(pArr, w_offest_dest, h_offest_dest, input, w * h, + cudaMemcpyHostToDevice, s); // CHECK: dpct::memcpy_parameter p3d; cudaMemcpy3DParms p3d; @@ -137,8 +162,8 @@ int main() { cudaPitchedPtr pp; // CHECK: dpct::memcpy_direction k; cudaMemcpyKind k; - // CHECK: p3d.from.image_bindless = pArr_src; - p3d.srcArray = pArr_src; + // CHECK: p3d.from.image_bindless = pArr; + p3d.srcArray = pArr; // CHECK: pArr_src = p3d.from.image_bindless; pArr_src = p3d.srcArray; // CHECK: p3d.from.pos = pos; @@ -153,8 +178,8 @@ int main() { pos = p3d.srcPos; // CHECK: p3d.from.pitched = pp; p3d.srcPtr = pp; - // CHECK: p3d.from.pitched.set_data_ptr(input); - p3d.srcPtr.ptr = input; + // CHECK: p3d.from.pitched.set_data_ptr(output); + p3d.srcPtr.ptr = output; // CHECK: p3d.from.pitched.set_pitch(1); p3d.srcPtr.pitch = 1; // CHECK: p3d.from.pitched.set_x(2); @@ -179,8 +204,8 @@ int main() { pos = p3d.dstPos; // CHECK: p3d.to.pitched = pp; p3d.dstPtr = pp; - // CHECK: p3d.to.pitched.set_data_ptr(input); - p3d.dstPtr.ptr = input; + // CHECK: p3d.to.pitched.set_data_ptr(output); + p3d.dstPtr.ptr = output; // CHECK: p3d.to.pitched.set_pitch(1); p3d.dstPtr.pitch = 1; // CHECK: p3d.to.pitched.set_x(2);