diff --git a/lib/Transforms/InsertGPUAllocs.cpp b/lib/Transforms/InsertGPUAllocs.cpp index 178a8ac72..1bad1a9c7 100644 --- a/lib/Transforms/InsertGPUAllocs.cpp +++ b/lib/Transforms/InsertGPUAllocs.cpp @@ -277,8 +277,20 @@ class InsertGPUAllocsPass final // address space. auto isGpuAddrSpace = [&](mlir::Value memref) { if (auto type = mlir::dyn_cast(memref.getType())) { - return mlir::isa_and_nonnull( - type.getMemorySpace()); + auto memSpace = type.getMemorySpace(); + if (!memSpace) + return false; + + if (mlir::dyn_cast(memSpace)) + return true; + + // HACK: the XeGPU dialect only understands integer memory spaces, so + // integer attributes are also checked here to keep XeGPU pipelines working. + // Only the integer equal to gpu::AddressSpace::Private (MemSpace = 3) is + // accepted; XeGPU uses it to describe SLM (shared local memory on GPU). + if (auto intAttr = mlir::dyn_cast(memSpace)) + return intAttr.getValue() == + static_cast(mlir::gpu::AddressSpace::Private); } return false; }; @@ -410,6 +422,8 @@ class InsertGPUAllocsPass final if (m_clientAPI == "opencl") { for (const auto &it : gpuBufferAllocs) { auto alloc = mlir::cast(it.first); + if (isGpuAddrSpace(alloc)) + continue; auto access = getAccessType(alloc); auto loc = alloc.getLoc(); builder.setInsertionPoint(alloc);