diff --git a/include/imex/Dialect/NDArray/IR/NDArrayOps.h b/include/imex/Dialect/NDArray/IR/NDArrayOps.h index f4c578059..a9c58d714 100644 --- a/include/imex/Dialect/NDArray/IR/NDArrayOps.h +++ b/include/imex/Dialect/NDArray/IR/NDArrayOps.h @@ -84,7 +84,7 @@ namespace ndarray { /// @return true if given NDArrayTYpe has this specific environment attribute template bool hasEnv(const ::mlir::RankedTensorType &t) { auto encoding = t.getEncoding(); - if (auto envs = ::mlir::dyn_cast(encoding)) { + if (auto envs = ::mlir::dyn_cast(encoding)) { for (auto a : envs.getEnvs()) { if (::mlir::isa(a)) { return true; @@ -103,7 +103,7 @@ inline bool hasGPUEnv(const ::mlir::Type &t) { inline ::imex::region::GPUEnvAttr getGPUEnv(const ::mlir::Type &t) { if (auto tt = ::mlir::dyn_cast<::mlir::RankedTensorType>(t)) { auto encoding = tt.getEncoding(); - if (auto envs = ::mlir::dyn_cast(encoding)) { + if (auto envs = ::mlir::dyn_cast(encoding)) { for (auto a : envs.getEnvs()) { if (auto g = ::mlir::dyn_cast<::imex::region::GPUEnvAttr>(a)) { return g; diff --git a/include/imex/Dialect/NDArray/IR/NDArrayOps.td b/include/imex/Dialect/NDArray/IR/NDArrayOps.td index 208dc70ae..91959f359 100644 --- a/include/imex/Dialect/NDArray/IR/NDArrayOps.td +++ b/include/imex/Dialect/NDArray/IR/NDArrayOps.td @@ -59,8 +59,8 @@ def NDArray_Dialect : Dialect { let useDefaultAttributePrinterParser = true; } -def NDArray_EnvironmentAttr : AttrDef { - let mnemonic = "environment"; +def NDArray_EnvsAttr : AttrDef { + let mnemonic = "envs"; let parameters = (ins ArrayRefParameter<"::mlir::Attribute">:$envs); let assemblyFormat = "`<` $envs `>`"; } diff --git a/test/Dialect/NDArray/Transforms/AddGPURegions.mlir b/test/Dialect/NDArray/Transforms/AddGPURegions.mlir index 6a00cd4ce..2c29f26c6 100644 --- a/test/Dialect/NDArray/Transforms/AddGPURegions.mlir +++ b/test/Dialect/NDArray/Transforms/AddGPURegions.mlir @@ -1,5 +1,7 @@ // RUN: imex-opt --split-input-file --add-gpu-regions %s -verify-diagnostics -o -| FileCheck %s +#GPUENV = #ndarray.envs<#region.gpu_env> + func.func @test_region(%arg0: i64, %arg1: i64, %arg2: i64) -> i64 { %c0 = arith.constant 0 : index %c3 = arith.constant 3 : index @@ -7,38 +9,38 @@ func.func @test_region(%arg0: i64, %arg1: i64, %arg2: i64) -> i64 { %c22 = arith.constant 22 : index %v = arith.constant 55 : i64 %s = arith.index_cast %arg0 : i64 to index - %0 = ndarray.linspace %arg0 %arg1 %c33 false {device = "XeGPU", team = 1 : i64} : (i64, i64, i64) -> tensor<33xi64, #region.gpu_env> - %1 = ndarray.create %c22 value %v {dtype = 2 : i8, device = "XeGPU", team = 1 : i64} : (index, i64) -> tensor> - %10 = ndarray.subview %0[%c0][22][%c3] : tensor<33xi64, #region.gpu_env> to tensor> - %20 = ndarray.ewbin %10, %1 {op = 0 : i32} : (tensor>, tensor>) -> tensor> - %21 = ndarray.reduction %20 {op = 4 : i32} : tensor> -> tensor> - %30 = builtin.unrealized_conversion_cast %21 : tensor> to i64 - ndarray.delete %0 : tensor<33xi64, #region.gpu_env> - ndarray.delete %1 : tensor> + %0 = ndarray.linspace %arg0 %arg1 %c33 false {device = "XeGPU", team = 1 : i64} : (i64, i64, i64) -> tensor<33xi64, #GPUENV> + %1 = ndarray.create %c22 value %v {dtype = 2 : i8, device = "XeGPU", team = 1 : i64} : (index, i64) -> tensor + %10 = ndarray.subview %0[%c0][22][%c3] : tensor<33xi64, #GPUENV> to tensor + %20 = ndarray.ewbin %10, %1 {op = 0 : i32} : (tensor, tensor) -> tensor + %21 = ndarray.reduction %20 {op = 4 : i32} : tensor -> tensor + %30 = builtin.unrealized_conversion_cast %21 : tensor to i64 + ndarray.delete %0 : tensor<33xi64, #GPUENV> + ndarray.delete %1 : tensor return %30 : i64 } // CHECK-LABEL: func.func @test_region -// CHECK: [[V0:%.*]] = region.env_region #region.gpu_env -> tensor<33xi64, #region.gpu_env> { +// CHECK: [[V0:%.*]] = region.env_region #GPUENV -> tensor<33xi64, #GPUENV> { // CHECK-NEXT: ndarray.linspace // CHECK-NEXT: region.env_region_yield -// CHECK: [[V1:%.*]] = region.env_region #region.gpu_env -> tensor> { +// CHECK: [[V1:%.*]] = region.env_region #GPUENV -> tensor { // CHECK-NEXT: ndarray.create // CHECK-NEXT: region.env_region_yield -// CHECK: [[V2:%.*]] = region.env_region #region.gpu_env -> tensor> { +// CHECK: [[V2:%.*]] = region.env_region #GPUENV -> tensor { // CHECK-NEXT: ndarray.subview [[V0]] // CHECK-NEXT: region.env_region_yield -// CHECK: [[V3:%.*]] = region.env_region #region.gpu_env -> tensor> { +// CHECK: [[V3:%.*]] = region.env_region #GPUENV -> tensor { // CHECK-NEXT: ndarray.ewbin [[V2]], [[V1]] -// CHECK: [[V4:%.*]] = region.env_region #region.gpu_env -> tensor> { +// CHECK: [[V4:%.*]] = region.env_region #GPUENV -> tensor { // CHECK-NEXT: ndarray.reduction [[V3]] // CHECK-NEXT: region.env_region_yield // CHECK-NEXT: } // CHECK-NEXT: [[V5:%.*]] = builtin.unrealized_conversion_cast -// CHECK: region.env_region #region.gpu_env { -// CHECK-NEXT: ndarray.delete [[V0]] : tensor<33xi64, #region.gpu_env> +// CHECK: region.env_region #GPUENV { +// CHECK-NEXT: ndarray.delete [[V0]] : tensor<33xi64, #GPUENV> // CHECK-NEXT: } -// CHECK-NEXT: region.env_region #region.gpu_env { -// CHECK-NEXT: ndarray.delete [[V1]] : tensor> +// CHECK-NEXT: region.env_region #GPUENV { +// CHECK-NEXT: ndarray.delete [[V1]] : tensor // CHECK-NEXT: } // CHECK-NEXT: return [[V5]] @@ -48,34 +50,34 @@ func.func @test_copy() -> tensor<33xi64> { %c0 = arith.constant 0 : i64 %c3 = arith.constant 3 : i64 %c33 = arith.constant 33 : i64 - %0 = ndarray.linspace %c0 %c3 %c33 false {device = "XeGPU", team = 1 : i64} : (i64, i64, i64) -> tensor<33xi64, #region.gpu_env> - %1 = ndarray.copy %0 : tensor<33xi64, #region.gpu_env> -> tensor<33xi64> - %2 = ndarray.copy %1 : tensor<33xi64> -> tensor<33xi64, #region.gpu_env> - %3 = ndarray.copy %2 : tensor<33xi64, #region.gpu_env> -> tensor<33xi64, #region.gpu_env> - %4 = ndarray.copy %3 : tensor<33xi64, #region.gpu_env> -> tensor<33xi64> + %0 = ndarray.linspace %c0 %c3 %c33 false {device = "XeGPU", team = 1 : i64} : (i64, i64, i64) -> tensor<33xi64, #GPUENV> + %1 = ndarray.copy %0 : tensor<33xi64, #GPUENV> -> tensor<33xi64> + %2 = ndarray.copy %1 : tensor<33xi64> -> tensor<33xi64, #GPUENV> + %3 = ndarray.copy %2 : tensor<33xi64, #GPUENV> -> tensor<33xi64, #GPUENV> + %4 = ndarray.copy %3 : tensor<33xi64, #GPUENV> -> tensor<33xi64> %5 = ndarray.copy %4 : tensor<33xi64> -> tensor<33xi64> return %5 : tensor<33xi64> } // CHECK-LABEL: func.func @test_copy() -> tensor<33xi64> { -// CHECK: region.env_region #region.gpu_env -> tensor<33xi64, #region.gpu_env> { +// CHECK: region.env_region #GPUENV -> tensor<33xi64, #GPUENV> { // CHECK: ndarray.linspace -// CHECK-SAME: -> tensor<33xi64, #region.gpu_env> +// CHECK-SAME: -> tensor<33xi64, #GPUENV> // CHECK: region.env_region_yield -// CHECK-SAME: tensor<33xi64, #region.gpu_env> +// CHECK-SAME: tensor<33xi64, #GPUENV> // CHECK: ndarray.copy -// CHECK-SAME: tensor<33xi64, #region.gpu_env> -> tensor<33xi64> -// CHECK: region.env_region #region.gpu_env -> tensor<33xi64, #region.gpu_env> { +// CHECK-SAME: tensor<33xi64, #GPUENV> -> tensor<33xi64> +// CHECK: region.env_region #GPUENV -> tensor<33xi64, #GPUENV> { // CHECK: ndarray.copy -// CHECK-SAME: tensor<33xi64> -> tensor<33xi64, #region.gpu_env> +// CHECK-SAME: tensor<33xi64> -> tensor<33xi64, #GPUENV> // CHECK: region.env_region_yield -// CHECK-SAME: tensor<33xi64, #region.gpu_env> -// CHECK: region.env_region #region.gpu_env -> tensor<33xi64, #region.gpu_env> { +// CHECK-SAME: tensor<33xi64, #GPUENV> +// CHECK: region.env_region #GPUENV -> tensor<33xi64, #GPUENV> { // CHECK: ndarray.copy -// CHECK-SAME: tensor<33xi64, #region.gpu_env> -> tensor<33xi64, #region.gpu_env> +// CHECK-SAME: tensor<33xi64, #GPUENV> -> tensor<33xi64, #GPUENV> // CHECK: region.env_region_yield -// CHECK-SAME: tensor<33xi64, #region.gpu_env> +// CHECK-SAME: tensor<33xi64, #GPUENV> // CHECK: ndarray.copy -// CHECK-SAME: tensor<33xi64, #region.gpu_env> -> tensor<33xi64> +// CHECK-SAME: tensor<33xi64, #GPUENV> -> tensor<33xi64> // CHECK: ndarray.copy // CHECK-SAME: tensor<33xi64> -> tensor<33xi64> // CHECK: return