From daa697eaf60f697045292b515d4cbc5d7b7e61d0 Mon Sep 17 00:00:00 2001 From: Chao Chen Date: Mon, 7 Oct 2024 16:22:47 +0000 Subject: [PATCH] Add lowering pattern and test for arith::AndIOp --- .../XeTileToXeGPU/XeTileOpConversion.cpp | 1 + .../XeTileToXeGPU/XeTileToXeGPU.cpp | 2 ++ .../XeTileToXeGPU/elementwise_ops.mlir | 36 +++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp b/lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp index 7087899f6..bd9201d10 100644 --- a/lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp +++ b/lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp @@ -1164,6 +1164,7 @@ void populateXeTileOpConversionPatterns(imex::XeOneToNTypeConverter &converter, ElementWiseOpPattern, ElementWiseOpPattern, ElementWiseOpPattern, + ElementWiseOpPattern, ElementWiseOpPattern, ElementWiseOpPattern, ElementWiseOpPattern, diff --git a/lib/Conversion/XeTileToXeGPU/XeTileToXeGPU.cpp b/lib/Conversion/XeTileToXeGPU/XeTileToXeGPU.cpp index 03db34de9..31c09ebbc 100644 --- a/lib/Conversion/XeTileToXeGPU/XeTileToXeGPU.cpp +++ b/lib/Conversion/XeTileToXeGPU/XeTileToXeGPU.cpp @@ -92,6 +92,8 @@ class XeTileConversionTarget : public mlir::ConversionTarget { // Arith ops addDynamicallyLegalOp( [&](mlir::Operation *op) -> bool { return isLegalElementWiseOp(op); }); + addDynamicallyLegalOp( + [&](mlir::Operation *op) -> bool { return isLegalElementWiseOp(op); }); addDynamicallyLegalOp( [&](mlir::Operation *op) -> bool { return isLegalElementWiseOp(op); }); addDynamicallyLegalOp( diff --git a/test/Conversion/XeTileToXeGPU/elementwise_ops.mlir b/test/Conversion/XeTileToXeGPU/elementwise_ops.mlir index 8e5ed4621..18898ab83 100644 --- a/test/Conversion/XeTileToXeGPU/elementwise_ops.mlir +++ b/test/Conversion/XeTileToXeGPU/elementwise_ops.mlir @@ -221,4 +221,40 @@ xetile.store_tile %7, %6 : vector<4x2x8x16xi16>, !xetile.tile<32x32xi16, #xetile.tile_attr> gpu.return } + + + gpu.func @sglevel_and_test(%arg0: memref<1x4096xi8>, %arg1: memref<1x4096xi8>, %arg2: memref<1x4096xi8>) { + %c0 = arith.constant 0 : index + %c4096 = arith.constant 4096 : index + %c32 = arith.constant 32 : index + %c1024_i32 = arith.constant 1024 : i32 + %thread_id_x = gpu.thread_id x + %thread_id_y = gpu.thread_id y + %block_dim_y = gpu.block_dim y + %0 = arith.muli %thread_id_x, %block_dim_y : index + %1 = arith.addi %0, %thread_id_y : index + %block_id_x = gpu.block_id x + %2 = arith.index_cast %block_id_x : index to i32 + %3 = arith.muli %2, %c1024_i32 : i32 + %4 = arith.index_cast %3 : i32 to index + %5 = arith.remsi %1, %c32 : index + %6 = arith.muli %5, %c32 : index + %7 = arith.remsi %6, %c4096 : index + %8 = arith.addi %7, %4 : index + %9 = xetile.init_tile %arg0[%c0, %8] : memref<1x4096xi8> -> !xetile.tile<1x32xi8, #xetile.tile_attr> + %10 = xetile.load_tile %9 {padding = 0 : i32} : !xetile.tile<1x32xi8, #xetile.tile_attr> -> vector<1x1x1x32xi8> + %11 = xetile.tile_unpack %10 {inner_blocks = array} : vector<1x1x1x32xi8> -> vector<1x32xi8> + %12 = xetile.init_tile %arg1[%c0, %8] : memref<1x4096xi8> -> !xetile.tile<1x32xi8, #xetile.tile_attr> + %13 = xetile.load_tile %12 {padding = 0 : i32} : !xetile.tile<1x32xi8, #xetile.tile_attr> -> vector<1x1x1x32xi8> + %14 = xetile.tile_unpack %13 {inner_blocks = array} : vector<1x1x1x32xi8> -> vector<1x32xi8> + %15 = xetile.tile_pack %11 {inner_blocks = array} : vector<1x32xi8> -> vector<1x1x1x32xi8> + %16 = xetile.tile_pack %14 {inner_blocks = array} : vector<1x32xi8> -> vector<1x1x1x32xi8> + //CHECK: %{{.*}} = arith.andi %{{.*}}, %{{.*}} : vector<1x32xi8> + %17 = arith.andi %15, %16 : vector<1x1x1x32xi8> + %18 = xetile.tile_unpack %17 {inner_blocks = array} : vector<1x1x1x32xi8> -> vector<1x32xi8> + %19 = xetile.init_tile %arg2[%c0, %8] : memref<1x4096xi8> -> !xetile.tile<1x32xi8, #xetile.tile_attr> + %20 = xetile.tile_pack %18 {inner_blocks = array} : vector<1x32xi8> -> vector<1x1x1x32xi8> + xetile.store_tile %20, %19 : vector<1x1x1x32xi8>, !xetile.tile<1x32xi8, #xetile.tile_attr> + gpu.return + } }