[XeTile][Canonicalization] Bug fix in VectorBroadcast Canonicalization. (#956)
intel · Nov 7, 2024 · 0330284 · 0330284
1 parent 42fdea5
commit 0330284
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 3 deletions.
diff --git a/lib/Dialect/XeTile/Transforms/Canonicalization.cpp b/lib/Dialect/XeTile/Transforms/Canonicalization.cpp
@@ -272,10 +272,11 @@ struct VectorBroadcastToXetileBroadcastOpPattern
       newOp->setDiscardableAttrs(discardableAttrs);
       return mlir::success();
     }
-    // If ranks are same, inner dimension is stretched in vector.broadcast. So
-    // broadcast dimension is 1 for this case.
+    // If ranks are same, decide the broadcast dimension based on the source
+    // vector shape.
+    auto broadcastDim = (sourceShape[0] == 1) ? 0 : 1;
     auto newOp = rewriter.replaceOpWithNewOp<imex::xetile::BroadcastOp>(
-        op, resultTy, op.getSource(), llvm::ArrayRef<int64_t>({1}));
+        op, resultTy, op.getSource(), llvm::ArrayRef<int64_t>({broadcastDim}));
     newOp->setDiscardableAttrs(discardableAttrs);
     return mlir::success();
   }

diff --git a/test/Dialect/XeTile/Transforms/canonicalization.mlir b/test/Dialect/XeTile/Transforms/canonicalization.mlir
@@ -268,6 +268,19 @@ gpu.module @test_module {
 // CHECK: %[[T1:.*]] = xetile.broadcast %[[T0]] [0] : vector<1x16xf32> -> vector<8x16xf32>
 // CHECK: gpu.return %[[T1]] : vector<8x16xf32>
 
+// -----
+gpu.module @test_module {
+  gpu.func @test_broadcast_3(%arg0 : vector<1x16xf32>) -> vector<8x16xf32> {
+    %0 = vector.broadcast %arg0 : vector<1x16xf32> to vector<8x16xf32>
+    gpu.return %0 : vector<8x16xf32>
+  }
+}
+
+// CHECK-LABEL: @test_broadcast_3
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: vector<1x16xf32>) -> vector<8x16xf32>
+// CHECK: %[[T0:.*]] = xetile.broadcast %[[ARG0]] [0] : vector<1x16xf32> -> vector<8x16xf32>
+// CHECK: gpu.return %[[T0]] : vector<8x16xf32>
+
 // -----
 gpu.module @test_module {
   gpu.func @test_multireduction_1(%arg0 : vector<64x256xf32>, %arg1 : vector<256xf32>) -> vector<256xf32> {