
Add XeGPU test cases covering various combinations of Ops supported by XeGPU dialect

Co-Authored-By: Chang, Liangliang <[email protected]>
2 people authored and silee2 committed Nov 21, 2023
1 parent bee8c01 commit 7ac6b72
Showing 20 changed files with 649 additions and 201 deletions.
4 changes: 4 additions & 0 deletions test/Conversion/XeGPUToSPIRV/lit.local.cfg
@@ -0,0 +1,4 @@
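# Tell lit to skip gemm_basic.mlir when running the tests in this directory.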
local_excludes = [
'gemm_basic.mlir'
]
config.excludes.update(local_excludes)
70 changes: 70 additions & 0 deletions test/Conversion/XeGPUToSPIRV/xegpu-to-vc.mlir
@@ -0,0 +1,70 @@
// RUN: imex-opt -imex-convert-gpu-to-spirv %s | FileCheck %s

gpu.module @test attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Addresses, Float16Buffer, Int64, Int16, Int8, Kernel, Linkage, Vector16, GenericPointer, Groups, Float16, Float64, AtomicFloat32AddEXT, ExpectAssumeKHR, SubgroupDispatch, VectorComputeINTEL, VectorAnyINTEL], [SPV_EXT_shader_atomic_float_add, SPV_KHR_expect_assume, SPV_INTEL_vector_compute]>, api=OpenCL, #spirv.resource_limits<>>} {
// CHECK: spirv.ConvertPtrToU
// CHECK: spirv.VectorInsertDynamic
gpu.func @create_nd_tdesc(%src: memref<64x64xf16>) kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
// Create a 2D block descriptor for an 8x16 f16 tile of %src starting at row 16, column 0.
%c16 = arith.constant 16 : index
%0 = xegpu.create_nd_tdesc %src[%c16, 0] {mode = vc} : memref<64x64xf16> -> !xegpu.tensor_desc<8x16xf16>
gpu.return
}


// CHECK-LABEL: spirv.func @llvm_genx_raw_send2_v128i32_i1_v8i32
// CHECK: (i8, i8, i1, i8, i8, i8, i32, i32, vector<8xi32>, vector<128xi32>)
// CHECK: -> vector<128xi32> "None" attributes
// CHECK: {VectorComputeFunctionINTEL, linkage_attributes = #spirv.linkage_attributes<linkage_name
// CHECK: = "llvm.genx.raw.send2.v128i32.i1.v8i32", linkage_type = <Import>>}
// CHECK-LABEL: spirv.func @load_nd
// CHECK: %[[ptr:.*]]: !spirv.ptr<!spirv.array<4096 x f16>, CrossWorkgroup>
// CHECK: %[[ptr_i64:.*]] = spirv.ConvertPtrToU %[[ptr]] : !spirv.ptr<!spirv.array<4096 x f16>, CrossWorkgroup> to i64
// CHECK: %{{.*}} = spirv.FunctionCall @llvm_genx_raw_send2_v128i32_i1_v8i32

gpu.func @load_nd(%src : memref<64x64xf16>) kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
%1 = xegpu.create_nd_tdesc %src[0, 0] { mode = vc} : memref<64x64xf16> -> !xegpu.tensor_desc<16x16xf16>
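// vnni_axis = 0 packs pairs of rows, so the 16x16 f16 tile loads as vector<8x16x2xf16>.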
%3 = xegpu.load_nd %1 {vnni_axis = 0, mode = vc} : !xegpu.tensor_desc<16x16xf16> -> vector<8x16x2xf16>
gpu.return
}

// CHECK-LABEL: spirv.func @llvm_genx_dpas_nosrc0_v128f32_v128i32_v64i32(vector<128xi32>, vector<64xi32>, i32)
// CHECK: -> vector<128xf32> "None" attributes {VectorComputeFunctionINTEL, linkage_attributes =
// CHECK: #spirv.linkage_attributes<linkage_name = "llvm.genx.dpas.nosrc0.v128f32.v128i32.v64i32", linkage_type = <Import>>}
// CHECK-LABEL: spirv.func @dpas
// CHECK: (%[[A:.*]]: vector<64xi32>, %[[B:.*]]: vector<128xi32>)
// CHECK-NEXT: %[[cst134744586_i32:.*]] = spirv.Constant 134744586 : i32
// CHECK-NEXT: %{{.*}} = spirv.FunctionCall @llvm_genx_dpas_nosrc0_v128f32_v128i32_v64i32(%[[B]], %[[A]], %[[cst134744586_i32]])
// CHECK: (vector<128xi32>, vector<64xi32>, i32) -> vector<128xf32>
gpu.func @dpas(%A : vector<8x8x2xf16>, %B : vector<8x16x2xf16>)
kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
%C = xegpu.dpas %A, %B { mode = vc }: vector<8x8x2xf16>, vector<8x16x2xf16> -> vector<8x16xf32>
gpu.return
}
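// Hypothetical sketch, not part of the tests above: xegpu.dpas with an
// accumulator operand (assumed optional third operand), which would lower to
// a src0 dpas intrinsic rather than the nosrc0 variant checked above.
gpu.func @dpas_acc(%A : vector<8x8x2xf16>, %B : vector<8x16x2xf16>, %C : vector<8x16xf32>)
kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
%D = xegpu.dpas %A, %B, %C { mode = vc } : vector<8x8x2xf16>, vector<8x16x2xf16>, vector<8x16xf32> -> vector<8x16xf32>
gpu.return
}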


// CHECK: (i8, i8, i1, i8, i8, i8, i32, i32, vector<8xi32>, vector<128xf32>)
// CHECK: "None" attributes {VectorComputeFunctionINTEL, linkage_attributes = #spirv.linkage_attributes<linkage_name
// CHECK: = "llvm.genx.raw.sends2.noresult.i1.v8i32.v128f32", linkage_type = <Import>>}
// CHECK: (%[[value:.*]]: vector<128xf32>, %[[ptr:.*]]: !spirv.ptr<!spirv.array<4096 x f32>, CrossWorkgroup>)
// CHECK: %[[ptr_i64:.*]] = spirv.ConvertPtrToU %[[ptr]] : !spirv.ptr<!spirv.array<4096 x f32>, CrossWorkgroup> to i64
// CHECK: spirv.FunctionCall @llvm_genx_raw_sends2_noresult_i1_v8i32_v128f32
gpu.func @store_nd(%value : vector<8x16xf32>, %dest : memref<64x64xf32>)
kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
%1 = xegpu.create_nd_tdesc %dest[0, 0] { mode = vc } : memref<64x64xf32> -> !xegpu.tensor_desc<8x16xf32>
xegpu.store_nd %value, %1 { mode = vc } : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32>
gpu.return
}
// CHECK: (i8, i8, i1, i8, i8, i32, i32, vector<8xi32>)
// CHECK: "None" attributes {VectorComputeFunctionINTEL, linkage_attributes = #spirv.linkage_attributes<linkage_name =
// CHECK: "llvm.genx.raw.send2.noresult.i1.v8i32", linkage_type = <Import>>}
// CHECK: (%[[ptr:.*]]: !spirv.ptr<!spirv.array<4096 x f16>, CrossWorkgroup>)
// CHECK: spirv.ConvertPtrToU %[[ptr]] : !spirv.ptr<!spirv.array<4096 x f16>, CrossWorkgroup> to i64
// CHECK: spirv.VectorInsertDynamic
// CHECK: spirv.FunctionCall @llvm_genx_raw_send2_noresult_i1_v8i32
gpu.func @prefetch(%src : memref<64x64xf16>)
kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
%0 = xegpu.create_nd_tdesc %src[0, 0] { mode = vc } : memref<64x64xf16> -> !xegpu.tensor_desc<8x16xf16>
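// Prefetch the 8x16 f16 tile described by %0 ahead of a subsequent load.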
xegpu.prefetch_nd %0 { mode = vc } : !xegpu.tensor_desc<8x16xf16>
gpu.return
}

}
19 changes: 10 additions & 9 deletions test/Dialect/XeGPU/IR/atomic_rmw.mlir
@@ -4,35 +4,36 @@
 // Verify the generic form can be parsed.
 // RUN: imex-opt -mlir-print-op-generic %s | imex-opt | FileCheck %s

+#sg_map_fp32 = #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>
 // CHECK-LABEL: func @test_atomic_rmw({{.*}}) {
 func.func @test_atomic_rmw(%src: ui64, %offsets : vector<16 x index>, %value : vector<16x1xf32>, %mask : vector<16xi1>) {
-  %1 = xegpu.create_tdesc %src, %offsets {mode = vc} : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
+  %1 = xegpu.create_tdesc %src, %offsets : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #sg_map_fp32>

   // CHECK: xegpu.atomic_rmw
-  // CHECK-SAME: !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1>, vector<16x1xf32>
-  xegpu.atomic_rmw "addf" %1, %mask, %value {mode = vc} : !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1>, vector<16x1xf32> -> vector<16x1xf32>
+  // CHECK-SAME: !xegpu.tensor_desc<16xf32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>, vector<16xi1>, vector<16x1xf32>
+  xegpu.atomic_rmw "addf" %1, %mask, %value : !xegpu.tensor_desc<16xf32, #sg_map_fp32>, vector<16xi1>, vector<16x1xf32> -> vector<16x1xf32>

   return
 }

 // CHECK-LABEL: func @test_atomic_rmw_0({{.*}}) {
 func.func @test_atomic_rmw_0(%src: ui64, %offsets : vector<16 x index>, %value : vector<16x2xf32>, %mask : vector<16xi1>) {
-  %1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 2} : ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xf32, #xegpu.scattered>
+  %1 = xegpu.create_tdesc %src, %offsets {chunk_size_per_lane = 2} : ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xf32, #sg_map_fp32>

   // CHECK: xegpu.atomic_rmw
-  // CHECK-SAME: !xegpu.tensor_desc<16x2xf32, #xegpu.scattered>, vector<16xi1>, vector<16x2xf32>
-  xegpu.atomic_rmw "mulf" %1, %mask, %value {mode = vc} : !xegpu.tensor_desc<16x2xf32, #xegpu.scattered>, vector<16xi1>, vector<16x2xf32> -> vector<16x2xf32>
+  // CHECK-SAME: !xegpu.tensor_desc<16x2xf32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>, vector<16xi1>, vector<16x2xf32> -> vector<16x2xf32>
+  xegpu.atomic_rmw "mulf" %1, %mask, %value : !xegpu.tensor_desc<16x2xf32, #sg_map_fp32>, vector<16xi1>, vector<16x2xf32> -> vector<16x2xf32>

   return
 }

 // CHECK-LABEL: func @test_atomic_rmw_1({{.*}}) {
 func.func @test_atomic_rmw_1(%src: ui64, %offsets : vector<16 x index>, %value : vector<16x2xi32>, %mask : vector<16xi1>) {
-  %1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 2} : ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xi32, #xegpu.scattered>
+  %1 = xegpu.create_tdesc %src, %offsets {chunk_size_per_lane = 2} : ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xi32, #sg_map_fp32>

   // CHECK: xegpu.atomic_rmw
-  // CHECK-SAME: !xegpu.tensor_desc<16x2xi32, #xegpu.scattered>, vector<16xi1>, vector<16x2xi32>
-  xegpu.atomic_rmw "andi" %1, %mask, %value {mode = vc} : !xegpu.tensor_desc<16x2xi32, #xegpu.scattered>, vector<16xi1>, vector<16x2xi32> -> vector<16x2xf32>
+  // CHECK-SAME: !xegpu.tensor_desc<16x2xi32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>, vector<16xi1>, vector<16x2xi32> -> vector<16x2xi32>
+  xegpu.atomic_rmw "andi" %1, %mask, %value : !xegpu.tensor_desc<16x2xi32, #sg_map_fp32>, vector<16xi1>, vector<16x2xi32> -> vector<16x2xi32>

   return
 }
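Distilled from the diff above: the tests drop the explicit {mode = vc} attribute and the #xegpu.scattered descriptor attribute in favor of an #xegpu.sg_map attribute on the tensor descriptor. A minimal sketch of the new idiom, assembled from the lines in this diff (the name @atomic_add_sketch is illustrative):

#sg_map_fp32 = #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>
func.func @atomic_add_sketch(%src: ui64, %offsets : vector<16 x index>, %value : vector<16x1xf32>, %mask : vector<16xi1>) {
  // Build a scattered descriptor over 16 lanes, then apply a masked atomic add.
  %tdesc = xegpu.create_tdesc %src, %offsets : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #sg_map_fp32>
  %res = xegpu.atomic_rmw "addf" %tdesc, %mask, %value : !xegpu.tensor_desc<16xf32, #sg_map_fp32>, vector<16xi1>, vector<16x1xf32> -> vector<16x1xf32>
  return
}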
38 changes: 38 additions & 0 deletions test/Dialect/XeGPU/IR/atomic_rmw_vc.mlir
@@ -0,0 +1,38 @@
// RUN: imex-opt %s | FileCheck %s
// Verify the printed output can be parsed.
// RUN: imex-opt %s | imex-opt | FileCheck %s
// Verify the generic form can be parsed.
// RUN: imex-opt -mlir-print-op-generic %s | imex-opt | FileCheck %s

// CHECK-LABEL: func @test_atomic_rmw({{.*}}) {
func.func @test_atomic_rmw(%src: ui64, %offsets : vector<16 x index>, %value : vector<16x1xf32>, %mask : vector<16xi1>) {
%1 = xegpu.create_tdesc %src, %offsets {mode = vc} : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>

// CHECK: xegpu.atomic_rmw
// CHECK-SAME: !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1>, vector<16x1xf32>
xegpu.atomic_rmw "addf" %1, %mask, %value {mode = vc} : !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1>, vector<16x1xf32> -> vector<16x1xf32>

return
}

// CHECK-LABEL: func @test_atomic_rmw_0({{.*}}) {
func.func @test_atomic_rmw_0(%src: ui64, %offsets : vector<16 x index>, %value : vector<16x2xf32>, %mask : vector<16xi1>) {
%1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 2}: ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xf32, #xegpu.scattered>

// CHECK: xegpu.atomic_rmw
// CHECK-SAME: !xegpu.tensor_desc<16x2xf32, #xegpu.scattered>, vector<16xi1>, vector<16x2xf32>
xegpu.atomic_rmw "mulf" %1, %mask, %value {mode = vc} : !xegpu.tensor_desc<16x2xf32, #xegpu.scattered>, vector<16xi1>, vector<16x2xf32> -> vector<16x2xf32>

return
}

// CHECK-LABEL: func @test_atomic_rmw_1({{.*}}) {
func.func @test_atomic_rmw_1(%src: ui64, %offsets : vector<16 x index>, %value : vector<16x2xi32>, %mask : vector<16xi1>) {
%1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 2}: ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xi32, #xegpu.scattered>

// CHECK: xegpu.atomic_rmw
// CHECK-SAME: !xegpu.tensor_desc<16x2xi32, #xegpu.scattered>, vector<16xi1>, vector<16x2xi32>
xegpu.atomic_rmw "andi" %1, %mask, %value {mode = vc} : !xegpu.tensor_desc<16x2xi32, #xegpu.scattered>, vector<16xi1>, vector<16x2xi32> -> vector<16x2xf32>

return
}
(Diffs for the remaining 16 changed files are not shown.)
