diff --git a/include/imex/Dialect/XeGPU/IR/XeGPUAttrs.td b/include/imex/Dialect/XeGPU/IR/XeGPUAttrs.td index 9d75719e2..4d1e04b1c 100644 --- a/include/imex/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/include/imex/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -21,13 +21,33 @@ def XeGPU_ScatteredAttr : XeGPUAttr<"Scattered", "scattered"> { } def XeGPU_SgMapAttr: XeGPUAttr<"SgMap", "sg_map"> { - let parameters = (ins - ArrayRefParameter<"unsigned">:$mmaBlockSize, + let parameters = (ins ArrayRefParameter<"unsigned">:$wiLayout, - ArrayRefParameter<"unsigned">:$wiData); - + ArrayRefParameter<"unsigned">:$wiData, + ArrayRefParameter<"unsigned">:$mmaBlockSize); + // In format of #xegpu.sg_map<{mma_block_size = [2, 4], wi_layout = [2, 4], wi_data = [2, 4]}> let assemblyFormat = "`<` custom($mmaBlockSize, $wiLayout, $wiData) `>`"; + + let extraClassDeclaration = [{ + bool hasMMABlockSizeAttr() { + return getMmaBlockSize().size() == 2; + } + }]; + + let builders = [ + AttrBuilder<(ins + "::llvm::ArrayRef":$wiLayout, + "::llvm::ArrayRef":$wiData, + CArg<"::llvm::ArrayRef", "{}">:$mmaBlockSize + ), [{ + assert(wiLayout.size() == 2 && wiData.size() == 2 && "wiLayout and wiData should be 2D arrays.\n"); + assert((mmaBlockSize.size() == 2 || mmaBlockSize.size() == 0) && "mmaBlockSize can be either empty or a 2D array.\n"); + return $_get($_ctxt, wiLayout, wiData, mmaBlockSize); + }]> + ]; + + let skipDefaultBuilders = 1; } def XeGPU_WgMapAttr: XeGPUAttr<"WgMap", "wg_map"> { @@ -35,6 +55,17 @@ def XeGPU_WgMapAttr: XeGPUAttr<"WgMap", "wg_map"> { ArrayRefParameter<"unsigned">:$sgLayout, ArrayRefParameter<"unsigned">:$sgData); + let builders = [ + AttrBuilder<(ins + "::llvm::ArrayRef":$sgLayout, + "::llvm::ArrayRef":$sgData + ), [{ + assert(sgLayout.size() == 2 && sgData.size() == 2 && "sgLayout and sgData should be 2D arrays.\n"); + return $_get($_ctxt, sgLayout, sgData); + }]> + ]; + let skipDefaultBuilders = 1; + // In format of #xegpu.wg_map<{sg_layout = [2, 4], sg_data = [2, 4]}> let assemblyFormat = "`<` custom($sgLayout, $sgData) `>`"; } @@ -44,7 +75,24 @@ def XeGPU_XeMapAttr: XeGPUAttr<"XeMap", "xe_map"> { XeGPU_WgMapAttr: $wg, XeGPU_SgMapAttr: $sg); - // In format of #xegpu.xe_map + let builders = [ + AttrBuilder<(ins + "::llvm::ArrayRef":$sgLayout, + "::llvm::ArrayRef":$sgData, + "::llvm::ArrayRef":$wiLayout, + "::llvm::ArrayRef":$wiData, + CArg<"::llvm::ArrayRef", "{}">:$mmaBlockSize + ), [{ + assert(sgLayout.size() == 2 && sgData.size() == 2 && "sgLayout and sgData should be 2D arrays.\n"); + assert(wiLayout.size() == 2 && wiData.size() == 2 && "wiLayout and wiData should be 2D arrays.\n"); + assert((mmaBlockSize.size() == 2 || mmaBlockSize.size() == 0) && "mmaBlockSize can be either empty or a 2D array.\n"); + auto wg = WgMapAttr::get($_ctxt, sgLayout, sgData); + auto sg = SgMapAttr::get($_ctxt, wiLayout, wiData, mmaBlockSize); + return $_get($_ctxt, wg, sg); + }]> + ]; + + // In format of #xegpu.xe_map let hasCustomAssemblyFormat = 1; } diff --git a/include/imex/Dialect/XeGPU/IR/XeGPUOps.h b/include/imex/Dialect/XeGPU/IR/XeGPUOps.h index 1248f6964..073587bdd 100644 --- a/include/imex/Dialect/XeGPU/IR/XeGPUOps.h +++ b/include/imex/Dialect/XeGPU/IR/XeGPUOps.h @@ -46,41 +46,6 @@ class TensorDescType; } // namespace xegpu } // namespace imex -namespace imex { -namespace xegpu { - -class BaseTensorDescType : public mlir::Type, - public mlir::ShapedType::Trait { -public: - using Type::Type; - - /// Returns the element type of this tensor type. 
- mlir::Type getElementType() const; - - /// Returns if this type is ranked, i.e. it has a known number of dimensions. - bool hasRank() const; - - /// Returns the shape of this tensor type. - llvm::ArrayRef getShape() const; - - /// Clone this type with the given shape and element type. If the - /// provided shape is `None`, the current shape of the type is used. - BaseTensorDescType cloneWith(std::optional> shape, - mlir::Type elementType) const; - - /// Return true if the specified element type is ok in a tensor. - static bool isValidElementType(Type type); - - /// Methods for support type inquiry through isa, cast, and dyn_cast. - static bool classof(Type type); - - /// Allow implicit conversion to ShapedType. - operator mlir::ShapedType() const { return cast(); } -}; - -} // namespace xegpu -} // namespace imex - #include #include #define GET_ATTRDEF_CLASSES diff --git a/include/imex/Dialect/XeGPU/IR/XeGPUOps.td b/include/imex/Dialect/XeGPU/IR/XeGPUOps.td index e2105c092..725299744 100644 --- a/include/imex/Dialect/XeGPU/IR/XeGPUOps.td +++ b/include/imex/Dialect/XeGPU/IR/XeGPUOps.td @@ -54,8 +54,7 @@ def XeGPU_CreateNdDescOp : XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSe both shape and strides are required to to carry the respect information. Otherwise, the operator is invalid. - The operation also supports two attributes: - * memory_scope (MemoryScopeAttr): indicates where the memory is located, "global" for global memory (default), and "slm" for shared memory. + The operation also supports the following attribute: * boundary_check (BoolAttr): indicates whether the operation detects the boundary and pads with zero for out-of-boundary access (default) Example 1 (suppose the tensor shape inferred by the compiler is 8x16): @@ -82,7 +81,6 @@ def XeGPU_CreateNdDescOp : XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSe Variadic: $shape, Variadic: $strides, DenseI64ArrayAttr: $static_offsets, - DefaultValuedAttr: $memory_scope, DefaultValuedAttr: $boundary_check, DefaultValuedAttr: $mode); @@ -95,23 +93,40 @@ def XeGPU_CreateNdDescOp : XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSe let builders = [ OpBuilder<(ins "::mlir::Type": $TensorDesc, "::mlir::Value": $source, "::mlir::ValueRange": $offsets, "::mlir::ValueRange": $shape, "::mlir::ValueRange": $strides, "::llvm::ArrayRef": $static_offsets, - CArg<"::imex::xegpu::MemoryScope", "xegpu::MemoryScope::GLOBAL">: $memory_scope, CArg<"bool", "true">: $boundary_check, CArg<"::imex::xegpu::Mode", "imex::xegpu::Mode::SIMT">: $mode), - [{ $_state.addOperands(source); + [{ + auto staticDims = std::count_if(static_offsets.begin(), static_offsets.end(), + [](int64_t d) { return !mlir::ShapedType::isDynamic(d); }); + auto dynamicDims = std::count_if(static_offsets.begin(), static_offsets.end(), + [](int64_t d) { return mlir::ShapedType::isDynamic(d); }); + + auto dims = offsets.size() + staticDims; + assert((isStaticShapedMemRef(source) && + dims == getRankOf(source) && + shape.size() == 0 && + strides.size() == 0 + ) || + ((!isMemRef(source) || dims == getRankOf(source)) && + shape.size() != 0 && + dims == shape.size() && + shape.size() == strides.size() + ) + ); + assert(offsets.size() == dynamicDims); + + $_state.addOperands(source); $_state.addOperands(offsets); $_state.addOperands(shape); $_state.addOperands(strides); $_state.addAttribute(getOperandSegmentSizesAttrName($_state.name), $_builder.getDenseI32ArrayAttr({1, static_cast(offsets.size()), static_cast(shape.size()), static_cast(strides.size())})); 
$_state.addAttribute(getStaticOffsetsAttrName($_state.name), $_builder.getDenseI64ArrayAttr(static_offsets)); - $_state.addAttribute(getMemoryScopeAttrName($_state.name), ::imex::xegpu::MemoryScopeAttr::get($_builder.getContext(), memory_scope)); $_state.addAttribute(getBoundaryCheckAttrName($_state.name), $_builder.getBoolAttr(boundary_check)); - $_state.addAttribute(getBoundaryCheckAttrName($_state.name), ::imex::xegpu::ModeAttr::get($_builder.getContext(), mode)); + $_state.addAttribute(getModeAttrName($_state.name), ::imex::xegpu::ModeAttr::get($_builder.getContext(), mode)); $_state.addTypes(TensorDesc); }]>, OpBuilder<(ins "::mlir::Type": $tdesc, "::mlir::Value": $source, "::llvm::ArrayRef": $offsets, - CArg<"::imex::xegpu::MemoryScope", "::imex::xegpu::MemoryScope::GLOBAL">:$memory_scope, CArg<"bool", "true">:$boundary_check, CArg<"::imex::xegpu::Mode", "imex::xegpu::Mode::SIMT">: $mode), - [{ assert(offsets.size() == getRankOf(source)); + [{ assert(isStaticShapedMemRef(source) && offsets.size() == getRankOf(source)); llvm::SmallVector staticOffsets; llvm::SmallVector dynamicOffsets; dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets); @@ -120,16 +135,14 @@ def XeGPU_CreateNdDescOp : XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSe ::mlir::ValueRange({}) /* empty dynamic shape */, ::mlir::ValueRange({}) /* empty dynamic strides */, staticOffsets /* static offsets */, - memory_scope, boundary_check, mode); }]>, OpBuilder<(ins "::mlir::Type": $tdesc, "::mlir::Value": $source, "::llvm::ArrayRef": $offsets, "::mlir::ValueRange": $shape, "::mlir::ValueRange": $stride, - CArg<"::imex::xegpu::MemoryScope", "xegpu::MemoryScope::GLOBAL">:$memory_scope, CArg<"bool", "true">:$boundary_check, CArg<"::imex::xegpu::Mode", "imex::xegpu::Mode::SIMT">: $mode), - [{ assert((!isMemRef(source) || getRankOf(source) == offsets.size()) && shape.size() == stride.size() && + [{ assert((!isMemRef(source) || getRankOf(source) == offsets.size()) && shape.size() != 0 && shape.size() == stride.size() && offsets.size() == shape.size() && isIntegerOrDynamicShapedMemref(source)); llvm::SmallVector staticOffsets; @@ -141,7 +154,6 @@ def XeGPU_CreateNdDescOp : XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSe shape /* empty dynamic shape */, stride /* empty dynamic strides */, staticOffsets /* static offsets */, - memory_scope, boundary_check, mode); }]> ]; @@ -172,6 +184,59 @@ def XeGPU_CreateNdDescOp : XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSe assert(0 && "Unreachable"); } + void getOffsets(llvm::SmallVectorImpl &offsets) { + auto dynamicOffsets = getOffsets(); //dynamic offsets + auto staticOffsets = getStaticOffsets(); + + if (staticOffsets.size() == 0) { + offsets.assign(dynamicOffsets.begin(), dynamicOffsets.end()); + return; + } + + for (size_t i = 0, j = 0; i < staticOffsets.size(); i++) { + if (mlir::ShapedType::isDynamic(staticOffsets[i])) { + assert(j < dynamicOffsets.size()); + offsets.push_back(dynamicOffsets[j++]); + } else { + auto attr = mlir::IntegerAttr::get(mlir::IndexType::get(getContext()), staticOffsets[i]); + offsets.push_back(attr); + } + } + } + + void getShape(llvm::SmallVectorImpl &shape) { + if (isIntegerOrDynamicShapedMemref(getSource())) { + shape.append(getShape().begin(), getShape().end()); + } else { + for (auto dim: getSourceType().cast<::mlir::MemRefType>().getShape()) { + auto attr = mlir::IntegerAttr::get(mlir::IndexType::get(getContext()), dim); + shape.push_back(attr); + } + } + } + + void getStrides(llvm::SmallVectorImpl &strides) { + if 
(isIntegerOrDynamicShapedMemref(getSource())) { + strides.append(getStrides().begin(), getStrides().end()); + } else { + auto [staticStrides, offset] = mlir::getStridesAndOffset(getSourceType().cast()); + for (auto dim: staticStrides) { + auto attr = mlir::IntegerAttr::get(mlir::IndexType::get(getContext()), dim); + strides.push_back(attr); + } + } + } + + size_t getNumStaticOffsets() { + return std::count_if(getStaticOffsets().begin(), getStaticOffsets().end(), + [](int64_t dSize) { return !mlir::ShapedType::isDynamic(dSize); }); + } + + size_t getNumDynamicOffsets() { + return std::count_if(getStaticOffsets().begin(), getStaticOffsets().end(), + [](int64_t dSize) { return mlir::ShapedType::isDynamic(dSize); }); + } + size_t getOffsetsRank() { return getOffsets().size() + std::count_if(getStaticOffsets().begin(), getStaticOffsets().end(), [](int64_t dSize) { return !mlir::ShapedType::isDynamic(dSize); }); @@ -259,7 +324,6 @@ def XeGPU_CreateDescOp supportted group size, e.g., vector<16xindex>. And each element in the vector corresponds to a work item (SIMT lane) in the subgroup. In SIMT mode (default), it is an index scalar representing the offset of the access point. - * memory_scope: [optional attribute] indicates where the memory is located, "global" for global memory (default), and "slm" for shared memory. * chunk_size_per_lane: [optional attribute] indicates number of continious elements accessed for each offset, default is 1. Example 1. It assumes subgroup size is 4, and accesses a[0], a[16], a[32], a[64] @@ -283,7 +347,6 @@ def XeGPU_CreateDescOp let arguments = (ins XeGPU_BaseAddrType: $source, XeGPU_OffsetType: $offsets, - DefaultValuedAttr: $memory_scope, DefaultValuedAttr: $chunk_size_per_lane, DefaultValuedAttr: $mode); @@ -303,7 +366,7 @@ def XeGPU_CreateDescOp }]; - // Format: xegpu.create_tdesc %src, %offsets {mode=simt, memory_scope=slm, chunk_size_per_lane=1} + // Format: xegpu.create_tdesc %src, %offsets {mode=simt, chunk_size_per_lane=1} // : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> let hasCustomAssemblyFormat = 1; let hasVerifier = 1; diff --git a/include/imex/Dialect/XeGPU/IR/XeGPUTypes.td b/include/imex/Dialect/XeGPU/IR/XeGPUTypes.td index e7f0723f4..b15072834 100644 --- a/include/imex/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/include/imex/Dialect/XeGPU/IR/XeGPUTypes.td @@ -23,18 +23,15 @@ include "imex/Dialect/XeGPU/IR/XeGPUDialect.td" // An Integer array attribute with fixed 2 elements. 
def XeGPU_IntArrayAttr2: ConfinedAttr]>; def XeGPU_IntType: AnyTypeOf<[I1, I8, I16, I32, I64, SI1, SI8, SI16, SI32, SI64, UI1, UI8, UI16, UI32, UI64]>; -def XeGPU_FloatType: AnyTypeOf<[F16, F32, F64, BF16, F8E4M3FN, F8E5M2, F8E4M3FNUZ, F8E4M3B11FNUZ, F8E5M2FNUZ]>; +def XeGPU_FloatType: AnyTypeOf<[F16, F32, F64, BF16, TF32]>; def XeGPU_ScalarType: AnyTypeOf<[XeGPU_IntType, XeGPU_FloatType]>; -def XeGPU_BaseAddrType: AnyTypeOf<[MemRefRankOf<[XeGPU_ScalarType], [1, 2]>, UI64]>; +def XeGPU_BaseAddrType: AnyTypeOf<[MemRefRankOf<[XeGPU_ScalarType], [1, 2]>, UI64, UI32, I64, I32]>; def XeGPU_DpasOpType: VectorOfRankAndType<[2, 3], [XeGPU_ScalarType]>; def XeGPU_OffsetType: AnyTypeOf<[VectorOfRankAndType<[1], [Index]>, Index]>; def XeGPU_MaskType: AnyTypeOf<[VectorOfRankAndType<[1,2], [I1]>, I1]>; def XeGPU_ValueType: AnyTypeOf<[VectorOfRankAndType<[1,2,3], [XeGPU_ScalarType]>, XeGPU_ScalarType]>; -// def XeGPU_VectorType: VectorOfRankAndType<[1,2,3], [XeGPU_ScalarType]>; -// def XeGPU_Vector3DType: VectorOfRankAndType<[3], [XeGPU_ScalarType]>; def XeGPU_Vector2DType: VectorOfRankAndType<[2], [XeGPU_ScalarType]>; -// def XeGPU_Vector1DType: VectorOfRankAndType<[1], [XeGPU_ScalarType]>; // common base class for types in XeGPU dialect class XeGPUTypeDef:$shape, "::mlir::Type":$elementType, + DefaultValuedParameter<"::imex::xegpu::MemoryScope", "xegpu::MemoryScope::GLOBAL">: $memory_scope, OptionalParameter<"::mlir::Attribute"> :$encoding); let builders = [ TypeBuilderWithInferredContext<(ins "::llvm::ArrayRef":$shape, "::mlir::Type":$elementType, + CArg<"::imex::xegpu::MemoryScope", "xegpu::MemoryScope::GLOBAL">: $memory_scope, CArg<"::mlir::Attribute", "{}">:$encoding ), [{ - return $_get(elementType.getContext(), shape, elementType, encoding); + return $_get(elementType.getContext(), shape, elementType, memory_scope, encoding); }]> ]; @@ -100,8 +99,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", } }]; - // let assemblyFormat = "`<` custom($shape, $elementType) (`,` custom($encoding)^)? `>`"; - let assemblyFormat = "`<` custom($shape, $elementType) (`,` $encoding^)? 
`>`"; + let assemblyFormat = "`<` custom($shape, $elementType)``custom($memory_scope, $encoding)`>`"; } #endif // _XEGPU_TYPES_TD_INCLUDED_ diff --git a/include/imex/Utils/XeUtils.h b/include/imex/Utils/XeUtils.h index ffb1660b5..1283987cd 100644 --- a/include/imex/Utils/XeUtils.h +++ b/include/imex/Utils/XeUtils.h @@ -38,7 +38,8 @@ template static std::string makeString(T array) { os << "["; for (auto i = 1; i < array.size(); i++) os << array[i - 1] << ", "; - os << array[array.size() - 1] << "]"; + if (array.size()) os << array[array.size() - 1] ; + os << "]"; os.flush(); return buf; } diff --git a/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 83b31b8e2..319d13552 100644 --- a/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -77,9 +77,54 @@ static void printShapeAndType(mlir::AsmPrinter &printer, printer << type; } -template -static mlir::LogicalResult parseArrayList(mlir::AsmParser &parser, - llvm::SmallVector &array, +static mlir::LogicalResult parseTensorDescAttr(mlir::AsmParser &parser, + imex::xegpu::MemoryScope &scope, + mlir::Attribute &encoding) { + // implies no attrbutes + if (mlir::failed(parser.parseOptionalComma())) + return mlir::success(); + + auto parseElt = [&]() -> mlir::ParseResult { + llvm::StringRef nameId; + + if (!parser.parseOptionalKeyword(&nameId, {"memory_scope"})) { + auto loc = parser.getCurrentLocation(); + if(parser.parseEqual()) + return mlir::failure(); + + auto attrOptional = ::mlir::FieldParser<::imex::xegpu::MemoryScope, ::imex::xegpu::MemoryScope>::parse(parser); + if(mlir::failed(attrOptional)) + return parser.emitError(loc, "Invalid memory scope attribute specification.\n"); + scope = *attrOptional; + return mlir::success(); + } else { + auto loc = parser.getCurrentLocation(); + auto attrOptional = ::mlir::FieldParser<::mlir::Attribute>::parse(parser); + if(mlir::failed(attrOptional)) + return parser.emitError(loc, "Failed to parse XeGPU_TensorDesc parameter 'encoding' which is to be a `::mlir::Attribute`.\n"); + encoding = *attrOptional; + return mlir::success(); + } + llvm_unreachable("Unexpected."); + }; + + if (parser.parseCommaSeparatedList(parseElt)) + return mlir::failure(); + + return mlir::success(); +} + +static void printTensorDescAttr(mlir::AsmPrinter &printer, + imex::xegpu::MemoryScope scope, + mlir::Attribute encoding) { + if (scope != imex::xegpu::MemoryScope::GLOBAL) + printer << ", memory_scope = " << scope; + if (encoding) printer << ", " << encoding; +} + +template +static mlir::LogicalResult parseArrayList(mlir::AsmParser &parser, + llvm::SmallVector &array, bool parsePrecedenceEqual = false) { mlir::FailureOr> result; // Parse literal '=' @@ -121,38 +166,27 @@ static mlir::LogicalResult parseSgMapAttrElements( auto loc = parser.getCurrentLocation(); auto parseElt = [&]() -> mlir::LogicalResult { return mlir::AsmParser::KeywordSwitch(parser) - .Case("mma_block_size", - [&](llvm::StringRef, llvm::SMLoc) { - return parseArrayList(parser, mmaBlockSize, true); - }) - .Case("wi_layout", - [&](llvm::StringRef, llvm::SMLoc) { - return parseArrayList(parser, layout, true); - }) - .Case("wi_data", - [&](llvm::StringRef, llvm::SMLoc) { - return parseArrayList(parser, data, true); - }) - .Default([&](llvm::StringRef keyword, llvm::SMLoc) { - llvm::dbgs() << "\n3. Default currLoc: " - << llvm::StringRef( - parser.getCurrentLocation().getPointer()) - << "\n"; - llvm::dbgs() << "\n3. 
keyword: " << keyword << "\n"; - return mlir::failure(); - }); - }; - - if (parser.parseLBrace()) - return mlir::failure(); - if (parser.parseCommaSeparatedList(parseElt)) - return mlir::failure(); - if (parser.parseRBrace()) - return mlir::failure(); - if (mmaBlockSize.size() != 2) { - parser.emitError(loc, - "failed to parse SgMapAttr: missing mma_block_size which " - "is to be a `llvm::ArrayRef` with size 2"); + .Case("mma_block_size", [&](llvm::StringRef, llvm::SMLoc) { + return parseArrayList(parser, mmaBlockSize, true); + }) + .Case("wi_layout", [&](llvm::StringRef, llvm::SMLoc) { + return parseArrayList(parser, layout, true); + }) + .Case("wi_data", [&](llvm::StringRef, llvm::SMLoc) { + return parseArrayList(parser, data, true); + }) + .Default([&](llvm::StringRef keyword, llvm::SMLoc) { + parser.emitError(loc, "SgMapAttr parser met an unexpected keyword: ") << keyword << "\n"; + return mlir::failure(); + }); + }; + + if (parser.parseLBrace()) return mlir::failure(); + if (parser.parseCommaSeparatedList(parseElt)) return mlir::failure(); + if (parser.parseRBrace()) return mlir::failure(); + if (mmaBlockSize.size() != 2 && mmaBlockSize.size() != 0) { + parser.emitError(loc, "failed to parse SgMapAttr: mma_block_size should be a `llvm::ArrayRef` " + "with size 2 or empty. But it got ") << mmaBlockSize.size() << ".\n" ; return mlir::failure(); } if (layout.size() != 2) { @@ -173,8 +207,10 @@ static void printSgMapAttrElements(mlir::AsmPrinter &printer, llvm::ArrayRef layout, llvm::ArrayRef data) { printer << "{"; - printArrayElement(printer, "mma_block_size", mmaBlockSize); - printer << "," << ' '; + if (mmaBlockSize.size()) { + printArrayElement(printer, "mma_block_size", mmaBlockSize); + printer << "," << ' '; + } printArrayElement(printer, "wi_layout", layout); printer << "," << ' '; printArrayElement(printer, "wi_data", data); @@ -237,39 +273,30 @@ mlir::Attribute XeMapAttr::parse(mlir::AsmParser &parser, mlir::Type type) { if (parser.parseLess()) return {}; - auto parseElt = [&]() -> mlir::ParseResult { - mlir::OptionalParseResult result = - mlir::AsmParser::KeywordSwitch(parser) - .Case("sg", - [&](llvm::StringRef, llvm::SMLoc) { - if (parser.parseEqual()) - return mlir::failure(); - llvm::SmallVector mmaBlockSize; - llvm::SmallVector wiLayout; - llvm::SmallVector wiData; - if (mlir::failed(parseSgMapAttrElements( - parser, mmaBlockSize, wiLayout, wiData))) - return mlir::failure(); - sg = imex::xegpu::SgMapAttr::get( - parser.getContext(), mmaBlockSize, wiLayout, wiData); - return mlir::success(!!sg); - }) - .Case("wg", - [&](llvm::StringRef, llvm::SMLoc) { - if (parser.parseEqual()) - return mlir::failure(); - llvm::SmallVector sgLayout; - llvm::SmallVector sgData; - if (mlir::failed( - parseWgMapAttrElements(parser, sgLayout, sgData))) - return mlir::failure(); - wg = imex::xegpu::WgMapAttr::get(parser.getContext(), - sgLayout, sgData); - return mlir::success(!!wg); - }) - .Default([&](llvm::StringRef keyword, llvm::SMLoc) { - return std::nullopt; - }); + auto parseElt = [&]() -> mlir::ParseResult { + mlir::OptionalParseResult result = mlir::AsmParser::KeywordSwitch(parser) + .Case("sg", [&](llvm::StringRef, llvm::SMLoc) { + if (parser.parseEqual()) return mlir::failure(); + llvm::SmallVector mmaBlockSize; + llvm::SmallVector wiLayout; + llvm::SmallVector wiData; + if (mlir::failed(parseSgMapAttrElements(parser, mmaBlockSize, wiLayout, wiData))) + return mlir::failure(); + sg = imex::xegpu::SgMapAttr::get(parser.getContext(), wiLayout, wiData, mmaBlockSize); + return
mlir::success(!!sg); + }) + .Case("wg", [&](llvm::StringRef, llvm::SMLoc) { + if (parser.parseEqual()) return mlir::failure(); + llvm::SmallVector sgLayout; + llvm::SmallVector sgData; + if(mlir::failed(parseWgMapAttrElements(parser, sgLayout, sgData))) + return mlir::failure(); + wg = imex::xegpu::WgMapAttr::get(parser.getContext(), sgLayout, sgData); + return mlir::success(!!wg); + }) + .Default([&](llvm::StringRef keyword, llvm::SMLoc) { + return std::nullopt; + }); return result.value(); }; diff --git a/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index a8f0a8aa6..dcfdbd0b4 100644 --- a/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -70,15 +70,6 @@ static void transpose(llvm::ArrayRef trans, shape[i] = old[trans[i]]; }; -static void dropOnes(std::vector &array) { - std::vector old = array; - array.clear(); - for (auto v : old) { - if (v != 1) - array.push_back(v); - } -}; - static bool isMappingAttr(mlir::Attribute attr) { return attr && (llvm::isa(attr) || llvm::isa(attr) || @@ -129,8 +120,7 @@ static mlir::ParseResult parseCustomEnumAttr(mlir::OpAsmParser &parser, auto loc = parser.getCurrentLocation(); auto attrOptional = mlir::FieldParser::parse(parser); if (mlir::failed(attrOptional)) - return parser.emitError(loc, "invalid ") - << "memory_scope attribute specification"; + return parser.emitError(loc, "invalid ") << "attribute specification"; auto attr = CustomEnumAttr::get(parser.getBuilder().getContext(), *attrOptional); result.addAttribute(attrKeyword, attr); @@ -187,10 +177,6 @@ parseOptionalAttrDict(mlir::OpAsmParser &parser, mlir::OperationState &result, if (parser.parseEqual()) return ::mlir::failure(); - if (nameId == "memory_scope") - return parseCustomEnumAttr(parser, result, - nameId); - if (nameId == "l1_hint" || nameId == "l2_hint" || nameId == "l3_hint") { if (isWrite) return parseCustomEnumAttr( @@ -296,8 +282,7 @@ mlir::ParseResult CreateNdDescOp::parse(mlir::OpAsmParser &parser, return ::mlir::failure(); } - if (parseOptionalAttrDict(parser, result, - {"memory_scope", "boundary_check", "mode"})) + if (parseOptionalAttrDict(parser, result, {"boundary_check", "mode"})) return mlir::failure(); if (parser.parseColon()) @@ -359,8 +344,6 @@ void CreateNdDescOp::print(::mlir::OpAsmPrinter &printer) { printer << ' ' << "{"; printer << "mode = " << getMode(); printer << "," << ' '; - printer << "memory_scope = " << getMemoryScope(); - printer << "," << ' '; printer << "boundary_check = " << getBoundaryCheck(); printer << "}"; @@ -414,8 +397,7 @@ mlir::ParseResult CreateDescOp::parse(mlir::OpAsmParser &parser, if (parser.parseOperand(offsetsRawOperands[0])) return mlir::failure(); - if (parseOptionalAttrDict(parser, result, - {"memory_scope", "chunk_size_per_lane", "mode"})) + if (parseOptionalAttrDict(parser, result, {"chunk_size_per_lane", "mode"})) return mlir::failure(); if (parser.parseColon()) @@ -460,8 +442,6 @@ void CreateDescOp::print(::mlir::OpAsmPrinter &printer) { printer << ' ' << "{"; printer << "mode = " << getMode(); printer << "," << ' '; - printer << "memory_scope = " << getMemoryScope(); - printer << "," << ' '; printer << "chunk_size_per_lane = " << getChunkSizePerLane(); printer << "}"; @@ -664,7 +644,6 @@ mlir::LogicalResult LoadNDOp::verify() { "tdescShape[i] % sgData[i] == 0"); tdescShape[i] /= sgLayout[i]; } - // dropOnes(tdescShape); } if (sgMap) { @@ -675,16 +654,23 @@ mlir::LogicalResult LoadNDOp::verify() { if (tdescShape[i] % blockSize[i] != 0 || blockSize[i] % wiLayout[i] != 0 || 
blockSize[i] % wiData[i] != 0 || blockSize[i] % (wiLayout[i] * wiData[i]) != 0) { - return emitOpError( - "Invalid SgMapAttr. It should meet the following conditions: " - "blockSize[i] % wiLayout[i] == 0 && " - "blockSize[i] % wiData[i] == 0 && " - "blockSize[i] % wiData[i] == 0 && " - "tdescShape[i] % blockSize[i] == 0"); + return emitOpError("Invalid SgMapAttr. It should meet the following conditions: " + "tdescShape[i] % blockSize[i] == 0 && " + "blockSize[i] % wiLayout[i] == 0 && " + "blockSize[i] % wiData[i] == 0 && " + "blockSize[i] % (wiLayout[i] * wiData[i]) == 0 "); + } - auto tmp = blockSize[i] / wiLayout[i]; - tdescShape[i] /= blockSize[i]; - tdescShape[i] *= tmp; + } + + for (size_t i = 0; i < wiLayout.size(); i++) { + if (tdescShape[i] % wiData[i] != 0 || + tdescShape[i] % (wiLayout[i] * wiData[i]) != 0) { + return emitOpError("Invalid SgMapAttr. It should meet the following conditions: " + "tdescShape[i] % wiData[i] == 0 && " + "tdescShape[i] % (wiLayout[i] * wiData[i]) == 0 "); + } + tdescShape[i] /= wiLayout[i]; + } } } @@ -702,8 +688,8 @@ mlir::LogicalResult LoadNDOp::verify() { auto vnni_factor = valueShape.back(); tdescShape[axis] /= vnni_factor; tdescShape.push_back(vnni_factor); - dropOnes(tdescShape); } + if (tdescShape != valueShape) return emitOpError( "Result shape doesn't match TensorDesc shape." @@ -879,14 +865,29 @@ mlir::LogicalResult StoreNDOp::verify() { auto wiLayout = sgMap.getWiLayout(); auto wiData = sgMap.getWiData(); for (size_t i = 0; i < shape.size(); i++) { - assert(blockSize[i] % (wiLayout[i] * wiData[i]) == 0); - assert(blockSize[i] % wiLayout[i] == 0); - assert(blockSize[i] % wiData[i] == 0); - assert(shape[i] % blockSize[i] == 0); - auto tmp = blockSize[i] / wiLayout[i]; - shape[i] /= blockSize[i]; - shape[i] *= tmp; + if (blockSize[i] % (wiLayout[i] * wiData[i]) != 0 || + blockSize[i] % wiLayout[i] != 0 || + blockSize[i] % wiData[i] != 0 || + shape[i] % blockSize[i] != 0) { + return emitOpError("Invalid SgMapAttr. It should meet the following conditions: " + "tdescShape[i] % blockSize[i] == 0 && " + "blockSize[i] % wiLayout[i] == 0 && " + "blockSize[i] % wiData[i] == 0 && " + "blockSize[i] % (wiLayout[i] * wiData[i]) == 0 "); + + } } + + for (size_t i = 0; i < wiLayout.size(); i++) { + if (shape[i] % wiData[i] != 0 || + shape[i] % (wiLayout[i] * wiData[i]) != 0) { + return emitOpError("Invalid SgMapAttr.
It should meet the following conditions: " + "tdescShape[i] % wiData[i] == 0 && " + "tdescShape[i] % (wiLayout[i] * wiData[i]) == 0 "); + } + shape[i] /= wiLayout[i]; + } + } if (shape != valTy.getShape().vec()) @@ -977,8 +978,8 @@ mlir::LogicalResult DpasOp::verify() { // return emitOpError("Incorrect shapes for dpas op"); // } - if (lhsRank != rhsRank) { - return emitOpError("lhs and rhs rank does not match for dpas op"); + if (lhsRank != rhsRank || lhsRank != 3) { + return emitOpError("lhs and rhs rank does not match for dpas op, or their rank is not 3."); } return mlir::success(); @@ -1134,7 +1135,6 @@ mlir::LogicalResult LoadGatherOp::verify() { auto vnni_factor = valueShape.back(); tdescShape[axis] /= vnni_factor; tdescShape.push_back(vnni_factor); - dropOnes(tdescShape); } if (valueShape != tdescShape) diff --git a/test/Dialect/XeGPU/IR/XeGPUOps.mlir b/test/Dialect/XeGPU/IR/XeGPUOps.mlir index 98bb5b15e..cf1f34f60 100644 --- a/test/Dialect/XeGPU/IR/XeGPUOps.mlir +++ b/test/Dialect/XeGPU/IR/XeGPUOps.mlir @@ -25,10 +25,10 @@ func.func @test_create_nd_tdesc_vc(%src: memref<24x32xf32>) { // CHECK-LABEL: func @test_create_tdesc_vc({{.*}}) { func.func @test_create_tdesc_vc(%src: ui64, %offsets : vector<16 x index>) { // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = vc, memory_scope = slm, chunk_size_per_lane = 2} - // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16x2xf32, #xegpu.scattered> - %1 = xegpu.create_tdesc %src, %offsets {mode = vc, memory_scope = slm, chunk_size_per_lane = 2} - : ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xf32, #xegpu.scattered> + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 2} + // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16x2xf32, memory_scope = slm, #xegpu.scattered> + %1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 2} + : ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xf32, memory_scope = slm, #xegpu.scattered> return } @@ -53,13 +53,13 @@ func.func @test_store_nd_vc(%src: memref<24x32xf16>, %dst: memref<24x32xf16>) { %c1 = arith.constant 4 : index // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = vc, boundary_check = true} // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> %1 = xegpu.create_nd_tdesc %src[%c0, %c1] {mode = vc} : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = vc, boundary_check = true} // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> %2 = xegpu.create_nd_tdesc %dst[%c0, %c1] {mode = vc} : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> @@ -92,7 +92,7 @@ func.func @test_update_nd_offset_vc(%src: memref<24x32xf32>) { %c1 = arith.constant 4 : index // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = vc, boundary_check = true} // CHECK-SAME: memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32> %1 = xegpu.create_nd_tdesc %src[%c0, %c1] {mode = vc} : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32> @@ -112,7 +112,7 @@ func.func @test_update_nd_offset_vc(%src: memref<24x32xf32>) { // CHECK-LABEL: func @test_prefetch_nd_vc({{.*}}) { func.func @test_prefetch_nd_vc(%src: memref<24x32xf16>, %x : index, %y : index) { // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = vc, 
boundary_check = true} // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> %1 = xegpu.create_nd_tdesc %src[%x, %y] {mode = vc} : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK: xegpu.prefetch_nd diff --git a/test/Dialect/XeGPU/IR/create_nd_tdesc.mlir b/test/Dialect/XeGPU/IR/create_nd_tdesc.mlir index f23e6f659..72d8681d2 100644 --- a/test/Dialect/XeGPU/IR/create_nd_tdesc.mlir +++ b/test/Dialect/XeGPU/IR/create_nd_tdesc.mlir @@ -67,8 +67,9 @@ func.func @test_create_nd_tdesc_vc_5(%src: memref, %w : index, %h : ind %c1 = arith.constant 1 : index // CHECK: xegpu.create_nd_tdesc // CHECK-SAME: %arg0[%arg3, %arg4], [%arg2, %arg1], [%arg1, %c1] - // CHECK-SAME: memref -> !xegpu.tensor_desc<8x16xf32> - %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1] {mode = vc, memory_scope = slm} : memref -> !xegpu.tensor_desc<8x16xf32> + // CHECK-SAME: memref -> !xegpu.tensor_desc<8x16xf32, memory_scope = slm> + %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1] {mode = vc} + : memref -> !xegpu.tensor_desc<8x16xf32, memory_scope = slm> return } @@ -77,8 +78,9 @@ func.func @test_create_nd_tdesc_vc_6(%src: memref, %w : index, %h : ind %c1 = arith.constant 1 : index // CHECK: xegpu.create_nd_tdesc // CHECK-SAME: %arg0[%arg3, %arg4], [%arg2, %arg1], [%arg1, %c1] - // CHECK-SAME: memref -> !xegpu.tensor_desc<8x16xf32> - %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1] {mode = vc, memory_scope = slm, boundary_check = true} : memref -> !xegpu.tensor_desc<8x16xf32> + // CHECK-SAME: memref -> !xegpu.tensor_desc<8x16xf32, memory_scope = slm> + %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1] {mode = vc, boundary_check = true} + : memref -> !xegpu.tensor_desc<8x16xf32, memory_scope = slm> return } @@ -96,8 +98,9 @@ func.func @test_create_nd_tdesc_vc_7(%src: memref<1024xf32>, %offset : index) { func.func @test_create_nd_tdesc_vc_8(%src: memref, %w : index, %h : index, %x : index) { %c1 = arith.constant 1 : index // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: memref -> !xegpu.tensor_desc<8x16xf32> - %1 = xegpu.create_nd_tdesc %src[8, %x], [%h, %w], [%w, %c1] {mode = vc, memory_scope = slm, boundary_check = true} : memref -> !xegpu.tensor_desc<8x16xf32> + // CHECK-SAME: memref -> !xegpu.tensor_desc<8x16xf32, memory_scope = slm> + %1 = xegpu.create_nd_tdesc %src[8, %x], [%h, %w], [%w, %c1] {mode = vc, boundary_check = true} + : memref -> !xegpu.tensor_desc<8x16xf32, memory_scope = slm> return } @@ -105,9 +108,9 @@ func.func @test_create_nd_tdesc_vc_8(%src: memref, %w : index, %h : ind func.func @test_create_nd_tdesc_vc_9(%src: memref, %w : index, %h : index, %x : index) { %c1 = arith.constant 1 : index // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = simt, memory_scope = slm, boundary_check = true} - // CHECK-SAME: !xegpu.tensor_desc<64x128xf32, #xegpu.xe_map> - %1 = xegpu.create_nd_tdesc %src[8, %x], [%h, %w], [%w, %c1] {memory_scope = slm, boundary_check = true} : memref - -> !xegpu.tensor_desc<64x128xf32, #xegpu.xe_map> + // CHECK-SAME: {mode = simt, boundary_check = true} + // CHECK-SAME: !xegpu.tensor_desc<64x128xf32, memory_scope = slm, #xegpu.xe_map> + %1 = xegpu.create_nd_tdesc %src[8, %x], [%h, %w], [%w, %c1] {boundary_check = true} : memref + -> !xegpu.tensor_desc<64x128xf32, memory_scope = slm, #xegpu.xe_map> return } diff --git a/test/Dialect/XeGPU/IR/create_tdesc.mlir b/test/Dialect/XeGPU/IR/create_tdesc.mlir index 98e58b55b..cc6257ecf 100644 --- a/test/Dialect/XeGPU/IR/create_tdesc.mlir +++ b/test/Dialect/XeGPU/IR/create_tdesc.mlir 
@@ -7,8 +7,8 @@ // CHECK-LABEL: func @test_create_tdesc_vc({{.*}}) { func.func @test_create_tdesc_vc(%src: ui64, %offsets : vector<16 x index>) { - // CHECK: xegpu.create_tdesc %arg0, %arg1 - // CHECK-SAME: {mode = vc, memory_scope = global, chunk_size_per_lane = 1} + // CHECK: xegpu.create_tdesc %arg0, %arg1 + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1} // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> %1 = xegpu.create_tdesc %src, %offsets {mode = vc}: ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> return @@ -16,18 +16,18 @@ func.func @test_create_tdesc_vc(%src: ui64, %offsets : vector<16 x index>) { // CHECK-LABEL: func @test_create_tdesc_vc_2({{.*}}) { func.func @test_create_tdesc_vc_2(%src: ui64, %offsets : vector<16 x index>) { - // CHECK: xegpu.create_tdesc %arg0, %arg1 - // CHECK-SAME: {mode = vc, memory_scope = slm, chunk_size_per_lane = 1} - // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> - %1 = xegpu.create_tdesc %src, %offsets {mode = vc, memory_scope=slm} - : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> + // CHECK: xegpu.create_tdesc %arg0, %arg1 + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1} + // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, memory_scope = slm, #xegpu.scattered> + %1 = xegpu.create_tdesc %src, %offsets {mode = vc} : ui64, vector<16 x index> + -> !xegpu.tensor_desc<16xf32, memory_scope = slm, #xegpu.scattered> return } // CHECK-LABEL: func @test_create_tdesc_vc_3({{.*}}) { func.func @test_create_tdesc_vc_3(%src: ui64, %offsets : vector<16 x index>) { - // CHECK: xegpu.create_tdesc %arg0, %arg1 - // CHECK-SAME: {mode = vc, memory_scope = global, chunk_size_per_lane = 8} + // CHECK: xegpu.create_tdesc %arg0, %arg1 + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 8} // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16x8xf32, #xegpu.scattered> %1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 8} : ui64, vector<16 x index> -> !xegpu.tensor_desc<16x8xf32, #xegpu.scattered> @@ -36,11 +36,11 @@ func.func @test_create_tdesc_vc_3(%src: ui64, %offsets : vector<16 x index>) { // CHECK-LABEL: func @test_create_tdesc_vc_4({{.*}}) { func.func @test_create_tdesc_vc_4(%src: ui64, %offsets : vector<16 x index>) { - // CHECK: xegpu.create_tdesc %arg0, %arg1 - // CHECK-SAME: {mode = vc, memory_scope = slm, chunk_size_per_lane = 2} - // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16x2xf32, #xegpu.scattered> - %1 = xegpu.create_tdesc %src, %offsets {mode = vc, memory_scope = slm, chunk_size_per_lane = 2} - : ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xf32, #xegpu.scattered> + // CHECK: xegpu.create_tdesc %arg0, %arg1 + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 2} + // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16x2xf32, memory_scope = slm, #xegpu.scattered> + %1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 2} + : ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xf32, memory_scope = slm, #xegpu.scattered> return } @@ -48,10 +48,10 @@ func.func @test_create_tdesc_vc_4(%src: ui64, %offsets : vector<16 x index>) { // CHECK-LABEL: func @test_create_tdesc_vc_5({{.*}}) { func.func @test_create_tdesc_vc_5(%src: memref, %offsets : vector<16 x index>) { // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = vc, memory_scope = slm, chunk_size_per_lane = 2} - // CHECK-SAME: memref, vector<16xindex> -> !xegpu.tensor_desc<16x2xf32, 
#xegpu.scattered> - %1 = xegpu.create_tdesc %src, %offsets {mode = vc, memory_scope = slm, chunk_size_per_lane = 2} - : memref, vector<16 x index> -> !xegpu.tensor_desc<16x2xf32, #xegpu.scattered> + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 2} + // CHECK-SAME: memref, vector<16xindex> -> !xegpu.tensor_desc<16x2xf32, memory_scope = slm, #xegpu.scattered> + %1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 2} + : memref, vector<16 x index> -> !xegpu.tensor_desc<16x2xf32, memory_scope = slm, #xegpu.scattered> return } @@ -59,19 +59,19 @@ func.func @test_create_tdesc_vc_5(%src: memref, %offsets : vector<16 x in // CHECK-LABEL: func @test_create_tdesc_vc_6({{.*}}) { func.func @test_create_tdesc_vc_6(%src: memref, %offset : index) { // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = vc, memory_scope = slm, chunk_size_per_lane = 2} - // CHECK-SAME: memref, index -> !xegpu.tensor_desc<2xf32, #xegpu.scattered> - %1 = xegpu.create_tdesc %src, %offset {mode = vc, memory_scope = slm, chunk_size_per_lane = 2} - : memref, index -> !xegpu.tensor_desc<2xf32, #xegpu.scattered> + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 2} + // CHECK-SAME: memref, index -> !xegpu.tensor_desc<2xf32, memory_scope = slm, #xegpu.scattered> + %1 = xegpu.create_tdesc %src, %offset {mode = vc, chunk_size_per_lane = 2} + : memref, index -> !xegpu.tensor_desc<2xf32, memory_scope = slm, #xegpu.scattered> return } // CHECK-LABEL: func @test_create_tdesc_vc_7({{.*}}) { func.func @test_create_tdesc_vc_7(%src: memref, %offset : index) { // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = vc, memory_scope = slm, chunk_size_per_lane = 1} - // CHECK-SAME: memref, index -> !xegpu.tensor_desc<1xf32, #xegpu.scattered> - %1 = xegpu.create_tdesc %src, %offset {mode = vc, memory_scope = slm, chunk_size_per_lane = 1} - : memref, index -> !xegpu.tensor_desc<1xf32, #xegpu.scattered> + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1} + // CHECK-SAME: memref, index -> !xegpu.tensor_desc<1xf32, memory_scope = slm, #xegpu.scattered> + %1 = xegpu.create_tdesc %src, %offset {mode = vc, chunk_size_per_lane = 1} + : memref, index -> !xegpu.tensor_desc<1xf32, memory_scope = slm, #xegpu.scattered> return } diff --git a/test/Dialect/XeGPU/IR/invalid.mlir b/test/Dialect/XeGPU/IR/invalid.mlir index f3d68254c..b8f123d11 100644 --- a/test/Dialect/XeGPU/IR/invalid.mlir +++ b/test/Dialect/XeGPU/IR/invalid.mlir @@ -72,7 +72,7 @@ func.func @test_create_tdesc(%src: ui64, %offsets : vector<16x8xindex>) { func.func @test_load_gather(%src: ui64, %offsets : vector<16xindex>) { %0 = arith.constant dense<1>: vector<16x8xi1> // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, chunk_size_per_lane = 8} + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 8} // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16x8xf32, #xegpu.scattered> %1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 8} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16x8xf16, #xegpu.scattered> diff --git a/test/Dialect/XeGPU/IR/load_gather.mlir b/test/Dialect/XeGPU/IR/load_gather.mlir index b04dd022d..9201aa18c 100644 --- a/test/Dialect/XeGPU/IR/load_gather.mlir +++ b/test/Dialect/XeGPU/IR/load_gather.mlir @@ -9,7 +9,7 @@ func.func @test_load_gather_vc(%src: ui64, %offsets : vector<16xindex>) { %0 = arith.constant dense<1>: vector<16xi1> // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, chunk_size_per_lane = 1} + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1} // 
CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> %1 = xegpu.create_tdesc %src, %offsets {mode = vc}: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> @@ -25,7 +25,7 @@ func.func @test_load_gather_vc(%src: ui64, %offsets : vector<16xindex>) { func.func @test_load_gather_vc_2(%src: ui64, %offsets : vector<16xindex>) { %0 = arith.constant dense<1>: vector<16x8xi1> // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, chunk_size_per_lane = 8} + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 8} // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16x8xf32, #xegpu.scattered> %1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 8} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16x8xf32, #xegpu.scattered> @@ -43,7 +43,7 @@ func.func @test_load_gather_vc_2(%src: ui64, %offsets : vector<16xindex>) { func.func @test_load_gather_vc_3(%src: ui64, %offset : index) { %0 = arith.constant dense<1>: vector<8xi1> // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, chunk_size_per_lane = 8} + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 8} // CHECK-SAME: ui64, index -> !xegpu.tensor_desc<8xf32, #xegpu.scattered> %1 = xegpu.create_tdesc %src, %offset {mode = vc, chunk_size_per_lane = 8} : ui64, index -> !xegpu.tensor_desc<8xf32, #xegpu.scattered> @@ -61,7 +61,7 @@ func.func @test_load_gather_vc_3(%src: ui64, %offset : index) { func.func @test_load_gather_vc_4(%src: ui64, %offsets : vector<16xindex>) { %0 = arith.constant dense<1>: vector<16xi1> // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, chunk_size_per_lane = 1} + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1} // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> %1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 1} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> diff --git a/test/Dialect/XeGPU/IR/load_nd.mlir b/test/Dialect/XeGPU/IR/load_nd.mlir index c67cab01b..91d2b6025 100644 --- a/test/Dialect/XeGPU/IR/load_nd.mlir +++ b/test/Dialect/XeGPU/IR/load_nd.mlir @@ -7,13 +7,14 @@ #sg_map_fp16_a = #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}> #sg_map_fp16_b = #xegpu.sg_map<{mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]}> #sg_map_fp16_c = #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]}> +#sg_map_fp16_d = #xegpu.sg_map<{wi_layout = [2, 8], wi_data = [1, 2]}> // CHECK-LABEL: func @test_load_nd_fp16({{.*}}) { func.func @test_load_nd_fp16(%A: memref<24x32xf16>, %B : memref<24x32xf16>, %C : memref<24x32xf16>) { %c0 = arith.constant 2 : index %c1 = arith.constant 4 : index // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = simt, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = simt, boundary_check = true} // CHECK-SAME: memref<24x32xf16> // CHECK-SAME: -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>> %1 = xegpu.create_nd_tdesc %A[%c0, %c1] @@ -22,11 +23,11 @@ func.func @test_load_nd_fp16(%A: memref<24x32xf16>, %B : memref<24x32xf16>, %C : // CHECK: xegpu.load_nd // CHECK-SAME: {mode = simt, vnni_axis = 1} // CHECK-SAME: !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>> - // CHECK-SAME: -> vector<4x2xf16> - %2 = xegpu.load_nd %1 {vnni_axis = 1} : !xegpu.tensor_desc<8x16xf16, 
#sg_map_fp16_a> -> vector<4x2xf16> + // CHECK-SAME: -> vector<4x1x2xf16> + %2 = xegpu.load_nd %1 {vnni_axis = 1} : !xegpu.tensor_desc<8x16xf16, #sg_map_fp16_a> -> vector<4x1x2xf16> // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = simt, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = simt, boundary_check = true} // CHECK-SAME: memref<24x32xf16> // CHECK-SAME: -> !xegpu.tensor_desc<16x16xf16, #xegpu.sg_map<{mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]}>> %3 = xegpu.create_nd_tdesc %B[%c0, %c1] @@ -35,11 +36,11 @@ func.func @test_load_nd_fp16(%A: memref<24x32xf16>, %B : memref<24x32xf16>, %C : // CHECK: xegpu.load_nd // CHECK-SAME: {mode = simt, vnni_axis = 0} // CHECK-SAME: !xegpu.tensor_desc<16x16xf16, #xegpu.sg_map<{mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]}>> - // CHECK-SAME: -> vector<8x2xf16> - %4 = xegpu.load_nd %3 {vnni_axis = 0} : !xegpu.tensor_desc<16x16xf16, #sg_map_fp16_b> -> vector<8x2xf16> + // CHECK-SAME: -> vector<8x1x2xf16> + %4 = xegpu.load_nd %3 {vnni_axis = 0} : !xegpu.tensor_desc<16x16xf16, #sg_map_fp16_b> -> vector<8x1x2xf16> // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = simt, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = simt, boundary_check = true} // CHECK-SAME: memref<24x32xf16> // CHECK-SAME: -> !xegpu.tensor_desc<8x16xf32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]}>> %5 = xegpu.create_nd_tdesc %C[%c0, %c1] @@ -51,6 +52,18 @@ func.func @test_load_nd_fp16(%A: memref<24x32xf16>, %B : memref<24x32xf16>, %C : // CHECK-SAME: -> vector<8x1xf32> %6 = xegpu.load_nd %5 : !xegpu.tensor_desc<8x16xf32, #sg_map_fp16_c> -> vector<8x1xf32> + // CHECK: xegpu.create_nd_tdesc + // CHECK-SAME: {mode = simt, boundary_check = true} + // CHECK-SAME: memref<24x32xf16> + // CHECK-SAME: -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{wi_layout = [2, 8], wi_data = [1, 2]}>> + %7 = xegpu.create_nd_tdesc %A[%c0, %c1] + : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16, #sg_map_fp16_d> + // CHECK: xegpu.load_nd + // CHECK-SAME: {mode = simt, vnni_axis = 1} + // CHECK-SAME: !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{wi_layout = [2, 8], wi_data = [1, 2]}>> + // CHECK-SAME: -> vector<4x1x2xf16> + %8 = xegpu.load_nd %7 {vnni_axis = 1} : !xegpu.tensor_desc<8x16xf16, #sg_map_fp16_d> -> vector<4x1x2xf16> + return } @@ -63,7 +76,7 @@ func.func @test_load_nd_i8(%A: memref<64x64xi8>, %B : memref<64x64xi8>, %C : mem %c1 = arith.constant 4 : index // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = simt, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = simt, boundary_check = true} // CHECK-SAME: memref<64x64xi8> // CHECK-SAME: -> !xegpu.tensor_desc<8x32xi8, #xegpu.sg_map<{mma_block_size = [8, 32], wi_layout = [2, 8], wi_data = [1, 4]}>> %1 = xegpu.create_nd_tdesc %A[%c0, %c1] @@ -72,11 +85,11 @@ func.func @test_load_nd_i8(%A: memref<64x64xi8>, %B : memref<64x64xi8>, %C : mem // CHECK: xegpu.load_nd // CHECK-SAME: {mode = simt, vnni_axis = 1} // CHECK-SAME: !xegpu.tensor_desc<8x32xi8, #xegpu.sg_map<{mma_block_size = [8, 32], wi_layout = [2, 8], wi_data = [1, 4]}>> - // CHECK-SAME: -> vector<4x4xi8> - %2 = xegpu.load_nd %1 {vnni_axis = 1} : !xegpu.tensor_desc<8x32xi8, #sg_map_i8_a> -> vector<4x4xi8> + // CHECK-SAME: -> vector<4x1x4xi8> + %2 = xegpu.load_nd %1 {vnni_axis = 1} : !xegpu.tensor_desc<8x32xi8, #sg_map_i8_a> -> vector<4x1x4xi8> // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = simt, memory_scope = global, boundary_check 
= true} + // CHECK-SAME: {mode = simt, boundary_check = true} // CHECK-SAME: memref<64x64xi8> // CHECK-SAME: -> !xegpu.tensor_desc<32x16xi8, #xegpu.sg_map<{mma_block_size = [32, 16], wi_layout = [1, 16], wi_data = [1, 1]}>> %3 = xegpu.create_nd_tdesc %B[%c0, %c1] @@ -85,11 +98,11 @@ func.func @test_load_nd_i8(%A: memref<64x64xi8>, %B : memref<64x64xi8>, %C : mem // CHECK: xegpu.load_nd // CHECK-SAME: {mode = simt, vnni_axis = 0} // CHECK-SAME: !xegpu.tensor_desc<32x16xi8, #xegpu.sg_map<{mma_block_size = [32, 16], wi_layout = [1, 16], wi_data = [1, 1]}>> - // CHECK-SAME: -> vector<8x4xi8> - %4 = xegpu.load_nd %3 {vnni_axis = 0} : !xegpu.tensor_desc<32x16xi8, #sg_map_i8_b> -> vector<8x4xi8> + // CHECK-SAME: -> vector<8x1x4xi8> + %4 = xegpu.load_nd %3 {vnni_axis = 0} : !xegpu.tensor_desc<32x16xi8, #sg_map_i8_b> -> vector<8x1x4xi8> // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = simt, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = simt, boundary_check = true} // CHECK-SAME: memref<64x64xi8> // CHECK-SAME: -> !xegpu.tensor_desc<8x16xi32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]}>> %5 = xegpu.create_nd_tdesc %C[%c0, %c1] @@ -114,7 +127,7 @@ func.func @test_load_nd_f64(%A: memref<64x64xf64>, %B : memref<64x64xf64>, %C : %c1 = arith.constant 4 : index // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = simt, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = simt, boundary_check = true} // CHECK-SAME: memref<64x64xf64> // CHECK-SAME: -> !xegpu.tensor_desc<4x8xf64, #xegpu.sg_map<{mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]}>> %1 = xegpu.create_nd_tdesc %A[%c0, %c1] @@ -127,7 +140,7 @@ func.func @test_load_nd_f64(%A: memref<64x64xf64>, %B : memref<64x64xf64>, %C : %2 = xegpu.load_nd %1 : !xegpu.tensor_desc<4x8xf64, #sg_map_f64_a> -> vector<2x1xf64> // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = simt, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = simt, boundary_check = true} // CHECK-SAME: memref<64x64xf64> // CHECK-SAME: -> !xegpu.tensor_desc<8x8xf64, #xegpu.sg_map<{mma_block_size = [8, 8], wi_layout = [2, 8], wi_data = [1, 1]}>> %3 = xegpu.create_nd_tdesc %B[%c0, %c1] @@ -140,7 +153,7 @@ func.func @test_load_nd_f64(%A: memref<64x64xf64>, %B : memref<64x64xf64>, %C : %4 = xegpu.load_nd %3 : !xegpu.tensor_desc<8x8xf64, #sg_map_f64_b> -> vector<4x1xf64> // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = simt, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = simt, boundary_check = true} // CHECK-SAME: memref<64x64xf64> // CHECK-SAME: -> !xegpu.tensor_desc<4x8xf64, #xegpu.sg_map<{mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]}>> %5 = xegpu.create_nd_tdesc %C[%c0, %c1] diff --git a/test/Dialect/XeGPU/IR/prefetch_nd.mlir b/test/Dialect/XeGPU/IR/prefetch_nd.mlir index 3604c91a6..402302610 100644 --- a/test/Dialect/XeGPU/IR/prefetch_nd.mlir +++ b/test/Dialect/XeGPU/IR/prefetch_nd.mlir @@ -19,7 +19,7 @@ func.func @test_prefetch_nd_tdesc_vc_0(%src: memref<24x32xf32>) { // CHECK-LABEL: func @test_prefetch_nd_tdesc_vc_1({{.*}}) { func.func @test_prefetch_nd_tdesc_vc_1(%src: memref<24x32xf16>, %x : index, %y : index) { // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = vc, boundary_check = true} // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> %1 = xegpu.create_nd_tdesc %src[%x, %y] {mode = vc} : memref<24x32xf16> -> 
!xegpu.tensor_desc<8x16xf16> diff --git a/test/Dialect/XeGPU/IR/store_nd.mlir b/test/Dialect/XeGPU/IR/store_nd.mlir index ceae5645c..47e714b6c 100644 --- a/test/Dialect/XeGPU/IR/store_nd.mlir +++ b/test/Dialect/XeGPU/IR/store_nd.mlir @@ -9,13 +9,13 @@ func.func @test_store_nd_vc_0(%src: memref<24x32xf16>, %dst: memref<24x32xf16>) %c1 = arith.constant 4 : index // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = vc, boundary_check = true} // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> %1 = xegpu.create_nd_tdesc %src[%c0, %c1] {mode = vc} : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = vc, boundary_check = true} // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> %2 = xegpu.create_nd_tdesc %dst[%c0, %c1] {mode = vc} : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16> diff --git a/test/Dialect/XeGPU/IR/store_scatter.mlir b/test/Dialect/XeGPU/IR/store_scatter.mlir index 6786692f7..19341dc74 100644 --- a/test/Dialect/XeGPU/IR/store_scatter.mlir +++ b/test/Dialect/XeGPU/IR/store_scatter.mlir @@ -9,13 +9,13 @@ func.func @test_store_scatter_vc(%src: ui64, %offsets : vector<16 x index>, %dst: ui64) { %0 = arith.constant dense<1>: vector<16xi1> // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, chunk_size_per_lane = 1} + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1} // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> %1 = xegpu.create_tdesc %src, %offsets {mode = vc} : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, chunk_size_per_lane = 1} + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1} // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> %2 = xegpu.create_tdesc %dst, %offsets {mode = vc} : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> @@ -38,13 +38,13 @@ func.func @test_store_scatter_vc(%src: ui64, %offsets : vector<16 x index>, %dst func.func @test_store_scatter(%src: ui64, %offsets : index, %dst: ui64) { %0 = arith.constant 1: i1 // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = simt, memory_scope = global, chunk_size_per_lane = 1} + // CHECK-SAME: {mode = simt, chunk_size_per_lane = 1} // CHECK-SAME: ui64, index -> !xegpu.tensor_desc<1xf32, #xegpu.scattered> %1 = xegpu.create_tdesc %src, %offsets : ui64, index -> !xegpu.tensor_desc<1xf32, #xegpu.scattered> // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = simt, memory_scope = global, chunk_size_per_lane = 1} + // CHECK-SAME: {mode = simt, chunk_size_per_lane = 1} // CHECK-SAME: ui64, index -> !xegpu.tensor_desc<1xf32, #xegpu.scattered> %2 = xegpu.create_tdesc %dst, %offsets : ui64, index -> !xegpu.tensor_desc<1xf32, #xegpu.scattered> diff --git a/test/Dialect/XeGPU/IR/update_nd_offset.mlir b/test/Dialect/XeGPU/IR/update_nd_offset.mlir index 93403ea5f..4de5560e0 100644 --- a/test/Dialect/XeGPU/IR/update_nd_offset.mlir +++ b/test/Dialect/XeGPU/IR/update_nd_offset.mlir @@ -9,7 +9,7 @@ func.func @test_update_nd_offset_vc_0(%src: memref<24x32xf32>) { %c1 = arith.constant 4 : index // CHECK: xegpu.create_nd_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, boundary_check = true} + // CHECK-SAME: {mode = vc, boundary_check = true} // CHECK-SAME: 
memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32> %1 = xegpu.create_nd_tdesc %src[%c0, %c1] {mode = vc} : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32> diff --git a/test/Dialect/XeGPU/IR/update_offset.mlir b/test/Dialect/XeGPU/IR/update_offset.mlir index 539a72f48..416fd477a 100644 --- a/test/Dialect/XeGPU/IR/update_offset.mlir +++ b/test/Dialect/XeGPU/IR/update_offset.mlir @@ -9,7 +9,7 @@ func.func @test_update_offset_VC(%src: ui64, %offsets : vector<16 x index>) { %0 = arith.constant dense<1>: vector<16xi1> // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = vc, memory_scope = global, chunk_size_per_lane = 1} + // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1} // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> %1 = xegpu.create_tdesc %src, %offsets {mode = vc} : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> @@ -36,7 +36,7 @@ func.func @test_update_offset_VC(%src: ui64, %offsets : vector<16 x index>) { func.func @test_update_offset(%src: ui64, %offsets : index) { %0 = arith.constant dense<1>: vector<8xi1> // CHECK: xegpu.create_tdesc - // CHECK-SAME: {mode = simt, memory_scope = global, chunk_size_per_lane = 8} + // CHECK-SAME: {mode = simt, chunk_size_per_lane = 8} // CHECK-SAME: ui64, index -> !xegpu.tensor_desc<8xf32, #xegpu.scattered> %1 = xegpu.create_tdesc %src, %offsets {chunk_size_per_lane = 8} : ui64, index -> !xegpu.tensor_desc<8xf32, #xegpu.scattered>