This PR provides the following fixes for the XeGPU dialect, per feedback:
- Allow 32-bit addresses in XeGPU_BaseAddrType.
- Allow mma_block_size of sg_map to be optional (empty).
- Improve the builders of CreateNdDescOp to check for parameter errors.
- Fix a bug in the CreateNdDescOp builder.
- Move memory_scope from the operations into TensorDesc (see the sketch below).
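
For illustration, a minimal sketch of the memory_scope move (syntax follows this diff; shapes and values are made up):

    // before this change: memory_scope was an attribute on the op
    %0 = xegpu.create_nd_tdesc %src[0, 0] {memory_scope = slm}
         : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
    // after this change: memory_scope is carried by the tensor_desc type
    %0 = xegpu.create_nd_tdesc %src[0, 0]
         : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16, memory_scope = slm>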
chencha3 committed Nov 2, 2023
1 parent 80adaab commit 61fd1f5
Showing 18 changed files with 373 additions and 255 deletions.
58 changes: 53 additions & 5 deletions include/imex/Dialect/XeGPU/IR/XeGPUAttrs.td
@@ -21,20 +21,51 @@ def XeGPU_ScatteredAttr : XeGPUAttr<"Scattered", "scattered"> {
}

def XeGPU_SgMapAttr: XeGPUAttr<"SgMap", "sg_map"> {
let parameters = (ins
ArrayRefParameter<"unsigned">:$mmaBlockSize,
let parameters = (ins
ArrayRefParameter<"unsigned">:$wiLayout,
ArrayRefParameter<"unsigned">:$wiData);

ArrayRefParameter<"unsigned">:$wiData,
ArrayRefParameter<"unsigned">:$mmaBlockSize);

// In format of #xegpu.sg_map<{mma_block_size = [2, 4], wi_layout = [2, 4], wi_data = [2, 4]}>
let assemblyFormat = "`<` custom<SgMapAttrElements>($mmaBlockSize, $wiLayout, $wiData) `>`";

let extraClassDeclaration = [{
bool hasMMABlockSizeAttr() {
return getMmaBlockSize().size() == 2;
}
}];

let builders = [
AttrBuilder<(ins
"::llvm::ArrayRef<unsigned>":$wiLayout,
"::llvm::ArrayRef<unsigned>":$wiData,
CArg<"::llvm::ArrayRef<unsigned>", "{}">:$mmaBlockSize
), [{
assert(wiLayout.size() == 2 && wiData.size() == 2 && "wiLayout and wiData should be 2D arrays.\n");
assert((mmaBlockSize.size() == 2 || mmaBlockSize.size() == 0) && "mmaBlockSize can be either empty or a 2D array.\n");
return $_get($_ctxt, wiLayout, wiData, mmaBlockSize);
}]>
];

let skipDefaultBuilders = 1;
}
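
// Illustrative sketch, not part of this diff: with mma_block_size optional,
// both of the following sg_map forms are expected to round-trip (values made up):
//   #xegpu.sg_map<{wi_layout = [2, 8], wi_data = [1, 2]}>
//   #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>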

def XeGPU_WgMapAttr: XeGPUAttr<"WgMap", "wg_map"> {
let parameters = (ins
ArrayRefParameter<"unsigned">:$sgLayout,
ArrayRefParameter<"unsigned">:$sgData);

let builders = [
AttrBuilder<(ins
"::llvm::ArrayRef<unsigned>":$sgLayout,
"::llvm::ArrayRef<unsigned>":$sgData
), [{
assert(sgLayout.size() == 2 && sgData.size() == 2 && "sgLayout and sgData should be 2D arrays.\n");
return $_get($_ctxt, sgLayout, sgData);
}]>
];
let skipDefaultBuilders = 1;

// In format of #xegpu.wg_map<{sg_layout = [2, 4], sg_data = [2, 4]}>
let assemblyFormat = "`<` custom<WgMapAttrElements>($sgLayout, $sgData) `>`";
}
@@ -44,7 +75,24 @@ def XeGPU_XeMapAttr: XeGPUAttr<"XeMap", "xe_map"> {
XeGPU_WgMapAttr: $wg,
XeGPU_SgMapAttr: $sg);

// In format of #xegpu.xe_map<wg = {sg_layout = [2, 4], sg_data = [2, 4]}, sg = {sg_layout = [2, 4], sg_data = [2, 4]}>
let builders = [
AttrBuilder<(ins
"::llvm::ArrayRef<unsigned>":$sgLayout,
"::llvm::ArrayRef<unsigned>":$sgData,
"::llvm::ArrayRef<unsigned>":$wiLayout,
"::llvm::ArrayRef<unsigned>":$wiData,
CArg<"::llvm::ArrayRef<unsigned>", "{}">:$mmaBlockSize
), [{
assert(sgLayout.size() == 2 && sgData.size() == 2 && "sgLayout and sgData should be 2D arrays.\n");
assert(wiLayout.size() == 2 && wiData.size() == 2 && "wiLayout and wiData should be 2D arrays.\n");
assert((mmaBlockSize.size() == 2 || mmaBlockSize.size() == 0) && "mmaBlockSize can be either empty or a 2D array.\n");
auto wg = WgMapAttr::get($_ctxt, sgLayout, sgData);
auto sg = SgMapAttr::get($_ctxt, wiLayout, wiData, mmaBlockSize);
return $_get($_ctxt, wg, sg);
}]>
];

// In format of #xegpu.xe_map<wg = {sg_layout = [2, 4], sg_data = [2, 4]}, sg = {mma_block_size = [2, 4], sg_layout = [2, 4], sg_data = [2, 4]}>
let hasCustomAssemblyFormat = 1;
}
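
// Illustrative sketch, not part of this diff: the convenience builder above
// composes both nested maps in one call, and mmaBlockSize may be omitted:
//   auto map = imex::xegpu::XeMapAttr::get(ctx, /*sgLayout=*/{2, 4}, /*sgData=*/{32, 64},
//                                          /*wiLayout=*/{2, 8}, /*wiData=*/{1, 2});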

35 changes: 0 additions & 35 deletions include/imex/Dialect/XeGPU/IR/XeGPUOps.h
@@ -46,41 +46,6 @@ class TensorDescType;
} // namespace xegpu
} // namespace imex

namespace imex {
namespace xegpu {

class BaseTensorDescType : public mlir::Type,
public mlir::ShapedType::Trait<BaseTensorDescType> {
public:
using Type::Type;

/// Returns the element type of this tensor type.
mlir::Type getElementType() const;

/// Returns if this type is ranked, i.e. it has a known number of dimensions.
bool hasRank() const;

/// Returns the shape of this tensor type.
llvm::ArrayRef<int64_t> getShape() const;

/// Clone this type with the given shape and element type. If the
/// provided shape is `None`, the current shape of the type is used.
BaseTensorDescType cloneWith(std::optional<llvm::ArrayRef<int64_t>> shape,
mlir::Type elementType) const;

/// Return true if the specified element type is ok in a tensor.
static bool isValidElementType(Type type);

/// Methods for support type inquiry through isa, cast, and dyn_cast.
static bool classof(Type type);

/// Allow implicit conversion to ShapedType.
operator mlir::ShapedType() const { return cast<mlir::ShapedType>(); }
};

} // namespace xegpu
} // namespace imex

#include <imex/Dialect/XeGPU/IR/XeGPUOpsDialect.h.inc>
#include <imex/Dialect/XeGPU/IR/XeGPUOpsEnums.h.inc>
#define GET_ATTRDEF_CLASSES
95 changes: 79 additions & 16 deletions include/imex/Dialect/XeGPU/IR/XeGPUOps.td
@@ -54,8 +54,7 @@ def XeGPU_CreateNdDescOp : XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSe
both shape and strides are required to carry the respective information. Otherwise,
the operation is invalid.

The operation also supports two attributes:
* memory_scope (MemoryScopeAttr): indicates where the memory is located, "global" for global memory (default), and "slm" for shared memory.
The operation also supports the following attribute:
* boundary_check (BoolAttr): indicates whether the operation performs boundary checks and pads out-of-bounds accesses with zero (enabled by default)

Example 1 (suppose the tensor shape inferred by the compiler is 8x16):
@@ -82,7 +81,6 @@ def XeGPU_CreateNdDescOp : XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSe
Variadic<Index>: $shape,
Variadic<Index>: $strides,
DenseI64ArrayAttr: $static_offsets,
DefaultValuedAttr<XeGPU_MemoryScopeAttr, "xegpu::MemoryScope::GLOBAL">: $memory_scope,
DefaultValuedAttr<BoolAttr, "true">: $boundary_check,
DefaultValuedAttr<XeGPU_ModeAttr, "imex::xegpu::Mode::SIMT">: $mode);

@@ -95,23 +93,40 @@ def XeGPU_CreateNdDescOp : XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSe
let builders = [
OpBuilder<(ins "::mlir::Type": $TensorDesc, "::mlir::Value": $source, "::mlir::ValueRange": $offsets,
"::mlir::ValueRange": $shape, "::mlir::ValueRange": $strides, "::llvm::ArrayRef<int64_t>": $static_offsets,
CArg<"::imex::xegpu::MemoryScope", "xegpu::MemoryScope::GLOBAL">: $memory_scope,
CArg<"bool", "true">: $boundary_check, CArg<"::imex::xegpu::Mode", "imex::xegpu::Mode::SIMT">: $mode),
[{ $_state.addOperands(source);
[{
auto staticDims = std::count_if(static_offsets.begin(), static_offsets.end(),
[](int64_t d) { return !mlir::ShapedType::isDynamic(d); });
auto dynamicDims = std::count_if(static_offsets.begin(), static_offsets.end(),
[](int64_t d) { return mlir::ShapedType::isDynamic(d); });

auto dims = offsets.size() + staticDims;
assert((isStaticShapedMemRef(source) &&
dims == getRankOf(source) &&
shape.size() == 0 &&
strides.size() == 0
) ||
((!isMemRef(source) || dims == getRankOf(source)) &&
shape.size() != 0 &&
dims == shape.size() &&
shape.size() == strides.size()
)
);
assert(offsets.size() == dynamicDims);

$_state.addOperands(source);
$_state.addOperands(offsets);
$_state.addOperands(shape);
$_state.addOperands(strides);
$_state.addAttribute(getOperandSegmentSizesAttrName($_state.name), $_builder.getDenseI32ArrayAttr({1, static_cast<int32_t>(offsets.size()), static_cast<int32_t>(shape.size()), static_cast<int32_t>(strides.size())}));
$_state.addAttribute(getStaticOffsetsAttrName($_state.name), $_builder.getDenseI64ArrayAttr(static_offsets));
$_state.addAttribute(getMemoryScopeAttrName($_state.name), ::imex::xegpu::MemoryScopeAttr::get($_builder.getContext(), memory_scope));
$_state.addAttribute(getBoundaryCheckAttrName($_state.name), $_builder.getBoolAttr(boundary_check));
$_state.addAttribute(getBoundaryCheckAttrName($_state.name), ::imex::xegpu::ModeAttr::get($_builder.getContext(), mode));
$_state.addAttribute(getModeAttrName($_state.name), ::imex::xegpu::ModeAttr::get($_builder.getContext(), mode));
$_state.addTypes(TensorDesc); }]>,

OpBuilder<(ins "::mlir::Type": $tdesc, "::mlir::Value": $source, "::llvm::ArrayRef<mlir::OpFoldResult>": $offsets,
CArg<"::imex::xegpu::MemoryScope", "::imex::xegpu::MemoryScope::GLOBAL">:$memory_scope,
CArg<"bool", "true">:$boundary_check, CArg<"::imex::xegpu::Mode", "imex::xegpu::Mode::SIMT">: $mode),
[{ assert(offsets.size() == getRankOf(source));
[{ assert(isStaticShapedMemRef(source) && offsets.size() == getRankOf(source));
llvm::SmallVector<int64_t> staticOffsets;
llvm::SmallVector<mlir::Value> dynamicOffsets;
dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
@@ -120,16 +135,14 @@ def XeGPU_CreateNdDescOp : XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSe
::mlir::ValueRange({}) /* empty dynamic shape */,
::mlir::ValueRange({}) /* empty dynamic strides */,
staticOffsets /* static offsets */,
memory_scope,
boundary_check,
mode); }]>,


OpBuilder<(ins "::mlir::Type": $tdesc, "::mlir::Value": $source, "::llvm::ArrayRef<mlir::OpFoldResult>": $offsets,
"::mlir::ValueRange": $shape, "::mlir::ValueRange": $stride,
CArg<"::imex::xegpu::MemoryScope", "xegpu::MemoryScope::GLOBAL">:$memory_scope,
CArg<"bool", "true">:$boundary_check, CArg<"::imex::xegpu::Mode", "imex::xegpu::Mode::SIMT">: $mode),
[{ assert((!isMemRef(source) || getRankOf(source) == offsets.size()) && shape.size() == stride.size() &&
[{ assert((!isMemRef(source) || getRankOf(source) == offsets.size()) && shape.size() != 0 && shape.size() == stride.size() &&
offsets.size() == shape.size() && isIntegerOrDynamicShapedMemref(source));

llvm::SmallVector<int64_t> staticOffsets;
Expand All @@ -141,7 +154,6 @@ def XeGPU_CreateNdDescOp : XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSe
shape /* dynamic shape */,
stride /* dynamic strides */,
staticOffsets /* static offsets */,
memory_scope,
boundary_check,
mode); }]>
];
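
// Illustrative sketch, not part of this diff: the OpFoldResult-based builder
// above can be invoked with a mix of constant and dynamic offsets for a
// static-shaped memref source (dynOff is an assumed mlir::Value of index type):
//   builder.create<CreateNdDescOp>(loc, tdescTy, src,
//       llvm::ArrayRef<mlir::OpFoldResult>{builder.getIndexAttr(0), dynOff});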
@@ -172,6 +184,59 @@ def XeGPU_CreateNdDescOp : XeGPU_Op<"create_nd_tdesc", [Pure, AttrSizedOperandSe
assert(0 && "Unreachable");
}

void getOffsets(llvm::SmallVectorImpl<mlir::OpFoldResult> &offsets) {
auto dynamicOffsets = getOffsets(); //dynamic offsets
auto staticOffsets = getStaticOffsets();

if (staticOffsets.size() == 0) {
offsets.assign(dynamicOffsets.begin(), dynamicOffsets.end());
return;
}

for (size_t i = 0, j = 0; i < staticOffsets.size(); i++) {
if (mlir::ShapedType::isDynamic(staticOffsets[i])) {
assert(j < dynamicOffsets.size());
offsets.push_back(dynamicOffsets[j++]);
} else {
auto attr = mlir::IntegerAttr::get(mlir::IndexType::get(getContext()), staticOffsets[i]);
offsets.push_back(attr);
}
}
}
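
// Sketch of the behavior above, not part of this diff: with
// static_offsets = [0, kDynamic] and one dynamic offset value %off, the
// reconstructed list is {IndexAttr(0), Value(%off)}: static entries become
// IntegerAttrs and dynamic slots consume the op's offset operands in order.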

void getShape(llvm::SmallVectorImpl<mlir::OpFoldResult> &shape) {
if (isIntegerOrDynamicShapedMemref(getSource())) {
shape.append(getShape().begin(), getShape().end());
} else {
for (auto dim: getSourceType().cast<::mlir::MemRefType>().getShape()) {
auto attr = mlir::IntegerAttr::get(mlir::IndexType::get(getContext()), dim);
shape.push_back(attr);
}
}
}

void getStrides(llvm::SmallVectorImpl<mlir::OpFoldResult> &strides) {
if (isIntegerOrDynamicShapedMemref(getSource())) {
strides.append(getStrides().begin(), getStrides().end());
} else {
auto [staticStrides, offset] = mlir::getStridesAndOffset(getSourceType().cast<mlir::MemRefType>());
for (auto dim: staticStrides) {
auto attr = mlir::IntegerAttr::get(mlir::IndexType::get(getContext()), dim);
strides.push_back(attr);
}
}
}

size_t getNumStaticOffsets() {
return std::count_if(getStaticOffsets().begin(), getStaticOffsets().end(),
[](int64_t dSize) { return !mlir::ShapedType::isDynamic(dSize); });
}

size_t getNumDynamicOffsets() {
return std::count_if(getStaticOffsets().begin(), getStaticOffsets().end(),
[](int64_t dSize) { return mlir::ShapedType::isDynamic(dSize); });
}

size_t getOffsetsRank() {
return getOffsets().size() + std::count_if(getStaticOffsets().begin(), getStaticOffsets().end(),
[](int64_t dSize) { return !mlir::ShapedType::isDynamic(dSize); });
@@ -259,7 +324,6 @@ def XeGPU_CreateDescOp
supported group size, e.g., vector<16xindex>. Each element in the vector corresponds to a
work item (SIMT lane) in the subgroup.
In SIMT mode (default), it is an index scalar representing the offset of the access point.
* memory_scope: [optional attribute] indicates where the memory is located, "global" for global memory (default), and "slm" for shared memory.
* chunk_size_per_lane: [optional attribute] indicates the number of contiguous elements accessed for each offset; the default is 1.

Example 1. It assumes subgroup size is 4, and accesses a[0], a[16], a[32], a[64]
@@ -283,7 +347,6 @@ def XeGPU_CreateDescOp

let arguments = (ins XeGPU_BaseAddrType: $source,
XeGPU_OffsetType: $offsets,
DefaultValuedAttr<XeGPU_MemoryScopeAttr, "imex::xegpu::MemoryScope::GLOBAL">: $memory_scope,
DefaultValuedAttr<I32Attr, "1">: $chunk_size_per_lane,
DefaultValuedAttr<XeGPU_ModeAttr, "imex::xegpu::Mode::SIMT">: $mode);

@@ -303,7 +366,7 @@ def XeGPU_CreateDescOp

}];

// Format: xegpu.create_tdesc %src, %offsets {mode=simt, memory_scope=slm, chunk_size_per_lane=1}
// Format: xegpu.create_tdesc %src, %offsets {mode=simt, chunk_size_per_lane=1}
// : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
let hasCustomAssemblyFormat = 1;
let hasVerifier = 1;
14 changes: 6 additions & 8 deletions include/imex/Dialect/XeGPU/IR/XeGPUTypes.td
@@ -23,18 +23,15 @@ include "imex/Dialect/XeGPU/IR/XeGPUDialect.td"
// An Integer array attribute with fixed 2 elements.
def XeGPU_IntArrayAttr2: ConfinedAttr<DenseI64ArrayAttr, [DenseArrayCount<2>]>;
def XeGPU_IntType: AnyTypeOf<[I1, I8, I16, I32, I64, SI1, SI8, SI16, SI32, SI64, UI1, UI8, UI16, UI32, UI64]>;
def XeGPU_FloatType: AnyTypeOf<[F16, F32, F64, BF16, F8E4M3FN, F8E5M2, F8E4M3FNUZ, F8E4M3B11FNUZ, F8E5M2FNUZ]>;
def XeGPU_FloatType: AnyTypeOf<[F16, F32, F64, BF16, TF32]>;
def XeGPU_ScalarType: AnyTypeOf<[XeGPU_IntType, XeGPU_FloatType]>;
def XeGPU_BaseAddrType: AnyTypeOf<[MemRefRankOf<[XeGPU_ScalarType], [1, 2]>, UI64]>;
def XeGPU_BaseAddrType: AnyTypeOf<[MemRefRankOf<[XeGPU_ScalarType], [1, 2]>, UI64, UI32, I64, I32]>;
def XeGPU_DpasOpType: VectorOfRankAndType<[2, 3], [XeGPU_ScalarType]>;
def XeGPU_OffsetType: AnyTypeOf<[VectorOfRankAndType<[1], [Index]>, Index]>;
def XeGPU_MaskType: AnyTypeOf<[VectorOfRankAndType<[1,2], [I1]>, I1]>;
def XeGPU_ValueType: AnyTypeOf<[VectorOfRankAndType<[1,2,3], [XeGPU_ScalarType]>, XeGPU_ScalarType]>;

// def XeGPU_VectorType: VectorOfRankAndType<[1,2,3], [XeGPU_ScalarType]>;
// def XeGPU_Vector3DType: VectorOfRankAndType<[3], [XeGPU_ScalarType]>;
def XeGPU_Vector2DType: VectorOfRankAndType<[2], [XeGPU_ScalarType]>;
// def XeGPU_Vector1DType: VectorOfRankAndType<[1], [XeGPU_ScalarType]>;

// common base class for types in XeGPU dialect
class XeGPUTypeDef<string name, string typeMnemonic,
@@ -72,15 +69,17 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",

let parameters = (ins ArrayRefParameter<"int64_t">:$shape,
"::mlir::Type":$elementType,
DefaultValuedParameter<"::imex::xegpu::MemoryScope", "xegpu::MemoryScope::GLOBAL">: $memory_scope,
OptionalParameter<"::mlir::Attribute"> :$encoding);

let builders = [
TypeBuilderWithInferredContext<(ins
"::llvm::ArrayRef<int64_t>":$shape,
"::mlir::Type":$elementType,
CArg<"::imex::xegpu::MemoryScope", "xegpu::MemoryScope::GLOBAL">: $memory_scope,
CArg<"::mlir::Attribute", "{}">:$encoding
), [{
return $_get(elementType.getContext(), shape, elementType, encoding);
return $_get(elementType.getContext(), shape, elementType, memory_scope, encoding);
}]>
];

@@ -100,8 +99,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc",
}
}];

// let assemblyFormat = "`<` custom<ShapeAndType>($shape, $elementType) (`,` custom<TensorDescAttr>($encoding)^)? `>`";
let assemblyFormat = "`<` custom<ShapeAndType>($shape, $elementType) (`,` $encoding^)? `>`";
let assemblyFormat = "`<` custom<ShapeAndType>($shape, $elementType)``custom<TensorDescAttr>($memory_scope, $encoding)`>`";
}
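
// Illustrative sketch, not part of this diff: with memory_scope folded into the
// type, a tensor_desc is expected to print along these lines (exact syntax is
// defined by the custom TensorDescAttr printer/parser):
//   !xegpu.tensor_desc<8x16xf16>                   // memory_scope defaults to global
//   !xegpu.tensor_desc<16xf32, memory_scope = slm, #xegpu.scattered>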

#endif // _XEGPU_TYPES_TD_INCLUDED_
3 changes: 2 additions & 1 deletion include/imex/Utils/XeUtils.h
@@ -38,7 +38,8 @@ template <typename T> static std::string makeString(T array) {
os << "[";
for (auto i = 1; i < array.size(); i++)
os << array[i - 1] << ", ";
os << array[array.size() - 1] << "]";
if (array.size()) os << array[array.size() - 1];
os << "]";
os.flush();
return buf;
}
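
// Sketch of the guarded behavior, not part of this diff:
//   makeString(llvm::SmallVector<int64_t>{})     -> "[]"    (previously indexed array[-1])
//   makeString(llvm::SmallVector<int64_t>{2, 4}) -> "[2, 4]"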