Skip to content

Commit

Permalink
AMDGPU: Add v_prng_b32 instruction for gfx950
Browse files Browse the repository at this point in the history
Random number instruction for stochastic rounding.
  • Loading branch information
pravinjagtap authored and arsenm committed Nov 15, 2024
1 parent 54f5e02 commit d613fe2
Show file tree
Hide file tree
Showing 16 changed files with 252 additions and 3 deletions.
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -522,5 +522,7 @@ TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64, "V4fiV2iV4fs",
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")
TARGET_BUILTIN(__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64, "V4fiV2iV4fs", "nc", "gfx12-insts,wavefrontsize64")

// unsigned int -> unsigned int builtin; only usable when the target has the
// "prng-inst" feature.
TARGET_BUILTIN(__builtin_amdgcn_prng_b32, "UiUi", "nc", "prng-inst")

#undef BUILTIN
#undef TARGET_BUILTIN
2 changes: 1 addition & 1 deletion clang/test/CodeGenOpenCL/amdgpu-features.cl
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
Expand Down
16 changes: 16 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -emit-llvm \
// RUN: -verify -o - %s
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -emit-llvm \
// RUN: -verify -o - %s
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx940 -emit-llvm \
// RUN: -verify -o - %s
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1200 -emit-llvm \
// RUN: -verify -o - %s


// REQUIRES: amdgpu-registered-target

typedef unsigned int uint;
// None of the -target-cpu values in the RUN lines is gfx950, so the call to
// the builtin below has to be rejected with the diagnostic spelled out on
// that line.
void test_prng_b32(global uint* out, uint a) {
*out = __builtin_amdgcn_prng_b32(a); // expected-error{{'__builtin_amdgcn_prng_b32' needs target feature prng-inst}}
}
21 changes: 21 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -cl-std=CL1.2 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx950 -emit-llvm -o - %s | FileCheck %s
// REQUIRES: amdgpu-registered-target

typedef unsigned int uint;

// CHECK-LABEL: @test_prng_b32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8
// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.prng.b32(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
// CHECK-NEXT: ret void
//
// Compiled for gfx950: the builtin call is expected to lower to a call to
// @llvm.amdgcn.prng.b32 whose result is stored through `out`.
void test_prng_b32(global uint* out, uint a) {
*out = __builtin_amdgcn_prng_b32(a);
}
4 changes: 4 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,10 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;

// llvm.amdgcn.prng.b32: takes one i32 and returns an i32, is marked as not
// accessing memory (IntrNoMem), and is exposed to Clang as
// __builtin_amdgcn_prng_b32.
def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic<
[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]
>, ClangBuiltin<"__builtin_amdgcn_prng_b32">;

} // TargetPrefix = "amdgcn"

// New-style image intrinsics
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -978,6 +978,12 @@ def FeatureVmemWriteVgprInOrder : SubtargetFeature<"vmem-write-vgpr-in-order",
"VMEM instructions of the same type write VGPR results in order"
>;

// "prng-inst": sets the subtarget's HasPrngInst field to true. Listed in
// FeatureISAVersion9_5_Common.
def FeaturePrngInst : SubtargetFeature<"prng-inst",
"HasPrngInst",
"true",
"Has v_prng_b32 instruction"
>;

//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
Expand Down Expand Up @@ -1495,7 +1501,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureFP8ConversionInsts,
FeatureCvtFP8VOP1Bug,
FeatureGFX950Insts,
FeatureAddressableLocalMemorySize163840
FeatureAddressableLocalMemorySize163840,
FeaturePrngInst
])>;

def FeatureISAVersion9_4_0 : FeatureSet<
Expand Down Expand Up @@ -2344,6 +2351,9 @@ def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">,
def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">,
AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>;

// Predicate for instructions that require FeaturePrngInst: queried through
// Subtarget->hasPrngInst() during selection and through the feature bit in
// the assembler.
def HasPrngInst : Predicate<"Subtarget->hasPrngInst()">,
AssemblerPredicate<(all_of FeaturePrngInst)>;

def HasGDS : Predicate<"Subtarget->hasGDS()">;

def HasGWS : Predicate<"Subtarget->hasGWS()">;
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1253,6 +1253,12 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {

break;
}
case Intrinsic::amdgcn_prng_b32: {
  // An undef operand (PoisonValue is a subclass of UndefValue, so poison is
  // covered as well) makes the call foldable to that same operand.
  auto *Src = II.getArgOperand(0);
  if (isa<UndefValue>(Src))
    return IC.replaceInstUsesWith(II, Src);
  // Nothing to fold. Leave the switch explicitly, like the preceding case
  // does, instead of running off the end of this case: a case appended below
  // must not become reachable from here by accident.
  break;
}
}
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4515,6 +4515,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_pk_u8_f32:
case Intrinsic::amdgcn_alignbyte:
case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_prng_b32:
case Intrinsic::amdgcn_fdot2:
case Intrinsic::amdgcn_sdot2:
case Intrinsic::amdgcn_udot2:
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasSALUFloatInsts = false;
bool HasPseudoScalarTrans = false;
bool HasRestrictedSOffset = false;

bool HasPrngInst = false;
bool HasVcmpxPermlaneHazard = false;
bool HasVMEMtoScalarWriteHazard = false;
bool HasSMEMtoVectorWriteHazard = false;
Expand Down Expand Up @@ -1321,6 +1321,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// instruction.
unsigned maxHardClauseLength() const { return MaxHardClauseLength; }

/// \returns true if the subtarget has the v_prng_b32 instruction (the
/// "prng-inst" feature).
bool hasPrngInst() const { return HasPrngInst; }

/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,9 @@ let SubtargetPredicate = isGFX11Plus in {
defm V_CVT_U32_U16 : VOP1Inst_t16<"v_cvt_u32_u16", VOP_I32_I16>;
} // End SubtargetPredicate = isGFX11Plus

// v_prng_b32: VOP1 instruction with an i32 source and an i32 result, selected
// from int_amdgcn_prng_b32. Guarded by the HasPrngInst predicate.
let SubtargetPredicate = HasPrngInst in
defm V_PRNG_B32 : VOP1Inst <"v_prng_b32", VOP_I32_I32, int_amdgcn_prng_b32>;

foreach vt = Reg32Types.types in {
def : GCNPat<(int_amdgcn_permlane64 (vt VRegSrc_32:$src0)),
(vt (V_PERMLANE64_B32 (vt VRegSrc_32:$src0)))
Expand Down Expand Up @@ -1516,6 +1519,8 @@ defm V_CVT_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x55>;
defm V_CVT_PK_F32_FP8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x56>;
defm V_CVT_PK_F32_BF8 : VOP1_Real_NoDstSel_SDWA_gfx9<0x57>;

// Real gfx9 encodings of V_PRNG_B32, VOP1 opcode 0x58.
defm V_PRNG_B32 : VOP1_Real_gfx9 <0x58>;

class MovDPP8Pattern<Predicate Pred, Instruction Inst, ValueType vt> : GCNPat <
(vt (int_amdgcn_mov_dpp8 vt:$src, timm:$dpp8)),
(Inst VGPR_32:$src, VGPR_32:$src, (as_i32timm $dpp8), (i32 DPP8Mode.FI_0))> {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/TargetParser/TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["gws"] = true;
break;
case GK_GFX950:
Features["prng-inst"] = true;
Features["gfx950-insts"] = true;
[[fallthrough]];
case GK_GFX942:
Expand Down
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Instruction selection of llvm.amdgcn.prng.b32 to v_prng_b32 on gfx950, with
; both SelectionDAG and GlobalISel. Each kernel stores the result so the call
; stays live.

declare i32 @llvm.amdgcn.prng.b32(i32) #0

; Source operand is a kernel argument, expected in an SGPR.
; GCN-LABEL: {{^}}prng_b32:
; GCN: v_prng_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}
define amdgpu_kernel void @prng_b32(ptr addrspace(1) %out, i32 %src) #1 {
  %prng = call i32 @llvm.amdgcn.prng.b32(i32 %src) #0
  store i32 %prng, ptr addrspace(1) %out, align 4
  ret void
}

; Constant 4 is expected to be printed as a decimal immediate operand.
; GCN-LABEL: {{^}}prng_b32_constant_4:
; GCN: v_prng_b32_e32 {{v[0-9]+}}, 4
define amdgpu_kernel void @prng_b32_constant_4(ptr addrspace(1) %out) #1 {
  %prng = call i32 @llvm.amdgcn.prng.b32(i32 4) #0
  store i32 %prng, ptr addrspace(1) %out, align 4
  ret void
}

; Constant 100 is expected to be printed as the hexadecimal operand 0x64.
; GCN-LABEL: {{^}}prng_b32_constant_100:
; GCN: v_prng_b32_e32 {{v[0-9]+}}, 0x64
define amdgpu_kernel void @prng_b32_constant_100(ptr addrspace(1) %out) #1 {
  %prng = call i32 @llvm.amdgcn.prng.b32(i32 100) #0
  store i32 %prng, ptr addrspace(1) %out, align 4
  ret void
}


attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
57 changes: 57 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950_asm_vop1.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940-ERR --implicit-check-not=error: %s

// The first run checks the gfx950 encodings of v_prng_b32. The second run
// assembles the same input for gfx940 and expects every instruction to be
// rejected; without it the error directives in this file were never checked.

v_prng_b32 v5, v1
// GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, v255
// GFX950: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0xb1,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, s1
// GFX950: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, s101
// GFX950: v_prng_b32_e32 v5, s101 ; encoding: [0x65,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, vcc_lo
// GFX950: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, vcc_hi
// GFX950: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, ttmp15
// GFX950: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, m0
// GFX950: v_prng_b32_e32 v5, m0 ; encoding: [0x7c,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, exec_lo
// GFX950: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, exec_hi
// GFX950: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, -1
// GFX950: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, 0.5
// GFX950: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v5, src_scc
// GFX950: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7e]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU

v_prng_b32 v255, 0xaf123456
// GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
31 changes: 31 additions & 0 deletions llvm/test/MC/AMDGPU/gfx950_asm_vop1_dpp16.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefixes=GFX950 %s

// DPP forms of v_prng_b32 on gfx950, covering the quad_perm, row_mirror,
// row_half_mirror, row_shl, row_shr and row_ror controls.

v_prng_b32 v5, v1 quad_perm:[3,2,1,0]
// GFX950: v_prng_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff]

v_prng_b32 v5, v1 quad_perm:[0,1,2,3]
// GFX950: v_prng_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0xe4,0x00,0xff]

v_prng_b32 v5, v1 row_mirror
// GFX950: v_prng_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x40,0x01,0xff]

v_prng_b32 v5, v1 row_half_mirror
// GFX950: v_prng_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x41,0x01,0xff]

v_prng_b32 v5, v1 row_shl:1
// GFX950: v_prng_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x01,0x01,0xff]

v_prng_b32 v5, v1 row_shl:15
// GFX950: v_prng_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x0f,0x01,0xff]

v_prng_b32 v5, v1 row_shr:1
// GFX950: v_prng_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x11,0x01,0xff]

v_prng_b32 v5, v1 row_shr:15
// GFX950: v_prng_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1f,0x01,0xff]

v_prng_b32 v5, v1 row_ror:1
// GFX950: v_prng_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x21,0x01,0xff]

v_prng_b32 v5, v1 row_ror:15
// GFX950: v_prng_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x2f,0x01,0xff]
43 changes: 43 additions & 0 deletions llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX950 %s

# Disassembly of the e32 encodings of v_prng_b32 on gfx950. Each byte sequence
# below is expected to print as the instruction named in the line above it.

# GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e]
0x01,0xb1,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0xb1,0x0a,0x7e]
0xff,0xb1,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0xb0,0x0a,0x7e]
0x01,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, s101 ; encoding: [0x65,0xb0,0x0a,0x7e]
0x65,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0xb0,0x0a,0x7e]
0x6a,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0xb0,0x0a,0x7e]
0x6b,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0xb0,0x0a,0x7e]
0x7b,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, m0 ; encoding: [0x7c,0xb0,0x0a,0x7e]
0x7c,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0xb0,0x0a,0x7e]
0x7e,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0xb0,0x0a,0x7e]
0x7f,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0xb0,0x0a,0x7e]
0xc1,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0xb0,0x0a,0x7e]
0xf0,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7e]
0xfd,0xb0,0x0a,0x7e

# GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf]
0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf
18 changes: 18 additions & 0 deletions llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6547,3 +6547,21 @@ define half @test_constant_fold_exp2_f16_neg_denorm() {
%val = call half @llvm.amdgcn.exp2.f16(half 0xH83ff)
ret half %val
}

; --------------------------------------------------------------------
; llvm.amdgcn.prng.b32
; --------------------------------------------------------------------
declare i32 @llvm.amdgcn.prng.b32(i32)
; An undef operand is expected to fold the whole call to undef.
define i32 @prng_undef_i32() {
; CHECK-LABEL: @prng_undef_i32(
; CHECK-NEXT: ret i32 undef
%prng = call i32 @llvm.amdgcn.prng.b32(i32 undef)
ret i32 %prng
}

; A poison operand is expected to fold the whole call to poison.
define i32 @prng_poison_i32() {
; CHECK-LABEL: @prng_poison_i32(
; CHECK-NEXT: ret i32 poison
%prng = call i32 @llvm.amdgcn.prng.b32(i32 poison)
ret i32 %prng
}

0 comments on commit d613fe2

Please sign in to comment.