-
Notifications
You must be signed in to change notification settings - Fork 12k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
AMDGPU: Add v_prng_b32 instruction for gfx950
Adds a pseudo-random number generation instruction, intended for stochastic rounding.
- Loading branch information
1 parent
54f5e02
commit d613fe2
Showing
16 changed files
with
252 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx906 -emit-llvm \ | ||
// RUN: -verify -o - %s | ||
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -emit-llvm \ | ||
// RUN: -verify -o - %s | ||
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx940 -emit-llvm \ | ||
// RUN: -verify -o - %s | ||
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1200 -emit-llvm \ | ||
// RUN: -verify -o - %s | ||
|
||
|
||
// REQUIRES: amdgpu-registered-target | ||
|
||
typedef unsigned int uint; | ||
// Negative test: every RUN line in this file selects a CPU for which the builtin is expected to be | ||
// rejected, so the call below must produce the "needs target feature prng-inst" diagnostic. | ||
void test_prng_b32(global uint* out, uint a) { | ||
*out = __builtin_amdgcn_prng_b32(a); // expected-error{{'__builtin_amdgcn_prng_b32' needs target feature prng-inst}} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py | ||
// RUN: %clang_cc1 -cl-std=CL1.2 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx950 -emit-llvm -o - %s | FileCheck %s | ||
// REQUIRES: amdgpu-registered-target | ||
|
||
typedef unsigned int uint; | ||
|
||
// CHECK-LABEL: @test_prng_b32( | ||
// CHECK-NEXT: entry: | ||
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) | ||
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5) | ||
// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr addrspace(5) [[OUT_ADDR]], align 8 | ||
// CHECK-NEXT: store i32 [[A:%.*]], ptr addrspace(5) [[A_ADDR]], align 4 | ||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4 | ||
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.prng.b32(i32 [[TMP0]]) | ||
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8 | ||
// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4 | ||
// CHECK-NEXT: ret void | ||
// | ||
// Positive test: on gfx950 the builtin is expected to lower to a single call to the | ||
// llvm.amdgcn.prng.b32 intrinsic whose result is stored through 'out' (see the assertions above). | ||
void test_prng_b32(global uint* out, uint a) { | ||
*out = __builtin_amdgcn_prng_b32(a); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
; Run instruction selection for llvm.amdgcn.prng.b32 on gfx950 through both SelectionDAG | ||
; (-global-isel=0) and GlobalISel (-global-isel=1); both share the GCN check prefix. | ||
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s | ||
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s | ||
|
||
declare i32 @llvm.amdgcn.prng.b32(i32) #0 | ||
|
||
; Variable source: the i32 kernel argument is passed straight to the intrinsic and the | ||
; result is stored to %out. The check expects the source operand to be an SGPR. | ||
; GCN-LABEL: {{^}}prng_b32: | ||
; GCN: v_prng_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} | ||
define amdgpu_kernel void @prng_b32(ptr addrspace(1) %out, i32 %src) #1 { | ||
%prng = call i32 @llvm.amdgcn.prng.b32(i32 %src) #0 | ||
store i32 %prng, ptr addrspace(1) %out, align 4 | ||
ret void | ||
} | ||
|
||
; Constant source 4: the check expects the value to appear directly as the source operand. | ||
; The label ends with ':' so it matches only this symbol and not a longer name sharing the prefix. | ||
; GCN-LABEL: {{^}}prng_b32_constant_4: | ||
; GCN: v_prng_b32_e32 {{v[0-9]+}}, 4 | ||
define amdgpu_kernel void @prng_b32_constant_4(ptr addrspace(1) %out) #1 { | ||
%prng = call i32 @llvm.amdgcn.prng.b32(i32 4) #0 | ||
store i32 %prng, ptr addrspace(1) %out, align 4 | ||
ret void | ||
} | ||
|
||
; Constant source 100: the check expects the value to be printed as the literal 0x64. | ||
; The label ends with ':' so it matches only this symbol and not a longer name sharing the prefix. | ||
; GCN-LABEL: {{^}}prng_b32_constant_100: | ||
; GCN: v_prng_b32_e32 {{v[0-9]+}}, 0x64 | ||
define amdgpu_kernel void @prng_b32_constant_100(ptr addrspace(1) %out) #1 { | ||
%prng = call i32 @llvm.amdgcn.prng.b32(i32 100) #0 | ||
store i32 %prng, ptr addrspace(1) %out, align 4 | ||
ret void | ||
} | ||
|
||
|
||
attributes #0 = { nounwind readnone } | ||
attributes #1 = { nounwind } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
// The first RUN line checks the gfx950 encodings; the second assembles the same input for gfx940, | ||
// where every instruction is expected to be rejected, so the GFX940-ERR checks below are exercised. | ||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefix=GFX950 %s | ||
// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940-ERR --implicit-check-not=error: %s | ||
|
||
v_prng_b32 v5, v1 | ||
// GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU | ||
|
||
v_prng_b32 v5, v255 | ||
// GFX950: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0xb1,0x0a,0x7e] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU | ||
|
||
v_prng_b32 v5, s1 | ||
// GFX950: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0xb0,0x0a,0x7e] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU | ||
|
||
v_prng_b32 v5, s101 | ||
// GFX950: v_prng_b32_e32 v5, s101 ; encoding: [0x65,0xb0,0x0a,0x7e] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU | ||
|
||
v_prng_b32 v5, vcc_lo | ||
// GFX950: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0xb0,0x0a,0x7e] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU | ||
|
||
v_prng_b32 v5, vcc_hi | ||
// GFX950: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0xb0,0x0a,0x7e] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU | ||
|
||
v_prng_b32 v5, ttmp15 | ||
// GFX950: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0xb0,0x0a,0x7e] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU | ||
|
||
v_prng_b32 v5, m0 | ||
// GFX950: v_prng_b32_e32 v5, m0 ; encoding: [0x7c,0xb0,0x0a,0x7e] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU | ||
|
||
v_prng_b32 v5, exec_lo | ||
// GFX950: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0xb0,0x0a,0x7e] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU | ||
|
||
v_prng_b32 v5, exec_hi | ||
// GFX950: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0xb0,0x0a,0x7e] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU | ||
|
||
v_prng_b32 v5, -1 | ||
// GFX950: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0xb0,0x0a,0x7e] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU | ||
|
||
v_prng_b32 v5, 0.5 | ||
// GFX950: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0xb0,0x0a,0x7e] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU | ||
|
||
v_prng_b32 v5, src_scc | ||
// GFX950: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7e] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU | ||
|
||
v_prng_b32 v255, 0xaf123456 | ||
// GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf] | ||
// GFX940-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -show-encoding %s | FileCheck --check-prefixes=GFX950 %s | ||
|
||
v_prng_b32 v5, v1 quad_perm:[3,2,1,0] | ||
// GFX950: v_prng_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1b,0x00,0xff] | ||
|
||
v_prng_b32 v5, v1 quad_perm:[0,1,2,3] | ||
// GFX950: v_prng_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0xe4,0x00,0xff] | ||
|
||
v_prng_b32 v5, v1 row_mirror | ||
// GFX950: v_prng_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x40,0x01,0xff] | ||
|
||
v_prng_b32 v5, v1 row_half_mirror | ||
// GFX950: v_prng_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x41,0x01,0xff] | ||
|
||
v_prng_b32 v5, v1 row_shl:1 | ||
// GFX950: v_prng_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x01,0x01,0xff] | ||
|
||
v_prng_b32 v5, v1 row_shl:15 | ||
// GFX950: v_prng_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x0f,0x01,0xff] | ||
|
||
v_prng_b32 v5, v1 row_shr:1 | ||
// GFX950: v_prng_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x11,0x01,0xff] | ||
|
||
v_prng_b32 v5, v1 row_shr:15 | ||
// GFX950: v_prng_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x1f,0x01,0xff] | ||
|
||
v_prng_b32 v5, v1 row_ror:1 | ||
// GFX950: v_prng_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x21,0x01,0xff] | ||
|
||
v_prng_b32 v5, v1 row_ror:15 | ||
// GFX950: v_prng_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xb0,0x0a,0x7e,0x01,0x2f,0x01,0xff] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx950 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX950 %s | ||
|
||
# GFX950: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0xb1,0x0a,0x7e] | ||
0x01,0xb1,0x0a,0x7e | ||
|
||
# GFX950: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0xb1,0x0a,0x7e] | ||
0xff,0xb1,0x0a,0x7e | ||
|
||
# GFX950: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0xb0,0x0a,0x7e] | ||
0x01,0xb0,0x0a,0x7e | ||
|
||
# GFX950: v_prng_b32_e32 v5, s101 ; encoding: [0x65,0xb0,0x0a,0x7e] | ||
0x65,0xb0,0x0a,0x7e | ||
|
||
# GFX950: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0xb0,0x0a,0x7e] | ||
0x6a,0xb0,0x0a,0x7e | ||
|
||
# GFX950: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0xb0,0x0a,0x7e] | ||
0x6b,0xb0,0x0a,0x7e | ||
|
||
# GFX950: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0xb0,0x0a,0x7e] | ||
0x7b,0xb0,0x0a,0x7e | ||
|
||
# GFX950: v_prng_b32_e32 v5, m0 ; encoding: [0x7c,0xb0,0x0a,0x7e] | ||
0x7c,0xb0,0x0a,0x7e | ||
|
||
# GFX950: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0xb0,0x0a,0x7e] | ||
0x7e,0xb0,0x0a,0x7e | ||
|
||
# GFX950: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0xb0,0x0a,0x7e] | ||
0x7f,0xb0,0x0a,0x7e | ||
|
||
# GFX950: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0xb0,0x0a,0x7e] | ||
0xc1,0xb0,0x0a,0x7e | ||
|
||
# GFX950: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0xb0,0x0a,0x7e] | ||
0xf0,0xb0,0x0a,0x7e | ||
|
||
# GFX950: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0xb0,0x0a,0x7e] | ||
0xfd,0xb0,0x0a,0x7e | ||
|
||
# GFX950: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf] | ||
0xff,0xb0,0xfe,0x7f,0x56,0x34,0x12,0xaf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters