-
Notifications
You must be signed in to change notification settings - Fork 15k
[AMDGPU] Add support for v_prng_b32
on gfx1250
#149450
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-clang Author: Shilei Tian (shiltian) ChangesCo-authored-by: Mekhanoshin, Stanislav <[email protected]> Patch is 48.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149450.diff 22 Files Affected:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index a9ea17642d6ad..52ccf254fe9ce 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -42,6 +42,24 @@ void test_s_wait_tensorcnt() {
__builtin_amdgcn_s_wait_tensorcnt(0);
}
+// CHECK-LABEL: @test_prng_b32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.prng.b32(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
+// CHECK-NEXT: ret void
+//
+void test_prng_b32(global uint* out, uint a) {
+ *out = __builtin_amdgcn_prng_b32(a);
+}
+
// CHECK-LABEL: @test_tanh_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3d040fb705a8d..84e4fa1dc84aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4007,7 +4007,8 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
case Intrinsic::amdgcn_rcp_legacy:
case Intrinsic::amdgcn_rsq_legacy:
case Intrinsic::amdgcn_rsq_clamp:
- case Intrinsic::amdgcn_tanh: {
+ case Intrinsic::amdgcn_tanh:
+ case Intrinsic::amdgcn_prng_b32: {
// FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
SDValue Src = N->getOperand(1);
return Src.isUndef() ? Src : SDValue();
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1bbbb610305e9..3ee90857b34b8 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1148,6 +1148,7 @@ defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
defm V_TANH_F32 : VOP1_Real_FULL<GFX1250Gen, 0x01e>;
defm V_TANH_F16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>;
defm V_TANH_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
+defm V_PRNG_B32 : VOP1_Real_FULL<GFX1250Gen, 0x04b>;
defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
index 2faf375a97a86..e3cfbd41b923d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
@@ -1,5 +1,7 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.prng.b32(i32) #0
@@ -27,6 +29,13 @@ define amdgpu_kernel void @prng_b32_constant_100(ptr addrspace(1) %out) #1 {
ret void
}
+; GCN-LABEL: {{^}}prng_undef_i32:
+; SDAG-NOT: v_prng_b32
+define amdgpu_kernel void @prng_undef_i32(ptr addrspace(1) %out) #1 {
+ %prng = call i32 @llvm.amdgcn.prng.b32(i32 undef)
+ store i32 %prng, ptr addrspace(1) %out, align 4
+ ret void
+}
attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
\ No newline at end of file
+attributes #1 = { nounwind }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index 279bb262bff04..5f310a9954ad0 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -163,6 +163,51 @@ v_tanh_bf16 v5, src_scc
v_tanh_bf16 v127, 0x8000
// GFX1250: v_tanh_bf16_e32 v127, 0x8000 ; encoding: [0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00]
+v_prng_b32 v5, v1
+// GFX1250: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, v255
+// GFX1250: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, s1
+// GFX1250: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, s105
+// GFX1250: v_prng_b32_e32 v5, s105 ; encoding: [0x69,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_lo
+// GFX1250: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_hi
+// GFX1250: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, ttmp15
+// GFX1250: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, m0
+// GFX1250: v_prng_b32_e32 v5, m0 ; encoding: [0x7d,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_lo
+// GFX1250: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_hi
+// GFX1250: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, null
+// GFX1250: v_prng_b32_e32 v5, null ; encoding: [0x7c,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, -1
+// GFX1250: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, 0.5
+// GFX1250: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, src_scc
+// GFX1250: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0x96,0x0a,0x7e]
+
+v_prng_b32 v255, 0xaf123456
+// GFX1250: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0x96,0xfe,0x7f,0x56,0x34,0x12,0xaf]
+
v_rcp_bf16 v5, v1
// GFX1250: v_rcp_bf16_e32 v5, v1 ; encoding: [0x01,0xf3,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 76272d25d92d4..aa2e028f661e1 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -169,6 +169,51 @@ v_tanh_bf16 v127, 0x8000
v_tanh_bf16 v5.h, v1.h
// GFX1250: v_tanh_bf16_e32 v5.h, v1.h ; encoding: [0x81,0x95,0x0a,0x7f]
+v_prng_b32 v5, v1
+// GFX1250: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, v255
+// GFX1250: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, s1
+// GFX1250: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, s105
+// GFX1250: v_prng_b32_e32 v5, s105 ; encoding: [0x69,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_lo
+// GFX1250: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_hi
+// GFX1250: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, ttmp15
+// GFX1250: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, m0
+// GFX1250: v_prng_b32_e32 v5, m0 ; encoding: [0x7d,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_lo
+// GFX1250: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_hi
+// GFX1250: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, null
+// GFX1250: v_prng_b32_e32 v5, null ; encoding: [0x7c,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, -1
+// GFX1250: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, 0.5
+// GFX1250: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, src_scc
+// GFX1250: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0x96,0x0a,0x7e]
+
+v_prng_b32 v255, 0xaf123456
+// GFX1250: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0x96,0xfe,0x7f,0x56,0x34,0x12,0xaf]
+
v_rcp_bf16 v5, v1
// GFX1250: v_rcp_bf16_e32 v5, v1 ; encoding: [0x01,0xf3,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
index 0a8ee84561d33..e1cd2e3043693 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
@@ -170,6 +170,58 @@ v_tanh_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 f
// GFX1250: v_tanh_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_prng_b32 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_half_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_prng_b32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_rcp_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
index d4afb9d9b2d9a..c1d3238b65cbd 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
@@ -178,6 +178,58 @@ v_tanh_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
// GFX1250: v_tanh_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7f,0x81,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_prng_b32 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_half_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_prng_b32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_rcp_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
index a7cb6bf8de69c..100e9f92ff58b 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
@@ -38,6 +38,18 @@ v_tanh_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX1250: v_tanh_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_prng_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x96,0xfe,0x7f,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_rcp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_rcp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7e,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
index 6acab7edc0d49..2ae103545443c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
@@ -46,6 +46,18 @@ v_tanh_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_tanh_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x94,0x0a,0x7f,0x81,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_prng_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x96,0xfe,0x7f,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_rcp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_rcp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7e,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
index 7486d849253e8..9c6a9127d82e4 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
@@ -127,6 +127,42 @@ v_cvt_f32_fp8 v1, v3 byte_sel:1 clamp
v_cvt_f32_fp8 v1, v3 byte_sel:2 clamp
// GFX1250: v_cvt_f32_fp8_e64 v1, v3 byte_sel:2 clamp ; encoding: [0x01,0x88,0xec,0xd5,0x03,0x01,0x00,0x00]
+v_prng_b32_e64 v5, v1
+// GFX1250: v_prng_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xcb,0xd5,0x01,0x01,0x00,0x00]
+
+v_prng_b32_e64 v5, v255
+// GFX1250: v_prng_b32_e64 v5, v255 ; encoding: [0x...
[truncated]
|
@llvm/pr-subscribers-mc Author: Shilei Tian (shiltian) ChangesCo-authored-by: Mekhanoshin, Stanislav <[email protected]> Patch is 48.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149450.diff 22 Files Affected:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index a9ea17642d6ad..52ccf254fe9ce 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -42,6 +42,24 @@ void test_s_wait_tensorcnt() {
__builtin_amdgcn_s_wait_tensorcnt(0);
}
+// CHECK-LABEL: @test_prng_b32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.prng.b32(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
+// CHECK-NEXT: ret void
+//
+void test_prng_b32(global uint* out, uint a) {
+ *out = __builtin_amdgcn_prng_b32(a);
+}
+
// CHECK-LABEL: @test_tanh_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3d040fb705a8d..84e4fa1dc84aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4007,7 +4007,8 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
case Intrinsic::amdgcn_rcp_legacy:
case Intrinsic::amdgcn_rsq_legacy:
case Intrinsic::amdgcn_rsq_clamp:
- case Intrinsic::amdgcn_tanh: {
+ case Intrinsic::amdgcn_tanh:
+ case Intrinsic::amdgcn_prng_b32: {
// FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
SDValue Src = N->getOperand(1);
return Src.isUndef() ? Src : SDValue();
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1bbbb610305e9..3ee90857b34b8 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1148,6 +1148,7 @@ defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
defm V_TANH_F32 : VOP1_Real_FULL<GFX1250Gen, 0x01e>;
defm V_TANH_F16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>;
defm V_TANH_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
+defm V_PRNG_B32 : VOP1_Real_FULL<GFX1250Gen, 0x04b>;
defm V_CVT_F32_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
index 2faf375a97a86..e3cfbd41b923d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
@@ -1,5 +1,7 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.prng.b32(i32) #0
@@ -27,6 +29,13 @@ define amdgpu_kernel void @prng_b32_constant_100(ptr addrspace(1) %out) #1 {
ret void
}
+; GCN-LABEL: {{^}}prng_undef_i32:
+; SDAG-NOT: v_prng_b32
+define amdgpu_kernel void @prng_undef_i32(ptr addrspace(1) %out) #1 {
+ %prng = call i32 @llvm.amdgcn.prng.b32(i32 undef)
+ store i32 %prng, ptr addrspace(1) %out, align 4
+ ret void
+}
attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
\ No newline at end of file
+attributes #1 = { nounwind }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index 279bb262bff04..5f310a9954ad0 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -163,6 +163,51 @@ v_tanh_bf16 v5, src_scc
v_tanh_bf16 v127, 0x8000
// GFX1250: v_tanh_bf16_e32 v127, 0x8000 ; encoding: [0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00]
+v_prng_b32 v5, v1
+// GFX1250: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, v255
+// GFX1250: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, s1
+// GFX1250: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, s105
+// GFX1250: v_prng_b32_e32 v5, s105 ; encoding: [0x69,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_lo
+// GFX1250: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_hi
+// GFX1250: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, ttmp15
+// GFX1250: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, m0
+// GFX1250: v_prng_b32_e32 v5, m0 ; encoding: [0x7d,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_lo
+// GFX1250: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_hi
+// GFX1250: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, null
+// GFX1250: v_prng_b32_e32 v5, null ; encoding: [0x7c,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, -1
+// GFX1250: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, 0.5
+// GFX1250: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, src_scc
+// GFX1250: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0x96,0x0a,0x7e]
+
+v_prng_b32 v255, 0xaf123456
+// GFX1250: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0x96,0xfe,0x7f,0x56,0x34,0x12,0xaf]
+
v_rcp_bf16 v5, v1
// GFX1250: v_rcp_bf16_e32 v5, v1 ; encoding: [0x01,0xf3,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 76272d25d92d4..aa2e028f661e1 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -169,6 +169,51 @@ v_tanh_bf16 v127, 0x8000
v_tanh_bf16 v5.h, v1.h
// GFX1250: v_tanh_bf16_e32 v5.h, v1.h ; encoding: [0x81,0x95,0x0a,0x7f]
+v_prng_b32 v5, v1
+// GFX1250: v_prng_b32_e32 v5, v1 ; encoding: [0x01,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, v255
+// GFX1250: v_prng_b32_e32 v5, v255 ; encoding: [0xff,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, s1
+// GFX1250: v_prng_b32_e32 v5, s1 ; encoding: [0x01,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, s105
+// GFX1250: v_prng_b32_e32 v5, s105 ; encoding: [0x69,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_lo
+// GFX1250: v_prng_b32_e32 v5, vcc_lo ; encoding: [0x6a,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_hi
+// GFX1250: v_prng_b32_e32 v5, vcc_hi ; encoding: [0x6b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, ttmp15
+// GFX1250: v_prng_b32_e32 v5, ttmp15 ; encoding: [0x7b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, m0
+// GFX1250: v_prng_b32_e32 v5, m0 ; encoding: [0x7d,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_lo
+// GFX1250: v_prng_b32_e32 v5, exec_lo ; encoding: [0x7e,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_hi
+// GFX1250: v_prng_b32_e32 v5, exec_hi ; encoding: [0x7f,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, null
+// GFX1250: v_prng_b32_e32 v5, null ; encoding: [0x7c,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, -1
+// GFX1250: v_prng_b32_e32 v5, -1 ; encoding: [0xc1,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, 0.5
+// GFX1250: v_prng_b32_e32 v5, 0.5 ; encoding: [0xf0,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, src_scc
+// GFX1250: v_prng_b32_e32 v5, src_scc ; encoding: [0xfd,0x96,0x0a,0x7e]
+
+v_prng_b32 v255, 0xaf123456
+// GFX1250: v_prng_b32_e32 v255, 0xaf123456 ; encoding: [0xff,0x96,0xfe,0x7f,0x56,0x34,0x12,0xaf]
+
v_rcp_bf16 v5, v1
// GFX1250: v_rcp_bf16_e32 v5, v1 ; encoding: [0x01,0xf3,0x0a,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
index 0a8ee84561d33..e1cd2e3043693 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
@@ -170,6 +170,58 @@ v_tanh_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 f
// GFX1250: v_tanh_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_prng_b32 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_half_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_prng_b32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_rcp_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
index d4afb9d9b2d9a..c1d3238b65cbd 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
@@ -178,6 +178,58 @@ v_tanh_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
// GFX1250: v_tanh_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7f,0x81,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_prng_b32 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_half_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_prng_b32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_rcp_bf16 v5, v1 quad_perm:[3,2,1,0]
// GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
index a7cb6bf8de69c..100e9f92ff58b 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
@@ -38,6 +38,18 @@ v_tanh_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX1250: v_tanh_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_prng_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x96,0xfe,0x7f,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_rcp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_rcp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7e,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
index 6acab7edc0d49..2ae103545443c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
@@ -46,6 +46,18 @@ v_tanh_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_tanh_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x94,0x0a,0x7f,0x81,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_prng_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x96,0xfe,0x7f,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
v_rcp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// GFX1250: v_rcp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7e,0x01,0x77,0x39,0x05]
// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
index 7486d849253e8..9c6a9127d82e4 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
@@ -127,6 +127,42 @@ v_cvt_f32_fp8 v1, v3 byte_sel:1 clamp
v_cvt_f32_fp8 v1, v3 byte_sel:2 clamp
// GFX1250: v_cvt_f32_fp8_e64 v1, v3 byte_sel:2 clamp ; encoding: [0x01,0x88,0xec,0xd5,0x03,0x01,0x00,0x00]
+v_prng_b32_e64 v5, v1
+// GFX1250: v_prng_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xcb,0xd5,0x01,0x01,0x00,0x00]
+
+v_prng_b32_e64 v5, v255
+// GFX1250: v_prng_b32_e64 v5, v255 ; encoding: [0x...
[truncated]
|
✅ With the latest revision this PR passed the undef deprecator. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. But has the builtin definition already been added in clang?
Yes, when we did gfx950 support. This is not really a new instruction in gfx1250. |
8bb64e1
to
487881c
Compare
@@ -4007,7 +4007,8 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine( | |||
case Intrinsic::amdgcn_rcp_legacy: | |||
case Intrinsic::amdgcn_rsq_legacy: | |||
case Intrinsic::amdgcn_rsq_clamp: | |||
case Intrinsic::amdgcn_tanh: { | |||
case Intrinsic::amdgcn_tanh: | |||
case Intrinsic::amdgcn_prng_b32: { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is unrelated to base support, the comment is unrelated, and I'm not sure it's safe to fold this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is related to the changes in llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
And based on the LFSR algorithm, it looks safe to me to fold this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I removed it. Will do it in a follow up.
Co-authored-by: Mekhanoshin, Stanislav <[email protected]>
487881c
to
e35cd55
Compare
Co-authored-by: Mekhanoshin, Stanislav [email protected]