[AMDGPU] Add support for `v_prng_b32` on gfx1250 #149450

shiltian · 2025-07-18T04:42:47Z

Co-authored-by: Mekhanoshin, Stanislav [email protected]

shiltian · 2025-07-18T04:43:06Z

This stack of pull requests is managed by Graphite. Learn more about stacking.

llvmbot · 2025-07-18T04:43:15Z

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-clang

Author: Shilei Tian (shiltian)

Changes

Co-authored-by: Mekhanoshin, Stanislav <[email protected]>

Patch is 48.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149450.diff

22 Files Affected:

(modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl (+18)
(modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+2-1)
(modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+1)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll (+10-1)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s (+45)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s (+45)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s (+52)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s (+52)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s (+12)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s (+12)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s (+36)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s (+36)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s (+44)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s (+44)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s (+4)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s (+4)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt (+45)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt (+39)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt (+9)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt (+36)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt (+33)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt (+5-2)

diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index a9ea17642d6ad..52ccf254fe9ce 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -42,6 +42,24 @@ void test_s_wait_tensorcnt() {
   __builtin_amdgcn_s_wait_tensorcnt(0);
 }
 
+// CHECK-LABEL: @test_prng_b32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.prng.b32(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
+// CHECK-NEXT:    ret void
+//
+void test_prng_b32(global uint* out, uint a) {
+  *out = __builtin_amdgcn_prng_b32(a);
+}
+
 // CHECK-LABEL: @test_tanh_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3d040fb705a8d..84e4fa1dc84aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4007,7 +4007,8 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
   case Intrinsic::amdgcn_rcp_legacy:
   case Intrinsic::amdgcn_rsq_legacy:
   case Intrinsic::amdgcn_rsq_clamp:
-  case Intrinsic::amdgcn_tanh: {
+  case Intrinsic::amdgcn_tanh:
+  case Intrinsic::amdgcn_prng_b32: {
     // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
     SDValue Src = N->getOperand(1);
     return Src.isUndef() ? Src : SDValue();
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1bbbb610305e9..3ee90857b34b8 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1148,6 +1148,7 @@ defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
 defm V_TANH_F32              : VOP1_Real_FULL<GFX1250Gen, 0x01e>;
 defm V_TANH_F16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>;
 defm V_TANH_BF16             : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
+defm V_PRNG_B32              : VOP1_Real_FULL<GFX1250Gen, 0x04b>;
 defm V_CVT_F32_BF16          : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
 defm V_CVT_PK_F16_FP8        : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
 defm V_CVT_PK_F16_BF8        : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
index 2faf375a97a86..e3cfbd41b923d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
 
 declare i32 @llvm.amdgcn.prng.b32(i32) #0
 
@@ -27,6 +29,13 @@ define amdgpu_kernel void @prng_b32_constant_100(ptr addrspace(1) %out) #1 {
   ret void
 }
 
+; GCN-LABEL: {{^}}prng_undef_i32:
+; SDAG-NOT: v_prng_b32
+define amdgpu_kernel void @prng_undef_i32(ptr addrspace(1) %out) #1 {
+  %prng = call i32 @llvm.amdgcn.prng.b32(i32 undef)
+  store i32 %prng, ptr addrspace(1) %out, align 4
+  ret void
+}
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
\ No newline at end of file
+attributes #1 = { nounwind }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index 279bb262bff04..5f310a9954ad0 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -163,6 +163,51 @@ v_tanh_bf16 v5, src_scc
 v_tanh_bf16 v127, 0x8000
 // GFX1250: v_tanh_bf16_e32 v127, 0x8000            ; encoding: [0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00]
 
+v_prng_b32 v5, v1
+// GFX1250: v_prng_b32_e32 v5, v1                   ; encoding: [0x01,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, v255
+// GFX1250: v_prng_b32_e32 v5, v255                 ; encoding: [0xff,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, s1
+// GFX1250: v_prng_b32_e32 v5, s1                   ; encoding: [0x01,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, s105
+// GFX1250: v_prng_b32_e32 v5, s105                 ; encoding: [0x69,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_lo
+// GFX1250: v_prng_b32_e32 v5, vcc_lo               ; encoding: [0x6a,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_hi
+// GFX1250: v_prng_b32_e32 v5, vcc_hi               ; encoding: [0x6b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, ttmp15
+// GFX1250: v_prng_b32_e32 v5, ttmp15               ; encoding: [0x7b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, m0
+// GFX1250: v_prng_b32_e32 v5, m0                   ; encoding: [0x7d,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_lo
+// GFX1250: v_prng_b32_e32 v5, exec_lo              ; encoding: [0x7e,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_hi
+// GFX1250: v_prng_b32_e32 v5, exec_hi              ; encoding: [0x7f,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, null
+// GFX1250: v_prng_b32_e32 v5, null                 ; encoding: [0x7c,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, -1
+// GFX1250: v_prng_b32_e32 v5, -1                   ; encoding: [0xc1,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, 0.5
+// GFX1250: v_prng_b32_e32 v5, 0.5                  ; encoding: [0xf0,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, src_scc
+// GFX1250: v_prng_b32_e32 v5, src_scc              ; encoding: [0xfd,0x96,0x0a,0x7e]
+
+v_prng_b32 v255, 0xaf123456
+// GFX1250: v_prng_b32_e32 v255, 0xaf123456         ; encoding: [0xff,0x96,0xfe,0x7f,0x56,0x34,0x12,0xaf]
+
 v_rcp_bf16 v5, v1
 // GFX1250: v_rcp_bf16_e32 v5, v1                   ; encoding: [0x01,0xf3,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 76272d25d92d4..aa2e028f661e1 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -169,6 +169,51 @@ v_tanh_bf16 v127, 0x8000
 v_tanh_bf16 v5.h, v1.h
 // GFX1250: v_tanh_bf16_e32 v5.h, v1.h              ; encoding: [0x81,0x95,0x0a,0x7f]
 
+v_prng_b32 v5, v1
+// GFX1250: v_prng_b32_e32 v5, v1                   ; encoding: [0x01,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, v255
+// GFX1250: v_prng_b32_e32 v5, v255                 ; encoding: [0xff,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, s1
+// GFX1250: v_prng_b32_e32 v5, s1                   ; encoding: [0x01,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, s105
+// GFX1250: v_prng_b32_e32 v5, s105                 ; encoding: [0x69,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_lo
+// GFX1250: v_prng_b32_e32 v5, vcc_lo               ; encoding: [0x6a,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_hi
+// GFX1250: v_prng_b32_e32 v5, vcc_hi               ; encoding: [0x6b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, ttmp15
+// GFX1250: v_prng_b32_e32 v5, ttmp15               ; encoding: [0x7b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, m0
+// GFX1250: v_prng_b32_e32 v5, m0                   ; encoding: [0x7d,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_lo
+// GFX1250: v_prng_b32_e32 v5, exec_lo              ; encoding: [0x7e,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_hi
+// GFX1250: v_prng_b32_e32 v5, exec_hi              ; encoding: [0x7f,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, null
+// GFX1250: v_prng_b32_e32 v5, null                 ; encoding: [0x7c,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, -1
+// GFX1250: v_prng_b32_e32 v5, -1                   ; encoding: [0xc1,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, 0.5
+// GFX1250: v_prng_b32_e32 v5, 0.5                  ; encoding: [0xf0,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, src_scc
+// GFX1250: v_prng_b32_e32 v5, src_scc              ; encoding: [0xfd,0x96,0x0a,0x7e]
+
+v_prng_b32 v255, 0xaf123456
+// GFX1250: v_prng_b32_e32 v255, 0xaf123456         ; encoding: [0xff,0x96,0xfe,0x7f,0x56,0x34,0x12,0xaf]
+
 v_rcp_bf16 v5, v1
 // GFX1250: v_rcp_bf16_e32 v5, v1                   ; encoding: [0x01,0xf3,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
index 0a8ee84561d33..e1cd2e3043693 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
@@ -170,6 +170,58 @@ v_tanh_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 f
 // GFX1250: v_tanh_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_prng_b32 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_half_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_prng_b32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_rcp_bf16 v5, v1 quad_perm:[3,2,1,0]
 // GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
index d4afb9d9b2d9a..c1d3238b65cbd 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
@@ -178,6 +178,58 @@ v_tanh_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
 // GFX1250: v_tanh_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7f,0x81,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_prng_b32 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_half_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_prng_b32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_rcp_bf16 v5, v1 quad_perm:[3,2,1,0]
 // GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
index a7cb6bf8de69c..100e9f92ff58b 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
@@ -38,6 +38,18 @@ v_tanh_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX1250: v_tanh_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_prng_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x96,0xfe,0x7f,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_rcp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_rcp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7e,0x01,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
index 6acab7edc0d49..2ae103545443c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
@@ -46,6 +46,18 @@ v_tanh_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_tanh_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x94,0x0a,0x7f,0x81,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_prng_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x96,0xfe,0x7f,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_rcp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_rcp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7e,0x01,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
index 7486d849253e8..9c6a9127d82e4 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
@@ -127,6 +127,42 @@ v_cvt_f32_fp8 v1, v3 byte_sel:1 clamp
 v_cvt_f32_fp8 v1, v3 byte_sel:2 clamp
 // GFX1250: v_cvt_f32_fp8_e64 v1, v3 byte_sel:2 clamp ; encoding: [0x01,0x88,0xec,0xd5,0x03,0x01,0x00,0x00]
 
+v_prng_b32_e64 v5, v1
+// GFX1250: v_prng_b32_e64 v5, v1                   ; encoding: [0x05,0x00,0xcb,0xd5,0x01,0x01,0x00,0x00]
+
+v_prng_b32_e64 v5, v255
+// GFX1250: v_prng_b32_e64 v5, v255                 ; encoding: [0x...
[truncated]

llvmbot · 2025-07-18T04:43:15Z

@llvm/pr-subscribers-mc

Author: Shilei Tian (shiltian)

Changes

Co-authored-by: Mekhanoshin, Stanislav <[email protected]>

Patch is 48.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149450.diff

22 Files Affected:

(modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl (+18)
(modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+2-1)
(modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+1)
(modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll (+10-1)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s (+45)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s (+45)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s (+52)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s (+52)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s (+12)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s (+12)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s (+36)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s (+36)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s (+44)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s (+44)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s (+4)
(modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s (+4)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt (+45)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt (+39)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt (+9)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt (+36)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt (+33)
(modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt (+5-2)

diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index a9ea17642d6ad..52ccf254fe9ce 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -42,6 +42,24 @@ void test_s_wait_tensorcnt() {
   __builtin_amdgcn_s_wait_tensorcnt(0);
 }
 
+// CHECK-LABEL: @test_prng_b32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.prng.b32(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP1]], ptr addrspace(1) [[TMP2]], align 4
+// CHECK-NEXT:    ret void
+//
+void test_prng_b32(global uint* out, uint a) {
+  *out = __builtin_amdgcn_prng_b32(a);
+}
+
 // CHECK-LABEL: @test_tanh_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3d040fb705a8d..84e4fa1dc84aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4007,7 +4007,8 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
   case Intrinsic::amdgcn_rcp_legacy:
   case Intrinsic::amdgcn_rsq_legacy:
   case Intrinsic::amdgcn_rsq_clamp:
-  case Intrinsic::amdgcn_tanh: {
+  case Intrinsic::amdgcn_tanh:
+  case Intrinsic::amdgcn_prng_b32: {
     // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
     SDValue Src = N->getOperand(1);
     return Src.isUndef() ? Src : SDValue();
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1bbbb610305e9..3ee90857b34b8 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1148,6 +1148,7 @@ defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
 defm V_TANH_F32              : VOP1_Real_FULL<GFX1250Gen, 0x01e>;
 defm V_TANH_F16              : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>;
 defm V_TANH_BF16             : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
+defm V_PRNG_B32              : VOP1_Real_FULL<GFX1250Gen, 0x04b>;
 defm V_CVT_F32_BF16          : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16", "V_CVT_F32_BF16_gfx1250">;
 defm V_CVT_PK_F16_FP8        : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
 defm V_CVT_PK_F16_BF8        : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
index 2faf375a97a86..e3cfbd41b923d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s
 
 declare i32 @llvm.amdgcn.prng.b32(i32) #0
 
@@ -27,6 +29,13 @@ define amdgpu_kernel void @prng_b32_constant_100(ptr addrspace(1) %out) #1 {
   ret void
 }
 
+; GCN-LABEL: {{^}}prng_undef_i32:
+; SDAG-NOT: v_prng_b32
+define amdgpu_kernel void @prng_undef_i32(ptr addrspace(1) %out) #1 {
+  %prng = call i32 @llvm.amdgcn.prng.b32(i32 undef)
+  store i32 %prng, ptr addrspace(1) %out, align 4
+  ret void
+}
 
 attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
\ No newline at end of file
+attributes #1 = { nounwind }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
index 279bb262bff04..5f310a9954ad0 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s
@@ -163,6 +163,51 @@ v_tanh_bf16 v5, src_scc
 v_tanh_bf16 v127, 0x8000
 // GFX1250: v_tanh_bf16_e32 v127, 0x8000            ; encoding: [0xff,0x94,0xfe,0x7e,0x00,0x80,0x00,0x00]
 
+v_prng_b32 v5, v1
+// GFX1250: v_prng_b32_e32 v5, v1                   ; encoding: [0x01,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, v255
+// GFX1250: v_prng_b32_e32 v5, v255                 ; encoding: [0xff,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, s1
+// GFX1250: v_prng_b32_e32 v5, s1                   ; encoding: [0x01,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, s105
+// GFX1250: v_prng_b32_e32 v5, s105                 ; encoding: [0x69,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_lo
+// GFX1250: v_prng_b32_e32 v5, vcc_lo               ; encoding: [0x6a,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_hi
+// GFX1250: v_prng_b32_e32 v5, vcc_hi               ; encoding: [0x6b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, ttmp15
+// GFX1250: v_prng_b32_e32 v5, ttmp15               ; encoding: [0x7b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, m0
+// GFX1250: v_prng_b32_e32 v5, m0                   ; encoding: [0x7d,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_lo
+// GFX1250: v_prng_b32_e32 v5, exec_lo              ; encoding: [0x7e,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_hi
+// GFX1250: v_prng_b32_e32 v5, exec_hi              ; encoding: [0x7f,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, null
+// GFX1250: v_prng_b32_e32 v5, null                 ; encoding: [0x7c,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, -1
+// GFX1250: v_prng_b32_e32 v5, -1                   ; encoding: [0xc1,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, 0.5
+// GFX1250: v_prng_b32_e32 v5, 0.5                  ; encoding: [0xf0,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, src_scc
+// GFX1250: v_prng_b32_e32 v5, src_scc              ; encoding: [0xfd,0x96,0x0a,0x7e]
+
+v_prng_b32 v255, 0xaf123456
+// GFX1250: v_prng_b32_e32 v255, 0xaf123456         ; encoding: [0xff,0x96,0xfe,0x7f,0x56,0x34,0x12,0xaf]
+
 v_rcp_bf16 v5, v1
 // GFX1250: v_rcp_bf16_e32 v5, v1                   ; encoding: [0x01,0xf3,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 76272d25d92d4..aa2e028f661e1 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -169,6 +169,51 @@ v_tanh_bf16 v127, 0x8000
 v_tanh_bf16 v5.h, v1.h
 // GFX1250: v_tanh_bf16_e32 v5.h, v1.h              ; encoding: [0x81,0x95,0x0a,0x7f]
 
+v_prng_b32 v5, v1
+// GFX1250: v_prng_b32_e32 v5, v1                   ; encoding: [0x01,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, v255
+// GFX1250: v_prng_b32_e32 v5, v255                 ; encoding: [0xff,0x97,0x0a,0x7e]
+
+v_prng_b32 v5, s1
+// GFX1250: v_prng_b32_e32 v5, s1                   ; encoding: [0x01,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, s105
+// GFX1250: v_prng_b32_e32 v5, s105                 ; encoding: [0x69,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_lo
+// GFX1250: v_prng_b32_e32 v5, vcc_lo               ; encoding: [0x6a,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, vcc_hi
+// GFX1250: v_prng_b32_e32 v5, vcc_hi               ; encoding: [0x6b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, ttmp15
+// GFX1250: v_prng_b32_e32 v5, ttmp15               ; encoding: [0x7b,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, m0
+// GFX1250: v_prng_b32_e32 v5, m0                   ; encoding: [0x7d,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_lo
+// GFX1250: v_prng_b32_e32 v5, exec_lo              ; encoding: [0x7e,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, exec_hi
+// GFX1250: v_prng_b32_e32 v5, exec_hi              ; encoding: [0x7f,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, null
+// GFX1250: v_prng_b32_e32 v5, null                 ; encoding: [0x7c,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, -1
+// GFX1250: v_prng_b32_e32 v5, -1                   ; encoding: [0xc1,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, 0.5
+// GFX1250: v_prng_b32_e32 v5, 0.5                  ; encoding: [0xf0,0x96,0x0a,0x7e]
+
+v_prng_b32 v5, src_scc
+// GFX1250: v_prng_b32_e32 v5, src_scc              ; encoding: [0xfd,0x96,0x0a,0x7e]
+
+v_prng_b32 v255, 0xaf123456
+// GFX1250: v_prng_b32_e32 v255, 0xaf123456         ; encoding: [0xff,0x96,0xfe,0x7f,0x56,0x34,0x12,0xaf]
+
 v_rcp_bf16 v5, v1
 // GFX1250: v_rcp_bf16_e32 v5, v1                   ; encoding: [0x01,0xf3,0x0a,0x7e]
 
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
index 0a8ee84561d33..e1cd2e3043693 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s
@@ -170,6 +170,58 @@ v_tanh_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 f
 // GFX1250: v_tanh_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0x94,0xfe,0x7e,0x7f,0x6f,0x35,0x30]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_prng_b32 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_half_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_prng_b32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_rcp_bf16 v5, v1 quad_perm:[3,2,1,0]
 // GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
index d4afb9d9b2d9a..c1d3238b65cbd 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s
@@ -178,6 +178,58 @@ v_tanh_bf16 v5.h, v1.h quad_perm:[3,2,1,0]
 // GFX1250: v_tanh_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x94,0x0a,0x7f,0x81,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_prng_b32 v5, v1 quad_perm:[3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1b,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 quad_perm:[0,1,2,3]
+// GFX1250: v_prng_b32_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0xe4,0x00,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x40,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_half_mirror
+// GFX1250: v_prng_b32_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x41,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x01,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shl:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x0f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x11,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_shr:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x1f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:1
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x21,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_ror:15
+// GFX1250: v_prng_b32_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x2f,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x50,0x01,0xff]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX1250: v_prng_b32_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x5f,0x01,0x01]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX1250: v_prng_b32_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0x96,0x0a,0x7e,0x01,0x60,0x09,0x13]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_rcp_bf16 v5, v1 quad_perm:[3,2,1,0]
 // GFX1250: v_rcp_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xf2,0x0a,0x7e,0x01,0x1b,0x00,0xff]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
index a7cb6bf8de69c..100e9f92ff58b 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s
@@ -38,6 +38,18 @@ v_tanh_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
 // GFX1250: v_tanh_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x94,0xfe,0x7e,0x7f,0x00,0x00,0x00]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_prng_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x96,0xfe,0x7f,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_rcp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_rcp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7e,0x01,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
index 6acab7edc0d49..2ae103545443c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s
@@ -46,6 +46,18 @@ v_tanh_bf16 v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_tanh_bf16_dpp v5.h, v1.h dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x94,0x0a,0x7f,0x81,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
 
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX1250: v_prng_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0x96,0x0a,0x7e,0x01,0x77,0x39,0x05]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
+v_prng_b32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX1250: v_prng_b32_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0x96,0xfe,0x7f,0xff,0x00,0x00,0x00]
+// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
+
 v_rcp_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX1250: v_rcp_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xf2,0x0a,0x7e,0x01,0x77,0x39,0x05]
 // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
index 7486d849253e8..9c6a9127d82e4 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
@@ -127,6 +127,42 @@ v_cvt_f32_fp8 v1, v3 byte_sel:1 clamp
 v_cvt_f32_fp8 v1, v3 byte_sel:2 clamp
 // GFX1250: v_cvt_f32_fp8_e64 v1, v3 byte_sel:2 clamp ; encoding: [0x01,0x88,0xec,0xd5,0x03,0x01,0x00,0x00]
 
+v_prng_b32_e64 v5, v1
+// GFX1250: v_prng_b32_e64 v5, v1                   ; encoding: [0x05,0x00,0xcb,0xd5,0x01,0x01,0x00,0x00]
+
+v_prng_b32_e64 v5, v255
+// GFX1250: v_prng_b32_e64 v5, v255                 ; encoding: [0x...
[truncated]

github-actions · 2025-07-18T04:45:14Z

✅ With the latest revision this PR passed the undef deprecator.

changpeng

LGTM. But has the builtin definition already been added in clang?

shiltian · 2025-07-18T04:52:29Z

LGTM. But has the builtin definition already been added in clang?

Yes, when we did gfx950 support. This is not really a new instruction in gfx1250.

arsenm · 2025-07-18T05:57:58Z

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

@@ -4007,7 +4007,8 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
  case Intrinsic::amdgcn_rcp_legacy:
  case Intrinsic::amdgcn_rsq_legacy:
  case Intrinsic::amdgcn_rsq_clamp:
-  case Intrinsic::amdgcn_tanh: {
+  case Intrinsic::amdgcn_tanh:
+  case Intrinsic::amdgcn_prng_b32: {


This is unrelated to base support, the comment is unrelated, and I'm not sure it's safe to fold this

It is related to the changes in llvm/test/CodeGen/AMDGPU/llvm.amdgcn.prng.ll.

And based on the LFSR algorithm, it looks safe to me to fold this.

I removed it. Will do it in a follow up.

Co-authored-by: Mekhanoshin, Stanislav <[email protected]>

shiltian · 2025-07-18T14:58:06Z

Merge activity

Jul 18, 2:58 PM UTC: A user started a stack merge that includes this pull request via Graphite.
Jul 18, 2:59 PM UTC: @shiltian merged this pull request with Graphite.

shiltian requested review from changpeng and rampitec July 18, 2025 04:42

llvmbot added clang Clang issues not falling into any other category backend:AMDGPU llvm:mc Machine (object) code labels Jul 18, 2025

shiltian mentioned this pull request Jul 18, 2025

[Clang][AMDGPU] Add the missing builtin __builtin_amdgcn_sqrt_bf16 #149447

Merged

changpeng approved these changes Jul 18, 2025

View reviewed changes

shiltian force-pushed the users/shiltian/v_prng_b32 branch from 8bb64e1 to 487881c Compare July 18, 2025 05:33

arsenm reviewed Jul 18, 2025

View reviewed changes

Base automatically changed from users/shiltian/v_sqrt_bf16-builtin to main July 18, 2025 12:43

[AMDGPU] Add support for v_prng_b32 on gfx1250

e35cd55

Co-authored-by: Mekhanoshin, Stanislav <[email protected]>

shiltian force-pushed the users/shiltian/v_prng_b32 branch from 487881c to e35cd55 Compare July 18, 2025 12:44

remove undef folding to move it forward

63a8bdb

shiltian mentioned this pull request Jul 18, 2025

[AMDGPU] Add support for v_permlane16_swap_b32 on gfx1250 #149518

Merged

arsenm approved these changes Jul 18, 2025

View reviewed changes

shiltian merged commit 95b69e0 into main Jul 18, 2025
9 checks passed

shiltian deleted the users/shiltian/v_prng_b32 branch July 18, 2025 14:59

shiltian mentioned this pull request Jul 18, 2025

[AMDGPU] Add support for v_sat_pk4_i4_[i8,u8] on gfx1250 #149528

Merged

This was referenced Jul 23, 2025

test abhinavgaba/llvm-project#2

Closed

Add dataFence plugin interface abhinavgaba/llvm-project#3

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Add support for `v_prng_b32` on gfx1250 #149450

[AMDGPU] Add support for `v_prng_b32` on gfx1250 #149450

Uh oh!

shiltian commented Jul 18, 2025

Uh oh!

shiltian commented Jul 18, 2025 •

edited

Loading

Uh oh!

llvmbot commented Jul 18, 2025 •

edited

Loading

Uh oh!

llvmbot commented Jul 18, 2025

Uh oh!

github-actions bot commented Jul 18, 2025 •

edited

Loading

Uh oh!

changpeng left a comment •

edited

Loading

Uh oh!

shiltian commented Jul 18, 2025

Uh oh!

arsenm Jul 18, 2025

Uh oh!

shiltian Jul 18, 2025

Uh oh!

shiltian Jul 18, 2025

Uh oh!

shiltian Jul 18, 2025

Uh oh!

shiltian commented Jul 18, 2025 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

[AMDGPU] Add support for v_prng_b32 on gfx1250 #149450

[AMDGPU] Add support for v_prng_b32 on gfx1250 #149450

Uh oh!

Conversation

shiltian commented Jul 18, 2025

Uh oh!

shiltian commented Jul 18, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jul 18, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jul 18, 2025

Uh oh!

github-actions bot commented Jul 18, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

changpeng left a comment • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

shiltian commented Jul 18, 2025

Uh oh!

arsenm Jul 18, 2025

Choose a reason for hiding this comment

Uh oh!

shiltian Jul 18, 2025

Choose a reason for hiding this comment

Uh oh!

shiltian Jul 18, 2025

Choose a reason for hiding this comment

Uh oh!

shiltian Jul 18, 2025

Choose a reason for hiding this comment

Uh oh!

shiltian commented Jul 18, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Merge activity

Uh oh!

Uh oh!

Uh oh!

[AMDGPU] Add support for `v_prng_b32` on gfx1250 #149450

[AMDGPU] Add support for `v_prng_b32` on gfx1250 #149450

shiltian commented Jul 18, 2025 •

edited

Loading

llvmbot commented Jul 18, 2025 •

edited

Loading

github-actions bot commented Jul 18, 2025 •

edited

Loading

changpeng left a comment •

edited

Loading

shiltian commented Jul 18, 2025 •

edited

Loading