Skip to content

Commit d52e78e

Browse files
authored
[AMDGPU] Add clamp support to v_add_{max|min}_{i|u}32 (#164489)
1 parent 228a353 commit d52e78e

File tree

6 files changed

+122
-86
lines changed

6 files changed

+122
-86
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -775,10 +775,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
775775
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
776776

777777
let SubtargetPredicate = HasAddMinMaxInsts, isCommutable = 1, isReMaterializable = 1 in {
778-
defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP_I32_I32_I32_I32>;
779-
defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP_I32_I32_I32_I32>;
780-
defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP_I32_I32_I32_I32>;
781-
defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP_I32_I32_I32_I32>;
778+
defm V_ADD_MAX_I32 : VOP3Inst <"v_add_max_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
779+
defm V_ADD_MAX_U32 : VOP3Inst <"v_add_max_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
780+
defm V_ADD_MIN_I32 : VOP3Inst <"v_add_min_i32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
781+
defm V_ADD_MIN_U32 : VOP3Inst <"v_add_min_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
782782
}
783783

784784
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;

llvm/test/CodeGen/AMDGPU/add-max.ll

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
66
; GCN-LABEL: add_max_u32_vvv:
77
; GCN: ; %bb.0:
8-
; GCN-NEXT: v_add_max_u32_e64 v0, v0, v1, v2
8+
; GCN-NEXT: v_add_max_u32 v0, v0, v1, v2
99
; GCN-NEXT: ; return to shader part epilog
1010
%add = add i32 %a, %b
1111
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -16,7 +16,7 @@ define amdgpu_ps float @add_max_u32_vvv(i32 %a, i32 %b, i32 %c) {
1616
define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
1717
; GCN-LABEL: add_max_u32_svv:
1818
; GCN: ; %bb.0:
19-
; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, v1
19+
; GCN-NEXT: v_add_max_u32 v0, s0, v0, v1
2020
; GCN-NEXT: ; return to shader part epilog
2121
%add = add i32 %a, %b
2222
%max = call i32 @llvm.umax.i32(i32 %add, i32 %c)
@@ -27,7 +27,7 @@ define amdgpu_ps float @add_max_u32_svv(i32 inreg %a, i32 %b, i32 %c) {
2727
define amdgpu_ps float @add_max_u32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
2828
; SDAG-LABEL: add_max_u32_ssv:
2929
; SDAG: ; %bb.0:
30-
; SDAG-NEXT: v_add_max_u32_e64 v0, s0, s1, v0
30+
; SDAG-NEXT: v_add_max_u32 v0, s0, s1, v0
3131
; SDAG-NEXT: ; return to shader part epilog
3232
;
3333
; GISEL-LABEL: add_max_u32_ssv:
@@ -59,7 +59,7 @@ define amdgpu_ps float @add_max_u32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c
5959
define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
6060
; GCN-LABEL: add_max_u32_vsi:
6161
; GCN: ; %bb.0:
62-
; GCN-NEXT: v_add_max_u32_e64 v0, v0, s0, 4
62+
; GCN-NEXT: v_add_max_u32 v0, v0, s0, 4
6363
; GCN-NEXT: ; return to shader part epilog
6464
%add = add i32 %a, %b
6565
%max = call i32 @llvm.umax.i32(i32 %add, i32 4)
@@ -70,7 +70,7 @@ define amdgpu_ps float @add_max_u32_vsi(i32 %a, i32 inreg %b) {
7070
define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
7171
; GCN-LABEL: add_max_u32_svl:
7272
; GCN: ; %bb.0:
73-
; GCN-NEXT: v_add_max_u32_e64 v0, s0, v0, 0x64
73+
; GCN-NEXT: v_add_max_u32 v0, s0, v0, 0x64
7474
; GCN-NEXT: ; return to shader part epilog
7575
%add = add i32 %a, %b
7676
%max = call i32 @llvm.umax.i32(i32 %add, i32 100)
@@ -81,7 +81,7 @@ define amdgpu_ps float @add_max_u32_svl(i32 inreg %a, i32 %b) {
8181
define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
8282
; SDAG-LABEL: add_max_u32_slv:
8383
; SDAG: ; %bb.0:
84-
; SDAG-NEXT: v_add_max_u32_e64 v0, 0x64, s0, v0
84+
; SDAG-NEXT: v_add_max_u32 v0, 0x64, s0, v0
8585
; SDAG-NEXT: ; return to shader part epilog
8686
;
8787
; GISEL-LABEL: add_max_u32_slv:
@@ -99,7 +99,7 @@ define amdgpu_ps float @add_max_u32_slv(i32 inreg %a, i32 %b) {
9999
define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
100100
; GCN-LABEL: add_max_i32_vvv:
101101
; GCN: ; %bb.0:
102-
; GCN-NEXT: v_add_max_i32_e64 v0, v0, v1, v2
102+
; GCN-NEXT: v_add_max_i32 v0, v0, v1, v2
103103
; GCN-NEXT: ; return to shader part epilog
104104
%add = add i32 %a, %b
105105
%max = call i32 @llvm.smax.i32(i32 %add, i32 %c)
@@ -110,7 +110,7 @@ define amdgpu_ps float @add_max_i32_vvv(i32 %a, i32 %b, i32 %c) {
110110
define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
111111
; GCN-LABEL: add_min_u32_vvv:
112112
; GCN: ; %bb.0:
113-
; GCN-NEXT: v_add_min_u32_e64 v0, v0, v1, v2
113+
; GCN-NEXT: v_add_min_u32 v0, v0, v1, v2
114114
; GCN-NEXT: ; return to shader part epilog
115115
%add = add i32 %a, %b
116116
%max = call i32 @llvm.umin.i32(i32 %add, i32 %c)
@@ -121,7 +121,7 @@ define amdgpu_ps float @add_min_u32_vvv(i32 %a, i32 %b, i32 %c) {
121121
define amdgpu_ps float @add_min_i32_vvv(i32 %a, i32 %b, i32 %c) {
122122
; GCN-LABEL: add_min_i32_vvv:
123123
; GCN: ; %bb.0:
124-
; GCN-NEXT: v_add_min_i32_e64 v0, v0, v1, v2
124+
; GCN-NEXT: v_add_min_i32 v0, v0, v1, v2
125125
; GCN-NEXT: ; return to shader part epilog
126126
%add = add i32 %a, %b
127127
%max = call i32 @llvm.smin.i32(i32 %add, i32 %c)

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -39137,7 +39137,7 @@ define bfloat @v_sitofp_i64_to_bf16(i64 %x) {
3913739137
; GFX1250-NEXT: v_ashrrev_i32_e32 v2, 31, v2
3913839138
; GFX1250-NEXT: v_add_nc_u32_e32 v2, 32, v2
3913939139
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
39140-
; GFX1250-NEXT: v_add_min_u32_e64 v2, v3, -1, v2
39140+
; GFX1250-NEXT: v_add_min_u32 v2, v3, -1, v2
3914139141
; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v2, v[0:1]
3914239142
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3914339143
; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0
@@ -39487,8 +39487,8 @@ define <2 x bfloat> @v_sitofp_v2i64_to_v2bf16(<2 x i64> %x) {
3948739487
; GFX1250-NEXT: v_dual_ashrrev_i32 v5, 31, v5 :: v_dual_ashrrev_i32 v4, 31, v4
3948839488
; GFX1250-NEXT: v_dual_add_nc_u32 v5, 32, v5 :: v_dual_add_nc_u32 v4, 32, v4
3948939489
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
39490-
; GFX1250-NEXT: v_add_min_u32_e64 v5, v7, -1, v5
39491-
; GFX1250-NEXT: v_add_min_u32_e64 v4, v6, -1, v4
39490+
; GFX1250-NEXT: v_add_min_u32 v5, v7, -1, v5
39491+
; GFX1250-NEXT: v_add_min_u32 v4, v6, -1, v4
3949239492
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3949339493
; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v5, v[0:1]
3949439494
; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v4, v[2:3]
@@ -39979,9 +39979,9 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
3997939979
; GFX1250TRUE16-NEXT: v_dual_add_nc_u32 v7, 32, v7 :: v_dual_add_nc_u32 v6, 32, v6
3998039980
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3998139981
; GFX1250TRUE16-NEXT: v_ashrrev_i32_e32 v8, 31, v8
39982-
; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v7, v10, -1, v7
39982+
; GFX1250TRUE16-NEXT: v_add_min_u32 v7, v10, -1, v7
3998339983
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
39984-
; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v6, v9, -1, v6
39984+
; GFX1250TRUE16-NEXT: v_add_min_u32 v6, v9, -1, v6
3998539985
; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[2:3], v7, v[2:3]
3998639986
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3998739987
; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[4:5], v6, v[4:5]
@@ -39991,7 +39991,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
3999139991
; GFX1250TRUE16-NEXT: v_min_u32_e32 v4, 1, v4
3999239992
; GFX1250TRUE16-NEXT: v_or_b32_e32 v2, v3, v2
3999339993
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
39994-
; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v8, v11, -1, v8
39994+
; GFX1250TRUE16-NEXT: v_add_min_u32 v8, v11, -1, v8
3999539995
; GFX1250TRUE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v4, v5, v4 bitop3:0x54
3999639996
; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3999739997
; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2
@@ -40027,8 +40027,8 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
4002740027
; GFX1250FAKE16-NEXT: v_dual_ashrrev_i32 v6, 31, v6 :: v_dual_ashrrev_i32 v7, 31, v7
4002840028
; GFX1250FAKE16-NEXT: v_dual_add_nc_u32 v6, 32, v6 :: v_dual_add_nc_u32 v7, 32, v7
4002940029
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
40030-
; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v6, v10, -1, v6
40031-
; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v7, v11, -1, v7
40030+
; GFX1250FAKE16-NEXT: v_add_min_u32 v6, v10, -1, v6
40031+
; GFX1250FAKE16-NEXT: v_add_min_u32 v7, v11, -1, v7
4003240032
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4003340033
; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
4003440034
; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
@@ -40038,7 +40038,7 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
4003840038
; GFX1250FAKE16-NEXT: v_min_u32_e32 v0, 1, v0
4003940039
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4004040040
; GFX1250FAKE16-NEXT: v_or_b32_e32 v2, v3, v2
40041-
; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v8, v9, -1, v8
40041+
; GFX1250FAKE16-NEXT: v_add_min_u32 v8, v9, -1, v8
4004240042
; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4004340043
; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
4004440044
; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v2, v2
@@ -40656,18 +40656,18 @@ define <4 x bfloat> @v_sitofp_v4i64_to_v4bf16(<4 x i64> %x) {
4065640656
; GFX1250-NEXT: v_dual_add_nc_u32 v9, 32, v9 :: v_dual_add_nc_u32 v8, 32, v8
4065740657
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
4065840658
; GFX1250-NEXT: v_dual_ashrrev_i32 v10, 31, v10 :: v_dual_bitop2_b32 v11, v0, v1 bitop3:0x14
40659-
; GFX1250-NEXT: v_add_min_u32_e64 v9, v13, -1, v9
40659+
; GFX1250-NEXT: v_add_min_u32 v9, v13, -1, v9
4066040660
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
40661-
; GFX1250-NEXT: v_add_min_u32_e64 v8, v12, -1, v8
40661+
; GFX1250-NEXT: v_add_min_u32 v8, v12, -1, v8
4066240662
; GFX1250-NEXT: v_dual_ashrrev_i32 v11, 31, v11 :: v_dual_add_nc_u32 v10, 32, v10
4066340663
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4066440664
; GFX1250-NEXT: v_lshlrev_b64_e32 v[4:5], v9, v[4:5]
4066540665
; GFX1250-NEXT: v_lshlrev_b64_e32 v[6:7], v8, v[6:7]
4066640666
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
4066740667
; GFX1250-NEXT: v_add_nc_u32_e32 v11, 32, v11
40668-
; GFX1250-NEXT: v_add_min_u32_e64 v10, v14, -1, v10
40668+
; GFX1250-NEXT: v_add_min_u32 v10, v14, -1, v10
4066940669
; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
40670-
; GFX1250-NEXT: v_add_min_u32_e64 v11, v15, -1, v11
40670+
; GFX1250-NEXT: v_add_min_u32 v11, v15, -1, v11
4067140671
; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v10, v[2:3]
4067240672
; GFX1250-NEXT: v_min_u32_e32 v6, 1, v6
4067340673
; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4

llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s

Lines changed: 32 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -218,64 +218,76 @@ v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp
218218
// GFX1250: v_mad_nc_i64_i32 v[2:3], s4, v7, v[8:9] clamp ; encoding: [0x02,0x80,0xfb,0xd6,0x04,0x0e,0x22,0x04]
219219

220220
v_add_min_i32 v2, s4, v7, v8
221-
// GFX1250: v_add_min_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
221+
// GFX1250: v_add_min_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0e,0x22,0x04]
222222

223223
v_add_min_i32 v2, v4, 0, 1
224-
// GFX1250: v_add_min_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
224+
// GFX1250: v_add_min_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x01,0x05,0x02]
225225

226226
v_add_min_i32 v2, v4, 3, s2
227-
// GFX1250: v_add_min_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
227+
// GFX1250: v_add_min_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x07,0x09,0x00]
228228

229229
v_add_min_i32 v2, s4, 4, v2
230-
// GFX1250: v_add_min_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
230+
// GFX1250: v_add_min_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x08,0x09,0x04]
231231

232232
v_add_min_i32 v2, v4, v7, 12345
233-
// GFX1250: v_add_min_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
233+
// GFX1250: v_add_min_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x60,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
234+
235+
v_add_min_i32 v0, v1, v2, v3 clamp
236+
// GFX1250: v_add_min_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x60,0xd6,0x01,0x05,0x0e,0x04]
234237

235238
v_add_max_i32 v2, s4, v7, v8
236-
// GFX1250: v_add_max_i32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
239+
// GFX1250: v_add_max_i32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0e,0x22,0x04]
237240

238241
v_add_max_i32 v2, v4, 0, 1
239-
// GFX1250: v_add_max_i32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
242+
// GFX1250: v_add_max_i32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x01,0x05,0x02]
240243

241244
v_add_max_i32 v2, v4, 3, s2
242-
// GFX1250: v_add_max_i32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
245+
// GFX1250: v_add_max_i32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x07,0x09,0x00]
243246

244247
v_add_max_i32 v2, s4, 4, v2
245-
// GFX1250: v_add_max_i32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
248+
// GFX1250: v_add_max_i32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x08,0x09,0x04]
246249

247250
v_add_max_i32 v2, v4, v7, 12345
248-
// GFX1250: v_add_max_i32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
251+
// GFX1250: v_add_max_i32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5e,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
252+
253+
v_add_max_i32 v0, v1, v2, v3 clamp
254+
// GFX1250: v_add_max_i32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5e,0xd6,0x01,0x05,0x0e,0x04]
249255

250256
v_add_min_u32 v2, s4, v7, v8
251-
// GFX1250: v_add_min_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
257+
// GFX1250: v_add_min_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0e,0x22,0x04]
252258

253259
v_add_min_u32 v2, v4, 0, 1
254-
// GFX1250: v_add_min_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
260+
// GFX1250: v_add_min_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x01,0x05,0x02]
255261

256262
v_add_min_u32 v2, v4, 3, s2
257-
// GFX1250: v_add_min_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
263+
// GFX1250: v_add_min_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x07,0x09,0x00]
258264

259265
v_add_min_u32 v2, s4, 4, v2
260-
// GFX1250: v_add_min_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
266+
// GFX1250: v_add_min_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x08,0x09,0x04]
261267

262268
v_add_min_u32 v2, v4, v7, 12345
263-
// GFX1250: v_add_min_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
269+
// GFX1250: v_add_min_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x61,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
270+
271+
v_add_min_u32 v0, v1, v2, v3 clamp
272+
// GFX1250: v_add_min_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x61,0xd6,0x01,0x05,0x0e,0x04]
264273

265274
v_add_max_u32 v2, s4, v7, v8
266-
// GFX1250: v_add_max_u32_e64 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
275+
// GFX1250: v_add_max_u32 v2, s4, v7, v8 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0e,0x22,0x04]
267276

268277
v_add_max_u32 v2, v4, 0, 1
269-
// GFX1250: v_add_max_u32_e64 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
278+
// GFX1250: v_add_max_u32 v2, v4, 0, 1 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x01,0x05,0x02]
270279

271280
v_add_max_u32 v2, v4, 3, s2
272-
// GFX1250: v_add_max_u32_e64 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
281+
// GFX1250: v_add_max_u32 v2, v4, 3, s2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x07,0x09,0x00]
273282

274283
v_add_max_u32 v2, s4, 4, v2
275-
// GFX1250: v_add_max_u32_e64 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
284+
// GFX1250: v_add_max_u32 v2, s4, 4, v2 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x08,0x09,0x04]
276285

277286
v_add_max_u32 v2, v4, v7, 12345
278-
// GFX1250: v_add_max_u32_e64 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
287+
// GFX1250: v_add_max_u32 v2, v4, v7, 0x3039 ; encoding: [0x02,0x00,0x5f,0xd6,0x04,0x0f,0xfe,0x03,0x39,0x30,0x00,0x00]
288+
289+
v_add_max_u32 v0, v1, v2, v3 clamp
290+
// GFX1250: v_add_max_u32 v0, v1, v2, v3 clamp ; encoding: [0x00,0x80,0x5f,0xd6,0x01,0x05,0x0e,0x04]
279291

280292
v_cvt_pk_bf16_f32 v5, v1, v2
281293
// GFX1250: v_cvt_pk_bf16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x6d,0xd7,0x01,0x05,0x02,0x00]

0 commit comments

Comments
 (0)