@@ -3390,6 +3390,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
33903390; GFX1264_DPP-NEXT: v_readlane_b32 s2, v2, 31
33913391; GFX1264_DPP-NEXT: v_mov_b32_dpp v3, v4 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
33923392; GFX1264_DPP-NEXT: s_wait_alu 0xf1ff
3393+ ; GFX1264_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
33933394; GFX1264_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v2, s2 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
33943395; GFX1264_DPP-NEXT: v_mov_b32_e32 v4, 0
33953396; GFX1264_DPP-NEXT: s_wait_alu 0xfffd
@@ -3445,6 +3446,7 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
34453446; GFX1264_DPP-NEXT: v_mov_b32_e32 v9, v5
34463447; GFX1264_DPP-NEXT: v_readfirstlane_b32 s3, v7
34473448; GFX1264_DPP-NEXT: s_wait_alu 0xf1ff
3449+ ; GFX1264_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
34483450; GFX1264_DPP-NEXT: v_add_co_u32 v6, vcc, s2, v8
34493451; GFX1264_DPP-NEXT: s_mov_b32 s2, s6
34503452; GFX1264_DPP-NEXT: s_wait_alu 0xfffd
@@ -6954,6 +6956,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
69546956; GFX1264_DPP-NEXT: v_readlane_b32 s2, v2, 31
69556957; GFX1264_DPP-NEXT: v_mov_b32_dpp v3, v4 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
69566958; GFX1264_DPP-NEXT: s_wait_alu 0xf1ff
6959+ ; GFX1264_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
69576960; GFX1264_DPP-NEXT: v_add_co_u32_e64_dpp v2, vcc, v2, s2 quad_perm:[0,1,2,3] row_mask:0xc bank_mask:0xf
69586961; GFX1264_DPP-NEXT: v_mov_b32_e32 v4, 0
69596962; GFX1264_DPP-NEXT: s_wait_alu 0xfffd
@@ -7009,6 +7012,7 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
70097012; GFX1264_DPP-NEXT: v_mov_b32_e32 v9, v5
70107013; GFX1264_DPP-NEXT: v_readfirstlane_b32 s3, v7
70117014; GFX1264_DPP-NEXT: s_wait_alu 0xf1ff
7015+ ; GFX1264_DPP-NEXT: s_delay_alu instid0(VALU_DEP_3)
70127016; GFX1264_DPP-NEXT: v_sub_co_u32 v6, vcc, s2, v8
70137017; GFX1264_DPP-NEXT: s_mov_b32 s2, s6
70147018; GFX1264_DPP-NEXT: s_wait_alu 0xfffd
@@ -8233,6 +8237,7 @@ define amdgpu_kernel void @uniform_add_i8(ptr addrspace(1) %result, ptr addrspac
82338237; GFX1264-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
82348238; GFX1264-TRUE16-NEXT: v_readfirstlane_b32 s2, v0
82358239; GFX1264-TRUE16-NEXT: s_wait_alu 0xf1ff
8240+ ; GFX1264-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
82368241; GFX1264-TRUE16-NEXT: v_mad_u16 v0.l, s10, v4.l, s2
82378242; GFX1264-TRUE16-NEXT: s_mov_b32 s2, -1
82388243; GFX1264-TRUE16-NEXT: buffer_store_b8 v0, off, s[0:3], null
@@ -8298,6 +8303,7 @@ define amdgpu_kernel void @uniform_add_i8(ptr addrspace(1) %result, ptr addrspac
82988303; GFX1264-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
82998304; GFX1264-FAKE16-NEXT: v_readfirstlane_b32 s2, v0
83008305; GFX1264-FAKE16-NEXT: s_wait_alu 0xf1ff
8306+ ; GFX1264-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
83018307; GFX1264-FAKE16-NEXT: v_mad_u16 v0, s10, v4, s2
83028308; GFX1264-FAKE16-NEXT: s_mov_b32 s2, -1
83038309; GFX1264-FAKE16-NEXT: buffer_store_b8 v0, off, s[0:3], null
@@ -8364,6 +8370,7 @@ define amdgpu_kernel void @uniform_add_i8(ptr addrspace(1) %result, ptr addrspac
83648370; GFX1232-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
83658371; GFX1232-TRUE16-NEXT: v_readfirstlane_b32 s2, v0
83668372; GFX1232-TRUE16-NEXT: s_wait_alu 0xf1ff
8373+ ; GFX1232-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
83678374; GFX1232-TRUE16-NEXT: v_mad_u16 v0.l, s8, v4.l, s2
83688375; GFX1232-TRUE16-NEXT: s_mov_b32 s2, -1
83698376; GFX1232-TRUE16-NEXT: buffer_store_b8 v0, off, s[0:3], null
@@ -8429,6 +8436,7 @@ define amdgpu_kernel void @uniform_add_i8(ptr addrspace(1) %result, ptr addrspac
84298436; GFX1232-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
84308437; GFX1232-FAKE16-NEXT: v_readfirstlane_b32 s2, v0
84318438; GFX1232-FAKE16-NEXT: s_wait_alu 0xf1ff
8439+ ; GFX1232-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
84328440; GFX1232-FAKE16-NEXT: v_mad_u16 v0, s8, v4, s2
84338441; GFX1232-FAKE16-NEXT: s_mov_b32 s2, -1
84348442; GFX1232-FAKE16-NEXT: buffer_store_b8 v0, off, s[0:3], null
@@ -8818,7 +8826,7 @@ define amdgpu_kernel void @uniform_or_i16(ptr addrspace(1) %result, ptr addrspac
88188826; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, exec_lo, 0
88198827; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v0, exec_hi, v0
88208828; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
8821- ; GFX7LESS-NEXT: ; implicit-def: $vgpr0
8829+ ; GFX7LESS-NEXT: ; implicit-def: $vgpr0
88228830; GFX7LESS-NEXT: s_and_saveexec_b64 s[4:5], vcc
88238831; GFX7LESS-NEXT: s_cbranch_execz .LBB15_2
88248832; GFX7LESS-NEXT: ; %bb.1:
@@ -9328,7 +9336,7 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
93289336; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s6, 0
93299337; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v4, s7, v0
93309338; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
9331- ; GFX7LESS-NEXT: ; implicit-def: $vgpr0
9339+ ; GFX7LESS-NEXT: ; implicit-def: $vgpr0
93329340; GFX7LESS-NEXT: s_and_saveexec_b64 s[8:9], vcc
93339341; GFX7LESS-NEXT: s_cbranch_execz .LBB16_4
93349342; GFX7LESS-NEXT: ; %bb.1:
@@ -9931,6 +9939,7 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
99319939; GFX1264-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
99329940; GFX1264-TRUE16-NEXT: v_readfirstlane_b32 s2, v0
99339941; GFX1264-TRUE16-NEXT: s_wait_alu 0xf1ff
9942+ ; GFX1264-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
99349943; GFX1264-TRUE16-NEXT: v_mad_u16 v0.l, s10, v4.l, s2
99359944; GFX1264-TRUE16-NEXT: s_mov_b32 s2, -1
99369945; GFX1264-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
@@ -9996,6 +10005,7 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
999610005; GFX1264-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
999710006; GFX1264-FAKE16-NEXT: v_readfirstlane_b32 s2, v0
999810007; GFX1264-FAKE16-NEXT: s_wait_alu 0xf1ff
10008+ ; GFX1264-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
999910009; GFX1264-FAKE16-NEXT: v_mad_u16 v0, s10, v4, s2
1000010010; GFX1264-FAKE16-NEXT: s_mov_b32 s2, -1
1000110011; GFX1264-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
@@ -10062,6 +10072,7 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
1006210072; GFX1232-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
1006310073; GFX1232-TRUE16-NEXT: v_readfirstlane_b32 s2, v0
1006410074; GFX1232-TRUE16-NEXT: s_wait_alu 0xf1ff
10075+ ; GFX1232-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1006510076; GFX1232-TRUE16-NEXT: v_mad_u16 v0.l, s8, v4.l, s2
1006610077; GFX1232-TRUE16-NEXT: s_mov_b32 s2, -1
1006710078; GFX1232-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
@@ -10127,6 +10138,7 @@ define amdgpu_kernel void @uniform_add_i16(ptr addrspace(1) %result, ptr addrspa
1012710138; GFX1232-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
1012810139; GFX1232-FAKE16-NEXT: v_readfirstlane_b32 s2, v0
1012910140; GFX1232-FAKE16-NEXT: s_wait_alu 0xf1ff
10141+ ; GFX1232-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1013010142; GFX1232-FAKE16-NEXT: v_mad_u16 v0, s8, v4, s2
1013110143; GFX1232-FAKE16-NEXT: s_mov_b32 s2, -1
1013210144; GFX1232-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
@@ -12703,6 +12715,7 @@ define amdgpu_kernel void @uniform_fadd_v2bf16(ptr addrspace(1) %result, ptr add
1270312715; GFX1264-FAKE16-NEXT: v_add3_u32 v4, v4, v2, 0x7fff
1270412716; GFX1264-FAKE16-NEXT: v_cmp_u_f32_e64 s[0:1], v0, v0
1270512717; GFX1264-FAKE16-NEXT: s_wait_alu 0xfffd
12718+ ; GFX1264-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1270612719; GFX1264-FAKE16-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc
1270712720; GFX1264-FAKE16-NEXT: s_wait_alu 0xf1ff
1270812721; GFX1264-FAKE16-NEXT: v_cndmask_b32_e64 v0, v3, v5, s[0:1]
@@ -12816,6 +12829,7 @@ define amdgpu_kernel void @uniform_fadd_v2bf16(ptr addrspace(1) %result, ptr add
1281612829; GFX1232-FAKE16-NEXT: v_add3_u32 v4, v4, v2, 0x7fff
1281712830; GFX1232-FAKE16-NEXT: v_cmp_u_f32_e64 s0, v0, v0
1281812831; GFX1232-FAKE16-NEXT: s_wait_alu 0xfffd
12832+ ; GFX1232-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1281912833; GFX1232-FAKE16-NEXT: v_cndmask_b32_e32 v2, v4, v6, vcc_lo
1282012834; GFX1232-FAKE16-NEXT: s_wait_alu 0xf1ff
1282112835; GFX1232-FAKE16-NEXT: v_cndmask_b32_e64 v0, v3, v5, s0
0 commit comments