@@ -75,10 +75,10 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
7575; GCN-O0-NEXT: s_waitcnt expcnt(0)
7676; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
7777; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
78- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
79- ; GCN-O0-NEXT: s_waitcnt vmcnt(1)
78+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
8079; GCN-O0-NEXT: v_readlane_b32 s4, v0, 0
8180; GCN-O0-NEXT: v_readlane_b32 s5, v0, 1
81+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
8282; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
8383; GCN-O0-NEXT: s_mov_b32 s0, 0
8484; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
@@ -104,15 +104,16 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) {
104104; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
105105; GCN-O0-NEXT: s_cbranch_execz .LBB0_3
106106; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then
107- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
108107; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
109108; GCN-O0-NEXT: s_waitcnt expcnt(0)
110109; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
111110; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
112111; GCN-O0-NEXT: s_waitcnt vmcnt(0)
113112; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
114113; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
114+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
115115; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
116+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
116117; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
117118; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
118119; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -248,10 +249,10 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
248249; GCN-O0-NEXT: s_waitcnt expcnt(0)
249250; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
250251; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
251- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
252- ; GCN-O0-NEXT: s_waitcnt vmcnt(1)
252+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
253253; GCN-O0-NEXT: v_readlane_b32 s4, v0, 0
254254; GCN-O0-NEXT: v_readlane_b32 s5, v0, 1
255+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
255256; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
256257; GCN-O0-NEXT: s_mov_b32 s0, 0
257258; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1
@@ -277,15 +278,16 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
277278; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
278279; GCN-O0-NEXT: s_cbranch_execz .LBB1_4
279280; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then
280- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
281281; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
282282; GCN-O0-NEXT: s_waitcnt expcnt(0)
283283; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
284284; GCN-O0-NEXT: s_mov_b64 exec, s[8:9]
285285; GCN-O0-NEXT: s_waitcnt vmcnt(0)
286286; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
287287; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
288+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
288289; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
290+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
289291; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
290292; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
291293; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -311,7 +313,6 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
311313; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1]
312314; GCN-O0-NEXT: s_branch .LBB1_5
313315; GCN-O0-NEXT: .LBB1_4: ; %bb.inner.end
314- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
315316; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1
316317; GCN-O0-NEXT: s_waitcnt expcnt(0)
317318; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
@@ -322,7 +323,9 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a
322323; GCN-O0-NEXT: s_or_b64 exec, exec, s[2:3]
323324; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
324325; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
326+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload
325327; GCN-O0-NEXT: v_mov_b32_e32 v0, 2
328+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
326329; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
327330; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
328331; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -508,15 +511,16 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
508511; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1]
509512; GCN-O0-NEXT: s_cbranch_execz .LBB2_5
510513; GCN-O0-NEXT: ; %bb.3: ; %bb.then
511- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
512514; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
513515; GCN-O0-NEXT: s_waitcnt expcnt(0)
514516; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
515517; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
516518; GCN-O0-NEXT: s_waitcnt vmcnt(0)
517519; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
518520; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
521+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
519522; GCN-O0-NEXT: v_mov_b32_e32 v0, 1
523+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
520524; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
521525; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
522526; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -532,15 +536,16 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) {
532536; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64
533537; GCN-O0-NEXT: s_branch .LBB2_5
534538; GCN-O0-NEXT: .LBB2_4: ; %bb.else
535- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
536539; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
537540; GCN-O0-NEXT: s_waitcnt expcnt(0)
538541; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
539542; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
540543; GCN-O0-NEXT: s_waitcnt vmcnt(0)
541544; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0
542545; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1
546+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
543547; GCN-O0-NEXT: v_mov_b32_e32 v0, 2
548+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
544549; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0
545550; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1
546551; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
@@ -943,20 +948,21 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a
943948; GCN-O0-NEXT: s_mov_b64 exec, s[0:1]
944949; GCN-O0-NEXT: s_cbranch_execz .LBB4_2
945950; GCN-O0-NEXT: ; %bb.1: ; %bb.then
946- ; GCN-O0-NEXT: s_waitcnt expcnt(0)
947- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
948951; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1
949952; GCN-O0-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
950953; GCN-O0-NEXT: s_mov_b64 exec, s[6:7]
951954; GCN-O0-NEXT: s_waitcnt vmcnt(0)
952955; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0
953956; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1
957+ ; GCN-O0-NEXT: s_waitcnt expcnt(0)
958+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload
954959; GCN-O0-NEXT: s_mov_b32 s2, 0xf000
955960; GCN-O0-NEXT: s_mov_b32 s4, 0
956961; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
957962; GCN-O0-NEXT: s_mov_b32 s5, s2
958963; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3
959964; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5]
965+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
960966; GCN-O0-NEXT: v_ashrrev_i32_e64 v2, 31, v0
961967; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
962968; GCN-O0-NEXT: v_mov_b32_e32 v1, v2
@@ -1092,14 +1098,14 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
10921098; GCN-O0-NEXT: s_waitcnt expcnt(0)
10931099; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
10941100; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
1095- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1096- ; GCN-O0-NEXT: s_waitcnt vmcnt(1)
1101+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
10971102; GCN-O0-NEXT: v_readlane_b32 s8, v0, 2
10981103; GCN-O0-NEXT: v_readlane_b32 s9, v0, 3
10991104; GCN-O0-NEXT: v_readlane_b32 s6, v0, 0
11001105; GCN-O0-NEXT: v_readlane_b32 s7, v0, 1
11011106; GCN-O0-NEXT: v_writelane_b32 v0, s6, 4
11021107; GCN-O0-NEXT: v_writelane_b32 v0, s7, 5
1108+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
11031109; GCN-O0-NEXT: s_mov_b32 s4, 0x207
11041110; GCN-O0-NEXT: s_waitcnt vmcnt(0)
11051111; GCN-O0-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, s4
@@ -1122,11 +1128,11 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
11221128; GCN-O0-NEXT: s_waitcnt expcnt(0)
11231129; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
11241130; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
1125- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
1126- ; GCN-O0-NEXT: s_waitcnt vmcnt(1)
1131+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
11271132; GCN-O0-NEXT: v_readlane_b32 s4, v0, 6
11281133; GCN-O0-NEXT: v_readlane_b32 s5, v0, 7
11291134; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
1135+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
11301136; GCN-O0-NEXT: s_mov_b32 s6, 0
11311137; GCN-O0-NEXT: s_waitcnt vmcnt(0)
11321138; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], v1, s6
@@ -1216,18 +1222,20 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
12161222; GCN-O0-NEXT: s_branch .LBB5_6
12171223; GCN-O0-NEXT: .LBB5_5: ; %Flow2
12181224; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
1219- ; GCN-O0-NEXT: s_waitcnt expcnt(0)
1220- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
1221- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
1222- ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
1223- ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
12241225; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
1226+ ; GCN-O0-NEXT: s_waitcnt expcnt(1)
12251227; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
12261228; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
12271229; GCN-O0-NEXT: s_waitcnt vmcnt(0)
12281230; GCN-O0-NEXT: v_readlane_b32 s4, v4, 10
12291231; GCN-O0-NEXT: v_readlane_b32 s5, v4, 11
12301232; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
1233+ ; GCN-O0-NEXT: s_waitcnt expcnt(0)
1234+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
1235+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
1236+ ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
1237+ ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1238+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
12311239; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
12321240; GCN-O0-NEXT: s_waitcnt vmcnt(0)
12331241; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
@@ -1236,18 +1244,20 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
12361244; GCN-O0-NEXT: s_branch .LBB5_7
12371245; GCN-O0-NEXT: .LBB5_6: ; %Flow
12381246; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
1239- ; GCN-O0-NEXT: s_waitcnt expcnt(0)
1240- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1241- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1242- ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1243- ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
12441247; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
1248+ ; GCN-O0-NEXT: s_waitcnt expcnt(1)
12451249; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
12461250; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
12471251; GCN-O0-NEXT: s_waitcnt vmcnt(0)
12481252; GCN-O0-NEXT: v_readlane_b32 s4, v4, 12
12491253; GCN-O0-NEXT: v_readlane_b32 s5, v4, 13
12501254; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5]
1255+ ; GCN-O0-NEXT: s_waitcnt expcnt(0)
1256+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1257+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1258+ ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1259+ ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
1260+ ; GCN-O0-NEXT: s_waitcnt vmcnt(0)
12511261; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
12521262; GCN-O0-NEXT: s_waitcnt vmcnt(0)
12531263; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
@@ -1291,11 +1301,6 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
12911301; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
12921302; GCN-O0-NEXT: .LBB5_9: ; %Flow3
12931303; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1
1294- ; GCN-O0-NEXT: s_waitcnt expcnt(0)
1295- ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1296- ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1297- ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1298- ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
12991304; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
13001305; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload
13011306; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
@@ -1307,6 +1312,11 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
13071312; GCN-O0-NEXT: v_readlane_b32 s7, v4, 5
13081313; GCN-O0-NEXT: v_readlane_b32 s4, v4, 14
13091314; GCN-O0-NEXT: v_readlane_b32 s5, v4, 15
1315+ ; GCN-O0-NEXT: s_waitcnt expcnt(0)
1316+ ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1317+ ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1318+ ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1319+ ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
13101320; GCN-O0-NEXT: s_and_b64 s[4:5], exec, s[4:5]
13111321; GCN-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
13121322; GCN-O0-NEXT: s_mov_b64 s[6:7], 0
@@ -1321,6 +1331,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 {
13211331; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1
13221332; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill
13231333; GCN-O0-NEXT: s_mov_b64 exec, s[14:15]
1334+ ; GCN-O0-NEXT: s_waitcnt vmcnt(1)
13241335; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
13251336; GCN-O0-NEXT: s_waitcnt vmcnt(0)
13261337; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
0 commit comments