@@ -475,28 +475,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
475
475
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
476
476
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
477
477
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
478
+ ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
479
+ ; GFX9-O0-NEXT: s_nop 0
480
+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
478
481
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
479
482
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
480
483
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
481
484
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
482
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
483
- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
484
- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
485
- ; GFX9-O0-NEXT: s_nop 0
486
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
487
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
488
- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
489
- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
485
+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
490
486
; GFX9-O0-NEXT: s_nop 0
491
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
492
- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
493
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
487
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
488
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
494
489
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
495
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
496
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
490
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
497
491
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
498
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
499
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
492
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
500
493
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
501
494
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
502
495
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -507,7 +500,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
507
500
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
508
501
; GFX9-O0-NEXT: s_mov_b32 s14, s13
509
502
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
510
- ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
511
503
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
512
504
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
513
505
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -1046,10 +1038,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
1046
1038
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1047
1039
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1048
1040
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1049
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1050
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1051
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1052
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1041
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1042
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1043
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1044
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1053
1045
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
1054
1046
; GFX9-O0-NEXT: s_mov_b32 s5, s6
1055
1047
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -2667,28 +2659,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2667
2659
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2668
2660
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2669
2661
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
2662
+ ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2663
+ ; GFX9-O0-NEXT: s_nop 0
2664
+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2670
2665
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2671
2666
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2672
2667
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
2673
2668
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
2674
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2675
- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2676
- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2677
- ; GFX9-O0-NEXT: s_nop 0
2678
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2679
- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2680
- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
2681
- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2669
+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2682
2670
; GFX9-O0-NEXT: s_nop 0
2683
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2684
- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2685
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
2671
+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2672
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
2686
2673
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2687
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2688
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
2674
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
2689
2675
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
2690
- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2691
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
2676
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
2692
2677
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
2693
2678
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
2694
2679
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -2699,7 +2684,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2699
2684
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
2700
2685
; GFX9-O0-NEXT: s_mov_b32 s14, s13
2701
2686
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
2702
- ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
2703
2687
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
2704
2688
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
2705
2689
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -3238,10 +3222,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
3238
3222
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3239
3223
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3240
3224
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3241
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3242
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3243
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3244
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3225
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3226
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3227
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3228
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3245
3229
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
3246
3230
; GFX9-O0-NEXT: s_mov_b32 s5, s6
3247
3231
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
0 commit comments