@@ -475,21 +475,28 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
475
475
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
476
476
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
477
477
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
478
- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
479
- ; GFX9-O0-NEXT: s_nop 0
480
- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
481
478
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
482
479
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
483
480
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
484
481
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
485
- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
482
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
483
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
484
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
485
+ ; GFX9-O0-NEXT: s_nop 0
486
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
487
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
488
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
489
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
486
490
; GFX9-O0-NEXT: s_nop 0
487
- ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
488
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
491
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
492
+ ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
493
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
489
494
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
490
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
495
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
496
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
491
497
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
492
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
498
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
499
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
493
500
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
494
501
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
495
502
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -500,6 +507,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
500
507
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
501
508
; GFX9-O0-NEXT: s_mov_b32 s14, s13
502
509
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
510
+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
503
511
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
504
512
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
505
513
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -1035,10 +1043,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
1035
1043
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1036
1044
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1037
1045
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1038
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1039
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1040
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1041
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1046
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1047
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1048
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1049
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1042
1050
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
1043
1051
; GFX9-O0-NEXT: s_mov_b32 s5, s6
1044
1052
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -2656,21 +2664,28 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2656
2664
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2657
2665
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2658
2666
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
2659
- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2660
- ; GFX9-O0-NEXT: s_nop 0
2661
- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2662
2667
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2663
2668
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2664
2669
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
2665
2670
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
2666
- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2671
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2672
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2673
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2674
+ ; GFX9-O0-NEXT: s_nop 0
2675
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2676
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2677
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
2678
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2667
2679
; GFX9-O0-NEXT: s_nop 0
2668
- ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2669
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
2680
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2681
+ ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2682
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
2670
2683
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2671
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
2684
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2685
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
2672
2686
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
2673
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
2687
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2688
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
2674
2689
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
2675
2690
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
2676
2691
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -2681,6 +2696,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2681
2696
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
2682
2697
; GFX9-O0-NEXT: s_mov_b32 s14, s13
2683
2698
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
2699
+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
2684
2700
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
2685
2701
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
2686
2702
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -3216,10 +3232,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
3216
3232
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3217
3233
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3218
3234
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3219
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3220
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3221
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3222
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3235
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3236
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3237
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3238
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3223
3239
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
3224
3240
; GFX9-O0-NEXT: s_mov_b32 s5, s6
3225
3241
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
0 commit comments