Skip to content

Commit f30ba4e

Browse files
committed
[DAG] visitFREEZE - always allow freezing multiple operands
Always try to fold freeze(op(....)) -> op(freeze(),freeze(),freeze(),...). This patch proposes we drop the opt-in limit for opcodes that are allowed to push a freeze through the op to freeze all its operands, bringing us more in line with how InstCombine handles it. I'm struggling to find a strong reason for this limit apart from the DAG freeze handling being immature for so long - as we've improved coverage in canCreateUndefOrPoison/isGuaranteedNotToBeUndefOrPoison it looks like the regressions are not as severe. If there are no objections to this approach I will yak shave some of the remaining regressions. Hopefully this will help some of the regression issues in #143102 etc.
1 parent 1d60d91 commit f30ba4e

File tree

16 files changed

+1731
-1810
lines changed

16 files changed

+1731
-1810
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16609,22 +16609,14 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1660916609
// Fold freeze(op(x, ...)) -> op(freeze(x), ...).
1661016610
// Try to push freeze through instructions that propagate but don't produce
1661116611
// poison as far as possible. If an operand of freeze follows two
16612-
// conditions 1) one-use, 2) does not produce poison, and 3) has all but one
16613-
// guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
16612+
// conditions 1) one-use, and 2) does not produce poison then push
1661416613
// the freeze through to the operands that are not guaranteed non-poison.
1661516614
// NOTE: we will strip poison-generating flags, so ignore them here.
1661616615
if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
1661716616
/*ConsiderFlags*/ false) ||
1661816617
N0->getNumValues() != 1 || !N0->hasOneUse())
1661916618
return SDValue();
1662016619

16621-
bool AllowMultipleMaybePoisonOperands =
16622-
N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
16623-
N0.getOpcode() == ISD::BUILD_VECTOR ||
16624-
N0.getOpcode() == ISD::BUILD_PAIR ||
16625-
N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
16626-
N0.getOpcode() == ISD::CONCAT_VECTORS || N0.getOpcode() == ISD::FMUL;
16627-
1662816620
// Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
1662916621
// ones" or "constant" into something that depends on FrozenUndef. We can
1663016622
// instead pick undef values to keep those properties, while at the same time
@@ -16657,10 +16649,6 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1665716649
MaybePoisonOperandNumbers.push_back(OpNo);
1665816650
if (!HadMaybePoisonOperands)
1665916651
continue;
16660-
if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
16661-
// Multiple maybe-poison ops when not allowed - bail out.
16662-
return SDValue();
16663-
}
1666416652
}
1666516653
// NOTE: the whole op may be not guaranteed to not be undef or poison because
1666616654
// it could create undef or poison due to its poison-generating flags.

llvm/test/CodeGen/AMDGPU/div_i128.ll

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -475,21 +475,28 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
475475
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
476476
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
477477
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
478-
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
479-
; GFX9-O0-NEXT: s_nop 0
480-
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
481478
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
482479
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
483480
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
484481
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
485-
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
482+
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
483+
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
484+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
485+
; GFX9-O0-NEXT: s_nop 0
486+
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
487+
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
488+
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
489+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
486490
; GFX9-O0-NEXT: s_nop 0
487-
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
488-
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
491+
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
492+
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
493+
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
489494
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
490-
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
495+
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
496+
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
491497
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
492-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
498+
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
499+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
493500
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
494501
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
495502
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -500,6 +507,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
500507
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
501508
; GFX9-O0-NEXT: s_mov_b32 s14, s13
502509
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
510+
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
503511
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
504512
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
505513
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -1035,10 +1043,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
10351043
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
10361044
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
10371045
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1038-
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1039-
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1040-
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1041-
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1046+
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1047+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1048+
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1049+
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
10421050
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
10431051
; GFX9-O0-NEXT: s_mov_b32 s5, s6
10441052
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -2656,21 +2664,28 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
26562664
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26572665
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
26582666
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
2659-
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2660-
; GFX9-O0-NEXT: s_nop 0
2661-
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
26622667
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26632668
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26642669
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
26652670
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
2666-
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2671+
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2672+
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2673+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2674+
; GFX9-O0-NEXT: s_nop 0
2675+
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2676+
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2677+
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
2678+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
26672679
; GFX9-O0-NEXT: s_nop 0
2668-
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2669-
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
2680+
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2681+
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2682+
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
26702683
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2671-
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
2684+
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2685+
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
26722686
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
2673-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
2687+
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2688+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
26742689
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
26752690
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
26762691
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -2681,6 +2696,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
26812696
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
26822697
; GFX9-O0-NEXT: s_mov_b32 s14, s13
26832698
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
2699+
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
26842700
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
26852701
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
26862702
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -3216,10 +3232,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
32163232
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
32173233
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
32183234
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3219-
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3220-
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3221-
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3222-
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3235+
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3236+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3237+
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3238+
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
32233239
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
32243240
; GFX9-O0-NEXT: s_mov_b32 s5, s6
32253241
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)

llvm/test/CodeGen/AMDGPU/rem_i128.ll

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -513,21 +513,28 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
513513
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
514514
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
515515
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
516-
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
517-
; GFX9-O0-NEXT: s_nop 0
518-
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
519516
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
520517
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
521518
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
522519
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
523-
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
520+
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
521+
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
522+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
523+
; GFX9-O0-NEXT: s_nop 0
524+
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
525+
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
526+
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
527+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
524528
; GFX9-O0-NEXT: s_nop 0
525-
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
526-
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
529+
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
530+
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
531+
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
527532
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
528-
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
533+
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
534+
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
529535
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
530-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
536+
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
537+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
531538
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
532539
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
533540
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -538,6 +545,7 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
538545
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
539546
; GFX9-O0-NEXT: s_mov_b32 s14, s13
540547
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
548+
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
541549
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
542550
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
543551
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -1073,10 +1081,10 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) {
10731081
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
10741082
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
10751083
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1076-
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1077-
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1078-
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1079-
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1084+
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1085+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1086+
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1087+
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
10801088
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
10811089
; GFX9-O0-NEXT: s_mov_b32 s5, s6
10821090
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -1889,21 +1897,28 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
18891897
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
18901898
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
18911899
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
1892-
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
1893-
; GFX9-O0-NEXT: s_nop 0
1894-
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
18951900
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
18961901
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
18971902
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
18981903
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
1899-
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
1904+
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
1905+
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
1906+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
1907+
; GFX9-O0-NEXT: s_nop 0
1908+
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
1909+
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
1910+
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
1911+
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
19001912
; GFX9-O0-NEXT: s_nop 0
1901-
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
1902-
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
1913+
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
1914+
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
1915+
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
19031916
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
1904-
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
1917+
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
1918+
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
19051919
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
1906-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
1920+
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
1921+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
19071922
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
19081923
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
19091924
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -1914,6 +1929,7 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
19141929
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
19151930
; GFX9-O0-NEXT: s_mov_b32 s14, s13
19161931
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
1932+
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
19171933
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
19181934
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
19191935
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -2449,10 +2465,10 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) {
24492465
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
24502466
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
24512467
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
2452-
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
2453-
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
2454-
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
2455-
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
2468+
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
2469+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
2470+
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
2471+
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
24562472
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
24572473
; GFX9-O0-NEXT: s_mov_b32 s5, s6
24582474
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)

llvm/test/CodeGen/NVPTX/i1-select.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -94,27 +94,27 @@ define i32 @test_select_i1_basic(i32 %v1, i32 %v2, i32 %v3, i32 %true, i32 %fals
9494
define i32 @test_select_i1_basic_folding(i32 %v1, i32 %v2, i32 %v3, i32 %true, i32 %false) {
9595
; CHECK-LABEL: test_select_i1_basic_folding(
9696
; CHECK: {
97-
; CHECK-NEXT: .reg .pred %p<13>;
98-
; CHECK-NEXT: .reg .b32 %r<7>;
97+
; CHECK-NEXT: .reg .pred %p<12>;
98+
; CHECK-NEXT: .reg .b32 %r<9>;
9999
; CHECK-EMPTY:
100100
; CHECK-NEXT: // %bb.0:
101101
; CHECK-NEXT: ld.param.b32 %r1, [test_select_i1_basic_folding_param_0];
102102
; CHECK-NEXT: setp.eq.s32 %p1, %r1, 0;
103-
; CHECK-NEXT: ld.param.b32 %r2, [test_select_i1_basic_folding_param_1];
104-
; CHECK-NEXT: setp.ne.s32 %p2, %r2, 0;
105-
; CHECK-NEXT: setp.eq.s32 %p3, %r2, 0;
106-
; CHECK-NEXT: ld.param.b32 %r3, [test_select_i1_basic_folding_param_2];
107-
; CHECK-NEXT: setp.eq.s32 %p4, %r3, 0;
108-
; CHECK-NEXT: ld.param.b32 %r4, [test_select_i1_basic_folding_param_3];
103+
; CHECK-NEXT: ld.param.b32 %r3, [test_select_i1_basic_folding_param_1];
104+
; CHECK-NEXT: setp.ne.s32 %p2, %r3, 0;
105+
; CHECK-NEXT: setp.eq.s32 %p3, %r3, 0;
106+
; CHECK-NEXT: ld.param.b32 %r5, [test_select_i1_basic_folding_param_2];
107+
; CHECK-NEXT: setp.eq.s32 %p4, %r5, 0;
108+
; CHECK-NEXT: ld.param.b32 %r6, [test_select_i1_basic_folding_param_3];
109109
; CHECK-NEXT: xor.pred %p6, %p1, %p3;
110-
; CHECK-NEXT: ld.param.b32 %r5, [test_select_i1_basic_folding_param_4];
110+
; CHECK-NEXT: ld.param.b32 %r7, [test_select_i1_basic_folding_param_4];
111111
; CHECK-NEXT: and.pred %p7, %p6, %p4;
112-
; CHECK-NEXT: and.pred %p9, %p2, %p4;
113-
; CHECK-NEXT: and.pred %p10, %p3, %p7;
114-
; CHECK-NEXT: or.pred %p11, %p10, %p9;
115-
; CHECK-NEXT: xor.pred %p12, %p11, %p3;
116-
; CHECK-NEXT: selp.b32 %r6, %r4, %r5, %p12;
117-
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
112+
; CHECK-NEXT: and.pred %p8, %p2, %p4;
113+
; CHECK-NEXT: and.pred %p9, %p3, %p7;
114+
; CHECK-NEXT: or.pred %p10, %p9, %p8;
115+
; CHECK-NEXT: xor.pred %p11, %p10, %p3;
116+
; CHECK-NEXT: selp.b32 %r8, %r6, %r7, %p11;
117+
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
118118
; CHECK-NEXT: ret;
119119
%b1 = icmp eq i32 %v1, 0
120120
%b2 = icmp eq i32 %v2, 0

0 commit comments

Comments
 (0)