Skip to content

Commit 0f3cb82

Browse files
committed
[DAG] visitFREEZE - limit freezing of multiple operands
This is a partial revert of #145939 (I've kept the BUILD_VECTOR(FREEZE(UNDEF), FREEZE(UNDEF), elt2, ...) canonicalization) as we're getting reports of infinite loops (#148084). The issue appears to be due to deep chains of nodes and how visitFREEZE replaces all instances of a node with a common frozen version - the other users then get added back to the worklist but might no longer be able to confirm a node isn't poison due to recursion depth limits on isGuaranteedNotToBeUndefOrPoison. The issue still exists with the old implementation but by only allowing a single frozen operand it helps prevent cases of interdependent frozen nodes. I'm still working on supporting multiple operands as its critical for topological DAG handling but need to get a fix in for trunk and 21.x.
1 parent 95201b2 commit 0f3cb82

19 files changed

+2070
-1960
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16738,14 +16738,27 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1673816738
// Fold freeze(op(x, ...)) -> op(freeze(x), ...).
1673916739
// Try to push freeze through instructions that propagate but don't produce
1674016740
// poison as far as possible. If an operand of freeze follows three
16741-
// conditions 1) one-use, and 2) does not produce poison then push
16741+
// conditions 1) one-use, 2) does not produce poison, and 3) has all but one
16742+
// guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push
1674216743
// the freeze through to the operands that are not guaranteed non-poison.
1674316744
// NOTE: we will strip poison-generating flags, so ignore them here.
1674416745
if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
1674516746
/*ConsiderFlags*/ false) ||
1674616747
N0->getNumValues() != 1 || !N0->hasOneUse())
1674716748
return SDValue();
1674816749

16750+
// TOOD: we should always allow multiple operands, however this increases the
16751+
// likelihood of infinite loops due to the ReplaceAllUsesOfValueWith call
16752+
// below causing later nodes that share frozen operands to fold again and no
16753+
// longer being able to confirm other operands are not poison due to recursion
16754+
// depth limits on isGuaranteedNotToBeUndefOrPoison.
16755+
bool AllowMultipleMaybePoisonOperands =
16756+
N0.getOpcode() == ISD::SELECT_CC || N0.getOpcode() == ISD::SETCC ||
16757+
N0.getOpcode() == ISD::BUILD_VECTOR ||
16758+
N0.getOpcode() == ISD::BUILD_PAIR ||
16759+
N0.getOpcode() == ISD::VECTOR_SHUFFLE ||
16760+
N0.getOpcode() == ISD::CONCAT_VECTORS || N0.getOpcode() == ISD::FMUL;
16761+
1674916762
// Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
1675016763
// ones" or "constant" into something that depends on FrozenUndef. We can
1675116764
// instead pick undef values to keep those properties, while at the same time
@@ -16772,8 +16785,16 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
1677216785
if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
1677316786
/*Depth*/ 1))
1677416787
continue;
16775-
if (MaybePoisonOperands.insert(Op).second)
16788+
bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
16789+
bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op).second;
16790+
if (IsNewMaybePoisonOperand)
1677616791
MaybePoisonOperandNumbers.push_back(OpNo);
16792+
if (!HadMaybePoisonOperands)
16793+
continue;
16794+
if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
16795+
// Multiple maybe-poison ops when not allowed - bail out.
16796+
return SDValue();
16797+
}
1677716798
}
1677816799
// NOTE: the whole op may be not guaranteed to not be undef or poison because
1677916800
// it could create undef or poison due to it's poison-generating flags.

llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7791,7 +7791,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
77917791
;
77927792
; GFX6-LABEL: sdiv_i64_pow2_shl_denom:
77937793
; GFX6: ; %bb.0:
7794-
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
7794+
; GFX6-NEXT: s_load_dword s0, s[4:5], 0xd
77957795
; GFX6-NEXT: s_mov_b32 s7, 0xf000
77967796
; GFX6-NEXT: s_mov_b32 s6, -1
77977797
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
@@ -7927,7 +7927,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
79277927
;
79287928
; GFX9-LABEL: sdiv_i64_pow2_shl_denom:
79297929
; GFX9: ; %bb.0:
7930-
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x34
7930+
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x34
79317931
; GFX9-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
79327932
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
79337933
; GFX9-NEXT: s_lshl_b64 s[0:1], 0x1000, s0
@@ -8982,7 +8982,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
89828982
;
89838983
; GFX6-LABEL: srem_i64_pow2_shl_denom:
89848984
; GFX6: ; %bb.0:
8985-
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
8985+
; GFX6-NEXT: s_load_dword s0, s[4:5], 0xd
89868986
; GFX6-NEXT: s_mov_b32 s7, 0xf000
89878987
; GFX6-NEXT: s_mov_b32 s6, -1
89888988
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
@@ -9116,7 +9116,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
91169116
;
91179117
; GFX9-LABEL: srem_i64_pow2_shl_denom:
91189118
; GFX9: ; %bb.0:
9119-
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x34
9119+
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x34
91209120
; GFX9-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
91219121
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
91229122
; GFX9-NEXT: s_lshl_b64 s[0:1], 0x1000, s0

llvm/test/CodeGen/AMDGPU/div_i128.ll

Lines changed: 24 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -475,28 +475,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
475475
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
476476
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
477477
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
478+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
479+
; GFX9-O0-NEXT: s_nop 0
480+
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
478481
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
479482
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
480483
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
481484
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
482-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
483-
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
484-
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
485-
; GFX9-O0-NEXT: s_nop 0
486-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
487-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
488-
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
489-
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
485+
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
490486
; GFX9-O0-NEXT: s_nop 0
491-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
492-
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
493-
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
487+
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
488+
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
494489
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
495-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
496-
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
490+
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
497491
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
498-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
499-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
492+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
500493
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
501494
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
502495
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -507,7 +500,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
507500
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
508501
; GFX9-O0-NEXT: s_mov_b32 s14, s13
509502
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
510-
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
511503
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
512504
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
513505
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -1046,10 +1038,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
10461038
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
10471039
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
10481040
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1049-
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1050-
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1051-
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1052-
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1041+
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1042+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1043+
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1044+
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
10531045
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
10541046
; GFX9-O0-NEXT: s_mov_b32 s5, s6
10551047
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -2667,28 +2659,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
26672659
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26682660
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
26692661
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
2662+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2663+
; GFX9-O0-NEXT: s_nop 0
2664+
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
26702665
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26712666
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26722667
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
26732668
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
2674-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2675-
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2676-
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2677-
; GFX9-O0-NEXT: s_nop 0
2678-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2679-
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2680-
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
2681-
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2669+
; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
26822670
; GFX9-O0-NEXT: s_nop 0
2683-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2684-
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2685-
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
2671+
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2672+
; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
26862673
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2687-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2688-
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
2674+
; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
26892675
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
2690-
; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2691-
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
2676+
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
26922677
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
26932678
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
26942679
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -2699,7 +2684,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
26992684
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
27002685
; GFX9-O0-NEXT: s_mov_b32 s14, s13
27012686
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
2702-
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
27032687
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
27042688
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
27052689
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -3238,10 +3222,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
32383222
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
32393223
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
32403224
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3241-
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3242-
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3243-
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3244-
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3225+
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3226+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3227+
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3228+
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
32453229
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
32463230
; GFX9-O0-NEXT: s_mov_b32 s5, s6
32473231
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)

0 commit comments

Comments
 (0)