Skip to content

Commit 3d04a3b

Browse files
committed
[SelectionDAG] Improve isGuaranteedNotToBeUndefOrPoison
Add special handling of EXTRACT_SUBVECTOR, INSERT_SUBVECTOR, EXTRACT_VECTOR_ELT, INSERT_VECTOR_ELT and SCALAR_TO_VECTOR in isGuaranteedNotToBeUndefOrPoison. Make use of DemandedElts to improve the analysis and only check relevant elements for each operand. Also start using DemandedElts in the recursive isGuaranteedNotToBeUndefOrPoison calls made for each operand of operations that do not create undef/poison. We can do that for a number of elementwise operations for which the DemandedElts mask applies to every operand (e.g. ADD, OR, BITREVERSE, TRUNCATE).
1 parent 66da9f3 commit 3d04a3b

File tree

3 files changed

+146
-22
lines changed

3 files changed

+146
-22
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5461,6 +5461,83 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
54615461
}
54625462
return true;
54635463

5464+
case ISD::EXTRACT_SUBVECTOR: {
5465+
SDValue Src = Op.getOperand(0);
5466+
if (Src.getValueType().isScalableVector())
5467+
break;
5468+
uint64_t Idx = Op.getConstantOperandVal(1);
5469+
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
5470+
APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
5471+
return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, PoisonOnly,
5472+
Depth + 1);
5473+
}
5474+
5475+
case ISD::INSERT_SUBVECTOR: {
5476+
if (Op.getValueType().isScalableVector())
5477+
break;
5478+
SDValue Src = Op.getOperand(0);
5479+
SDValue Sub = Op.getOperand(1);
5480+
uint64_t Idx = Op.getConstantOperandVal(2);
5481+
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
5482+
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
5483+
APInt DemandedSrcElts = DemandedElts;
5484+
DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
5485+
5486+
if (!!DemandedSubElts && !isGuaranteedNotToBeUndefOrPoison(
5487+
Sub, DemandedSubElts, PoisonOnly, Depth + 1))
5488+
return false;
5489+
if (!!DemandedSrcElts && !isGuaranteedNotToBeUndefOrPoison(
5490+
Src, DemandedSrcElts, PoisonOnly, Depth + 1))
5491+
return false;
5492+
return true;
5493+
}
5494+
5495+
case ISD::EXTRACT_VECTOR_ELT: {
5496+
SDValue Src = Op.getOperand(0);
5497+
auto *IndexC = dyn_cast<ConstantSDNode>(Op.getOperand(1));
5498+
EVT SrcVT = Src.getValueType();
5499+
if (SrcVT.isFixedLengthVector() && IndexC &&
5500+
IndexC->getAPIntValue().ult(SrcVT.getVectorNumElements())) {
5501+
APInt DemandedSrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
5502+
IndexC->getZExtValue());
5503+
return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, PoisonOnly,
5504+
Depth + 1);
5505+
}
5506+
break;
5507+
}
5508+
5509+
case ISD::INSERT_VECTOR_ELT: {
5510+
SDValue InVec = Op.getOperand(0);
5511+
SDValue InVal = Op.getOperand(1);
5512+
SDValue EltNo = Op.getOperand(2);
5513+
EVT VT = InVec.getValueType();
5514+
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
5515+
if (IndexC && VT.isFixedLengthVector() &&
5516+
IndexC->getZExtValue() < VT.getVectorNumElements()) {
5517+
if (DemandedElts[IndexC->getZExtValue()] &&
5518+
!isGuaranteedNotToBeUndefOrPoison(InVal, PoisonOnly, Depth + 1))
5519+
return false;
5520+
APInt InVecDemandedElts = DemandedElts;
5521+
InVecDemandedElts.clearBit(IndexC->getZExtValue());
5522+
if (!!InVecDemandedElts &&
5523+
!isGuaranteedNotToBeUndefOrPoison(InVec, InVecDemandedElts,
5524+
PoisonOnly, Depth + 1))
5525+
return false;
5526+
return true;
5527+
}
5528+
break;
5529+
}
5530+
5531+
case ISD::SCALAR_TO_VECTOR:
5532+
// Check upper (known undef) elements.
5533+
if (DemandedElts.ugt(1) && !PoisonOnly)
5534+
return false;
5535+
// Check element zero.
5536+
if (DemandedElts[0] && !isGuaranteedNotToBeUndefOrPoison(
5537+
Op.getOperand(0), PoisonOnly, Depth + 1))
5538+
return false;
5539+
return true;
5540+
54645541
case ISD::SPLAT_VECTOR:
54655542
return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
54665543
Depth + 1);
@@ -5483,6 +5560,52 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
54835560
return true;
54845561
}
54855562

5563+
case ISD::SHL:
5564+
case ISD::SRL:
5565+
case ISD::SRA:
5566+
// Shift amount operand is checked by canCreateUndefOrPoison. So it is
5567+
// enough to check operand 0 if Op can't create undef/poison.
5568+
return !canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly,
5569+
/*ConsiderFlags*/ true, Depth) &&
5570+
isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedElts,
5571+
PoisonOnly, Depth + 1);
5572+
5573+
case ISD::BSWAP:
5574+
case ISD::CTPOP:
5575+
case ISD::BITREVERSE:
5576+
case ISD::AND:
5577+
case ISD::OR:
5578+
case ISD::XOR:
5579+
case ISD::ADD:
5580+
case ISD::SUB:
5581+
case ISD::MUL:
5582+
case ISD::SADDSAT:
5583+
case ISD::UADDSAT:
5584+
case ISD::SSUBSAT:
5585+
case ISD::USUBSAT:
5586+
case ISD::SSHLSAT:
5587+
case ISD::USHLSAT:
5588+
case ISD::SMIN:
5589+
case ISD::SMAX:
5590+
case ISD::UMIN:
5591+
case ISD::UMAX:
5592+
case ISD::ZERO_EXTEND:
5593+
case ISD::SIGN_EXTEND:
5594+
case ISD::ANY_EXTEND:
5595+
case ISD::TRUNCATE:
5596+
case ISD::VSELECT: {
5597+
// If Op can't create undef/poison and none of its operands are undef/poison
5598+
// then Op is never undef/poison. A difference from the more common check
5599+
// below, outside the switch, is that we handle elementwise operations for
5600+
// which the DemandedElts mask is valid for all operands here.
5601+
return !canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly,
5602+
/*ConsiderFlags*/ true, Depth) &&
5603+
all_of(Op->ops(), [&](SDValue V) {
5604+
return isGuaranteedNotToBeUndefOrPoison(V, DemandedElts,
5605+
PoisonOnly, Depth + 1);
5606+
});
5607+
}
5608+
54865609
// TODO: Search for noundef attributes from library functions.
54875610

54885611
// TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.

llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -172,10 +172,9 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
172172
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
173173
; X86-NEXT: sbbl %eax, %edi
174174
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
175-
; X86-NEXT: movl 52(%ebp), %ecx
176-
; X86-NEXT: movl %ecx, %edx
175+
; X86-NEXT: movl 52(%ebp), %esi
176+
; X86-NEXT: movl %esi, %edx
177177
; X86-NEXT: sarl $31, %edx
178-
; X86-NEXT: movl %ecx, %esi
179178
; X86-NEXT: xorl %edx, %esi
180179
; X86-NEXT: movl 48(%ebp), %ecx
181180
; X86-NEXT: xorl %edx, %ecx
@@ -204,14 +203,14 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
204203
; X86-NEXT: sete %al
205204
; X86-NEXT: orb %cl, %al
206205
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
207-
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
208-
; X86-NEXT: bsrl %eax, %edx
206+
; X86-NEXT: bsrl %esi, %edx
209207
; X86-NEXT: xorl $31, %edx
210-
; X86-NEXT: addl $32, %edx
211-
; X86-NEXT: bsrl %esi, %ecx
208+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
209+
; X86-NEXT: bsrl %eax, %ecx
212210
; X86-NEXT: xorl $31, %ecx
211+
; X86-NEXT: addl $32, %ecx
213212
; X86-NEXT: testl %esi, %esi
214-
; X86-NEXT: cmovel %edx, %ecx
213+
; X86-NEXT: cmovnel %edx, %ecx
215214
; X86-NEXT: bsrl %ebx, %edx
216215
; X86-NEXT: xorl $31, %edx
217216
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -226,14 +225,14 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
226225
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
227226
; X86-NEXT: orl %esi, %edx
228227
; X86-NEXT: cmovnel %ecx, %edi
229-
; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
230-
; X86-NEXT: xorl $31, %edx
231-
; X86-NEXT: addl $32, %edx
232228
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
233-
; X86-NEXT: bsrl %eax, %ecx
229+
; X86-NEXT: bsrl %eax, %edx
230+
; X86-NEXT: xorl $31, %edx
231+
; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
234232
; X86-NEXT: xorl $31, %ecx
233+
; X86-NEXT: addl $32, %ecx
235234
; X86-NEXT: testl %eax, %eax
236-
; X86-NEXT: cmovel %edx, %ecx
235+
; X86-NEXT: cmovnel %edx, %ecx
237236
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
238237
; X86-NEXT: bsrl %ebx, %esi
239238
; X86-NEXT: xorl $31, %esi
@@ -380,9 +379,9 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
380379
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
381380
; X86-NEXT: adcl $-1, %eax
382381
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
383-
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
384-
; X86-NEXT: adcl $-1, %eax
385-
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
382+
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
383+
; X86-NEXT: adcl $-1, %ecx
384+
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
386385
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
387386
; X86-NEXT: adcl $-1, %ecx
388387
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill

llvm/test/CodeGen/X86/pr62286.ll

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ define i64 @PR62286(i32 %a) {
2828
; AVX1-NEXT: vmovd %edi, %xmm0
2929
; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
3030
; AVX1-NEXT: vpaddd %xmm0, %xmm0, %xmm0
31+
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
32+
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
3133
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
32-
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
3334
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
3435
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
3536
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
@@ -58,12 +59,13 @@ define i64 @PR62286(i32 %a) {
5859
; AVX512-LABEL: PR62286:
5960
; AVX512: # %bb.0:
6061
; AVX512-NEXT: vmovd %edi, %xmm0
61-
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
62-
; AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
63-
; AVX512-NEXT: movw $4369, %ax # imm = 0x1111
62+
; AVX512-NEXT: movb $8, %al
6463
; AVX512-NEXT: kmovd %eax, %k1
65-
; AVX512-NEXT: vpaddd %zmm0, %zmm0, %zmm1 {%k1}
66-
; AVX512-NEXT: vpmovsxdq %ymm1, %zmm0
64+
; AVX512-NEXT: vpexpandd %ymm0, %ymm1 {%k1} {z}
65+
; AVX512-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
66+
; AVX512-NEXT: vpaddd %ymm0, %ymm0, %ymm0
67+
; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
68+
; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0
6769
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
6870
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
6971
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1

0 commit comments

Comments
 (0)