Skip to content

Commit e2556d3

Browse files
committed
[SelectionDAG] Deal with POISON for INSERT_VECTOR_ELT/INSERT_SUBVECTOR (part 2)
Add support in isGuaranteedNotToBeUndefOrPoison to avoid regressions seen after a previous commit fixing #141034.
1 parent c695230 commit e2556d3

File tree

9 files changed

+205
-386
lines changed

9 files changed

+205
-386
lines changed

llvm/include/llvm/CodeGen/SelectionDAGNodes.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1881,6 +1881,12 @@ LLVM_ABI SDValue peekThroughExtractSubvectors(SDValue V);
18811881
/// If \p V is not a truncation, it is returned as-is.
18821882
LLVM_ABI SDValue peekThroughTruncates(SDValue V);
18831883

1884+
/// Recursively peek through INSERT_VECTOR_ELT nodes, returning the source
1885+
/// vector operand of \p V, as long as \p V is an INSERT_VECTOR_ELT operation
1886+
that does not insert into any of the demanded vector elts.
1887+
LLVM_ABI SDValue peekThroughInsertVectorElt(SDValue V,
1888+
const APInt &DemandedElts);
1889+
18841890
/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
18851891
/// constant is canonicalized to be operand 1.
18861892
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs = false);

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5454,6 +5454,59 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
54545454
}
54555455
return true;
54565456

5457+
case ISD::INSERT_SUBVECTOR: {
5458+
if (Op.getValueType().isScalableVector())
5459+
break;
5460+
SDValue Src = Op.getOperand(0);
5461+
SDValue Sub = Op.getOperand(1);
5462+
uint64_t Idx = Op.getConstantOperandVal(2);
5463+
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
5464+
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
5465+
APInt DemandedSrcElts = DemandedElts;
5466+
DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
5467+
5468+
if (!!DemandedSubElts && !isGuaranteedNotToBeUndefOrPoison(
5469+
Sub, DemandedSubElts, PoisonOnly, Depth + 1))
5470+
return false;
5471+
if (!!DemandedSrcElts && !isGuaranteedNotToBeUndefOrPoison(
5472+
Src, DemandedSrcElts, PoisonOnly, Depth + 1))
5473+
return false;
5474+
return true;
5475+
}
5476+
5477+
case ISD::INSERT_VECTOR_ELT: {
5478+
SDValue InVec = Op.getOperand(0);
5479+
SDValue InVal = Op.getOperand(1);
5480+
SDValue EltNo = Op.getOperand(2);
5481+
EVT VT = InVec.getValueType();
5482+
auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
5483+
if (IndexC && VT.isFixedLengthVector() &&
5484+
IndexC->getZExtValue() < VT.getVectorNumElements()) {
5485+
if (DemandedElts[IndexC->getZExtValue()] &&
5486+
!isGuaranteedNotToBeUndefOrPoison(InVal, PoisonOnly, Depth + 1))
5487+
return false;
5488+
APInt InVecDemandedElts = DemandedElts;
5489+
InVecDemandedElts.clearBit(IndexC->getZExtValue());
5490+
if (!!InVecDemandedElts &&
5491+
!isGuaranteedNotToBeUndefOrPoison(
5492+
peekThroughInsertVectorElt(InVec, InVecDemandedElts),
5493+
InVecDemandedElts, PoisonOnly, Depth + 1))
5494+
return false;
5495+
return true;
5496+
}
5497+
break;
5498+
}
5499+
5500+
case ISD::SCALAR_TO_VECTOR:
5501+
// If only demanding upper (undef) elements.
5502+
if (DemandedElts.ugt(1))
5503+
return PoisonOnly;
5504+
// If only demanding element 0, or only considering poison.
5505+
if (PoisonOnly || DemandedElts == 0)
5506+
return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
5507+
Depth + 1);
5508+
return false;
5509+
54575510
case ISD::SPLAT_VECTOR:
54585511
return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
54595512
Depth + 1);
@@ -12471,6 +12524,23 @@ SDValue llvm::peekThroughTruncates(SDValue V) {
1247112524
return V;
1247212525
}
1247312526

12527+
// Recursively peek through a chain of INSERT_VECTOR_ELT nodes, returning the
// deepest source vector that is reached without skipping an insert into any
// demanded element. Only fixed-length vectors with constant, in-range insert
// indices are peeked through; anything else terminates the walk.
SDValue llvm::peekThroughInsertVectorElt(SDValue V, const APInt &DemandedElts) {
  while (V.getOpcode() == ISD::INSERT_VECTOR_ELT) {
    SDValue BaseVec = V.getOperand(0);
    SDValue LaneIdx = V.getOperand(2);
    EVT VecVT = BaseVec.getValueType();
    auto *CIdx = dyn_cast<ConstantSDNode>(LaneIdx);
    // Stop at a non-constant index, a non-fixed-length vector, an
    // out-of-range lane, or an insert into a lane the caller demands.
    if (!CIdx || !VecVT.isFixedLengthVector() ||
        CIdx->getZExtValue() >= VecVT.getVectorNumElements() ||
        DemandedElts[CIdx->getZExtValue()])
      break;
    V = BaseVec;
  }
  return V;
}
12543+
1247412544
bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
1247512545
if (V.getOpcode() != ISD::XOR)
1247612546
return false;

llvm/test/CodeGen/Thumb2/mve-vld3.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -663,8 +663,8 @@ define void @vld3_v2i8(ptr %src, ptr %dst) {
663663
; CHECK: @ %bb.0: @ %entry
664664
; CHECK-NEXT: .pad #8
665665
; CHECK-NEXT: sub sp, #8
666-
; CHECK-NEXT: ldrd r2, r0, [r0]
667-
; CHECK-NEXT: strd r2, r0, [sp]
666+
; CHECK-NEXT: ldrd r0, r2, [r0]
667+
; CHECK-NEXT: strd r0, r2, [sp]
668668
; CHECK-NEXT: mov r0, sp
669669
; CHECK-NEXT: vldrb.u16 q0, [r0]
670670
; CHECK-NEXT: vmov.u16 r0, q0[4]

llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll

Lines changed: 22 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -262,54 +262,37 @@ define <4 x float> @merge_4f32_f32_45zz(ptr %ptr) nounwind uwtable noinline ssp
262262
define <4 x float> @merge_4f32_f32_012u(ptr %ptr) nounwind uwtable noinline ssp {
263263
; SSE2-LABEL: merge_4f32_f32_012u:
264264
; SSE2: # %bb.0:
265-
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
266265
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
267-
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
268-
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
269-
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
270-
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
266+
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
267+
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
271268
; SSE2-NEXT: retq
272269
;
273270
; SSE41-LABEL: merge_4f32_f32_012u:
274271
; SSE41: # %bb.0:
275-
; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
276-
; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
277-
; SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
278-
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
279-
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
280-
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
272+
; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
273+
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
281274
; SSE41-NEXT: retq
282275
;
283276
; AVX-LABEL: merge_4f32_f32_012u:
284277
; AVX: # %bb.0:
285-
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
286-
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
287-
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
288-
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
289-
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
290-
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
278+
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
279+
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
291280
; AVX-NEXT: retq
292281
;
293282
; X86-SSE1-LABEL: merge_4f32_f32_012u:
294283
; X86-SSE1: # %bb.0:
295284
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
296-
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
285+
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
286+
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
297287
; X86-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
298-
; X86-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
299-
; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
300-
; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
301-
; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
288+
; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
302289
; X86-SSE1-NEXT: retl
303290
;
304291
; X86-SSE41-LABEL: merge_4f32_f32_012u:
305292
; X86-SSE41: # %bb.0:
306293
; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
307-
; X86-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
308-
; X86-SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
309-
; X86-SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
310-
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
311-
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
312-
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
294+
; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
295+
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
313296
; X86-SSE41-NEXT: retl
314297
%ptr1 = getelementptr inbounds float, ptr %ptr, i64 1
315298
%ptr2 = getelementptr inbounds float, ptr %ptr, i64 2
@@ -326,54 +309,37 @@ define <4 x float> @merge_4f32_f32_012u(ptr %ptr) nounwind uwtable noinline ssp
326309
define <4 x float> @merge_4f32_f32_019u(ptr %ptr) nounwind uwtable noinline ssp {
327310
; SSE2-LABEL: merge_4f32_f32_019u:
328311
; SSE2: # %bb.0:
329-
; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
330312
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
331-
; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
332-
; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
333-
; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
334-
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
313+
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
314+
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
335315
; SSE2-NEXT: retq
336316
;
337317
; SSE41-LABEL: merge_4f32_f32_019u:
338318
; SSE41: # %bb.0:
339-
; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
340-
; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
341-
; SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
342-
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
343-
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
344-
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
319+
; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
320+
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
345321
; SSE41-NEXT: retq
346322
;
347323
; AVX-LABEL: merge_4f32_f32_019u:
348324
; AVX: # %bb.0:
349-
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
350-
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
351-
; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
352-
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
353-
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
354-
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
325+
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
326+
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
355327
; AVX-NEXT: retq
356328
;
357329
; X86-SSE1-LABEL: merge_4f32_f32_019u:
358330
; X86-SSE1: # %bb.0:
359331
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
360-
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
332+
; X86-SSE1-NEXT: xorps %xmm0, %xmm0
333+
; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
361334
; X86-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
362-
; X86-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
363-
; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
364-
; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
365-
; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
335+
; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
366336
; X86-SSE1-NEXT: retl
367337
;
368338
; X86-SSE41-LABEL: merge_4f32_f32_019u:
369339
; X86-SSE41: # %bb.0:
370340
; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
371-
; X86-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
372-
; X86-SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
373-
; X86-SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
374-
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
375-
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
376-
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
341+
; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
342+
; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
377343
; X86-SSE41-NEXT: retl
378344
%ptr1 = getelementptr inbounds float, ptr %ptr, i64 1
379345
%ptr2 = getelementptr inbounds float, ptr %ptr, i64 9

0 commit comments

Comments
 (0)