Skip to content

Commit 6711099

Browse files
authored
[AArch64] canCreateUndefOrPoisonForTargetNode - AArch64ISD::VASHR\VLSHR\VSHL can't create undef/poison (#156445)
We can always fold freeze(VSHIFT(x,c)) -> VSHIFT(freeze(x),c), as VASHR\VLSHR\VSHL should always have an in-range constant shift amount. Test coverage can be tricky, so I've hijacked some computeKnownBits/ComputeNumSignBits tests to show that value tracking can still analyze the shift node: the FREEZE will have been discarded by the canCreateUndefOrPoison/isGuaranteedNotToBeUndefOrPoison logic in getFreeze().
1 parent 4b362f1 commit 6711099

File tree

3 files changed

+33
-13
lines changed

3 files changed

+33
-13
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31004,6 +31004,9 @@ bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode(
3100431004
case AArch64ISD::MOVIshift:
3100531005
case AArch64ISD::MVNImsl:
3100631006
case AArch64ISD::MVNIshift:
31007+
case AArch64ISD::VASHR:
31008+
case AArch64ISD::VLSHR:
31009+
case AArch64ISD::VSHL:
3100731010
return false;
3100831011
}
3100931012
return TargetLowering::canCreateUndefOrPoisonForTargetNode(

llvm/test/CodeGen/AArch64/vector-compress.ll

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,15 @@ define <4 x i32> @test_compress_v4i32(<4 x i32> %vec, <4 x i1> %mask) {
1212
; CHECK-NEXT: shl.4s v1, v1, #31
1313
; CHECK-NEXT: cmlt.4s v1, v1, #0
1414
; CHECK-NEXT: mov.s w9, v1[1]
15-
; CHECK-NEXT: mov.s w10, v1[2]
1615
; CHECK-NEXT: fmov w11, s1
16+
; CHECK-NEXT: mov.s w10, v1[2]
17+
; CHECK-NEXT: and x12, x11, #0x1
1718
; CHECK-NEXT: bfi x8, x11, #2, #1
18-
; CHECK-NEXT: and x11, x11, #0x1
19-
; CHECK-NEXT: and x9, x9, #0x1
20-
; CHECK-NEXT: and w10, w10, #0x1
21-
; CHECK-NEXT: add x9, x11, x9
2219
; CHECK-NEXT: mov x11, sp
20+
; CHECK-NEXT: and x9, x9, #0x1
21+
; CHECK-NEXT: add x9, x12, x9
2322
; CHECK-NEXT: st1.s { v0 }[1], [x8]
24-
; CHECK-NEXT: add w10, w9, w10
23+
; CHECK-NEXT: sub w10, w9, w10
2524
; CHECK-NEXT: orr x9, x11, x9, lsl #2
2625
; CHECK-NEXT: bfi x11, x10, #2, #2
2726
; CHECK-NEXT: st1.s { v0 }[2], [x9]
@@ -93,7 +92,8 @@ define <2 x double> @test_compress_v2f64(<2 x double> %vec, <2 x i1> %mask) {
9392
; CHECK-NEXT: shl.2d v1, v1, #63
9493
; CHECK-NEXT: cmlt.2d v1, v1, #0
9594
; CHECK-NEXT: fmov x9, d1
96-
; CHECK-NEXT: bfi x8, x9, #3, #1
95+
; CHECK-NEXT: and x9, x9, #0x8
96+
; CHECK-NEXT: orr x8, x8, x9
9797
; CHECK-NEXT: st1.d { v0 }[1], [x8]
9898
; CHECK-NEXT: ldr q0, [sp], #16
9999
; CHECK-NEXT: ret
@@ -420,16 +420,15 @@ define <3 x i32> @test_compress_narrow(<3 x i32> %vec, <3 x i1> %mask) {
420420
; CHECK-NEXT: shl.4s v1, v1, #31
421421
; CHECK-NEXT: cmlt.4s v1, v1, #0
422422
; CHECK-NEXT: mov.s w8, v1[1]
423-
; CHECK-NEXT: mov.s w9, v1[2]
424423
; CHECK-NEXT: fmov w10, s1
424+
; CHECK-NEXT: mov.s w9, v1[2]
425+
; CHECK-NEXT: and x12, x10, #0x1
425426
; CHECK-NEXT: bfi x11, x10, #2, #1
426-
; CHECK-NEXT: and x10, x10, #0x1
427-
; CHECK-NEXT: and x8, x8, #0x1
428-
; CHECK-NEXT: and w9, w9, #0x1
429-
; CHECK-NEXT: add x8, x10, x8
430427
; CHECK-NEXT: mov x10, sp
428+
; CHECK-NEXT: and x8, x8, #0x1
429+
; CHECK-NEXT: add x8, x12, x8
431430
; CHECK-NEXT: st1.s { v0 }[1], [x11]
432-
; CHECK-NEXT: add w9, w8, w9
431+
; CHECK-NEXT: sub w9, w8, w9
433432
; CHECK-NEXT: orr x8, x10, x8, lsl #2
434433
; CHECK-NEXT: bfi x10, x9, #2, #2
435434
; CHECK-NEXT: st1.s { v0 }[2], [x8]

llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,9 @@ TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_VASHR) {
172172
auto VecA = DAG->getConstant(0xaa, Loc, VecVT);
173173
auto Op2 = DAG->getNode(AArch64ISD::VASHR, Loc, VecVT, VecA, Shift);
174174
EXPECT_EQ(DAG->ComputeNumSignBits(Op2), 5u);
175+
// VASHR can't create undef/poison - FREEZE(VASHR(C1,C2)) -> VASHR(C1,C2).
176+
auto Fr2 = DAG->getFreeze(Op2);
177+
EXPECT_EQ(DAG->ComputeNumSignBits(Fr2), 5u);
175178
}
176179

177180
TEST_F(AArch64SelectionDAGTest, SimplifyDemandedVectorElts_EXTRACT_SUBVECTOR) {
@@ -564,6 +567,11 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_VASHR) {
564567
Known = DAG->computeKnownBits(Op1);
565568
EXPECT_EQ(Known.Zero, APInt(8, 0x00));
566569
EXPECT_EQ(Known.One, APInt(8, 0xFF));
570+
571+
auto Fr1 = DAG->getFreeze(Op1);
572+
Known = DAG->computeKnownBits(Fr1);
573+
EXPECT_EQ(Known.Zero, APInt(8, 0x00));
574+
EXPECT_EQ(Known.One, APInt(8, 0xFF));
567575
}
568576

569577
// Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits.
@@ -584,6 +592,11 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_VLSHR) {
584592
Known = DAG->computeKnownBits(Op1);
585593
EXPECT_EQ(Known.Zero, APInt(8, 0xFE));
586594
EXPECT_EQ(Known.One, APInt(8, 0x1));
595+
596+
auto Fr1 = DAG->getFreeze(Op1);
597+
Known = DAG->computeKnownBits(Fr1);
598+
EXPECT_EQ(Known.Zero, APInt(8, 0xFE));
599+
EXPECT_EQ(Known.One, APInt(8, 0x1));
587600
}
588601

589602
// Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits.
@@ -604,6 +617,11 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_VSHL) {
604617
Known = DAG->computeKnownBits(Op1);
605618
EXPECT_EQ(Known.Zero, APInt(8, 0x7F));
606619
EXPECT_EQ(Known.One, APInt(8, 0x80));
620+
621+
auto Fr1 = DAG->getFreeze(Op1);
622+
Known = DAG->computeKnownBits(Fr1);
623+
EXPECT_EQ(Known.Zero, APInt(8, 0x7F));
624+
EXPECT_EQ(Known.One, APInt(8, 0x80));
607625
}
608626

609627
TEST_F(AArch64SelectionDAGTest, isSplatValue_Fixed_BUILD_VECTOR) {

0 commit comments

Comments
 (0)