From db4d62a38eed0439572f8e276290a2aed92d90d2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 2 Sep 2025 12:26:26 +0100 Subject: [PATCH 1/2] [AArch64] canCreateUndefOrPoisonForTargetNode - AArch64ISD::VASHR can't create undef/poison We can always fold freeze(vashr(x,y)) -> vashr(freeze(x),freeze(y)) as VASHR has defined behaviour for out-of-range shift amounts. Test coverage can be tricky, so I've hijacked a ComputeNumSignBits test to show that value tracking can still analyse the VASHR node as the FREEZE will have been discarded by the canCreateUndefOrPoison/isGuaranteedNotToBeUndefOrPoison logic in getFreeze(). If this AArch64SelectionDAGTest.cpp approach is OK I'm intending to use it in #149323 once #155696 has landed. --- .../Target/AArch64/AArch64ISelLowering.cpp | 13 ++++++++++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 6 +++++ llvm/test/CodeGen/AArch64/vector-compress.ll | 25 +++++++++---------- .../AArch64/AArch64SelectionDAGTest.cpp | 3 +++ 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index b7011e0ea1669..aaf95361a3b9a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -30702,6 +30702,19 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode( Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth); } +bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode( + SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, + bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const { + + // TODO: Add more target nodes. + switch (Op.getOpcode()) { + case AArch64ISD::VASHR: + return false; + } + return TargetLowering::canCreateUndefOrPoisonForTargetNode( + Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth); +} + bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const { return Op.getOpcode() == AArch64ISD::DUP || Op.getOpcode() == AArch64ISD::MOVI || diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 46738365080f9..d06aa11cf1dd9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -868,6 +868,12 @@ class AArch64TargetLowering : public TargetLowering { TargetLoweringOpt &TLO, unsigned Depth) const override; + bool canCreateUndefOrPoisonForTargetNode(SDValue Op, + const APInt &DemandedElts, + const SelectionDAG &DAG, + bool PoisonOnly, bool ConsiderFlags, + unsigned Depth) const override; + bool isTargetCanonicalConstantNode(SDValue Op) const override; // With the exception of data-predicate transitions, no instructions are diff --git a/llvm/test/CodeGen/AArch64/vector-compress.ll b/llvm/test/CodeGen/AArch64/vector-compress.ll index a580913d40d95..67a0379d05244 100644 --- a/llvm/test/CodeGen/AArch64/vector-compress.ll +++ b/llvm/test/CodeGen/AArch64/vector-compress.ll @@ -12,16 +12,15 @@ define <4 x i32> @test_compress_v4i32(<4 x i32> %vec, <4 x i1> %mask) { ; CHECK-NEXT: shl.4s v1, v1, #31 ; CHECK-NEXT: cmlt.4s v1, v1, #0 ; CHECK-NEXT: mov.s w9, v1[1] -; CHECK-NEXT: mov.s w10, v1[2] ; CHECK-NEXT: fmov w11, s1 +; CHECK-NEXT: mov.s w10, v1[2] +; CHECK-NEXT: and x12, x11, #0x1 ; CHECK-NEXT: bfi x8, x11, #2, #1 -; CHECK-NEXT: and x11, x11, #0x1 -; CHECK-NEXT: and x9, x9, #0x1 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: add x9, x11, x9 ; CHECK-NEXT: mov x11, sp +; CHECK-NEXT: and x9, x9, #0x1 +; CHECK-NEXT: add x9, x12, x9 ; CHECK-NEXT: st1.s { v0 }[1], [x8] -; CHECK-NEXT: add w10, w9, w10 +; CHECK-NEXT: sub w10, w9, w10 ; CHECK-NEXT: orr x9, x11, x9, lsl #2 ; CHECK-NEXT: bfi x11, x10, #2, #2 ; CHECK-NEXT: st1.s { v0 }[2], [x9] @@ -93,7 +92,8 @@ define <2 x double> @test_compress_v2f64(<2 x double> %vec, <2 x i1> %mask) { ; CHECK-NEXT: shl.2d v1, v1, #63 ; CHECK-NEXT: cmlt.2d v1, v1, #0 ; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: bfi x8, x9, #3, #1 +; CHECK-NEXT: and x9, x9, #0x8 +; CHECK-NEXT: orr x8, x8, x9 ; CHECK-NEXT: st1.d { v0 }[1], [x8] ; CHECK-NEXT: ldr q0, [sp], #16 ; CHECK-NEXT: ret @@ -420,16 +420,15 @@ define <3 x i32> @test_compress_narrow(<3 x i32> %vec, <3 x i1> %mask) { ; CHECK-NEXT: shl.4s v1, v1, #31 ; CHECK-NEXT: cmlt.4s v1, v1, #0 ; CHECK-NEXT: mov.s w8, v1[1] -; CHECK-NEXT: mov.s w9, v1[2] ; CHECK-NEXT: fmov w10, s1 +; CHECK-NEXT: mov.s w9, v1[2] +; CHECK-NEXT: and x12, x10, #0x1 ; CHECK-NEXT: bfi x11, x10, #2, #1 -; CHECK-NEXT: and x10, x10, #0x1 -; CHECK-NEXT: and x8, x8, #0x1 -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: add x8, x10, x8 ; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: and x8, x8, #0x1 +; CHECK-NEXT: add x8, x12, x8 ; CHECK-NEXT: st1.s { v0 }[1], [x11] -; CHECK-NEXT: add w9, w8, w9 +; CHECK-NEXT: sub w9, w8, w9 ; CHECK-NEXT: orr x8, x10, x8, lsl #2 ; CHECK-NEXT: bfi x10, x9, #2, #2 ; CHECK-NEXT: st1.s { v0 }[2], [x8] diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp index f06f03bb35a5d..77a6c12e97373 100644 --- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp @@ -172,6 +172,9 @@ TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_VASHR) { auto VecA = DAG->getConstant(0xaa, Loc, VecVT); auto Op2 = DAG->getNode(AArch64ISD::VASHR, Loc, VecVT, VecA, Shift); EXPECT_EQ(DAG->ComputeNumSignBits(Op2), 5u); + // VASHR can't create undef/poison - FREEZE(VASHR(C1,C2)) -> VASHR(C1,C2). + auto Fr2 = DAG->getFreeze(Op2); + EXPECT_EQ(DAG->ComputeNumSignBits(Fr2), 5u); } TEST_F(AArch64SelectionDAGTest, SimplifyDemandedVectorElts_EXTRACT_SUBVECTOR) { From 2cec029d4fea18f50c7564f54294f74d7deb8259 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 3 Sep 2025 14:57:58 +0100 Subject: [PATCH 2/2] canCreateUndefOrPoisonForTargetNode - add VLSHR/VSHL handling --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 ++ .../Target/AArch64/AArch64SelectionDAGTest.cpp | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f2aea5f795d60..131a3ccc0a5d7 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -30966,6 +30966,8 @@ bool AArch64TargetLowering::canCreateUndefOrPoisonForTargetNode( // TODO: Add more target nodes. switch (Op.getOpcode()) { case AArch64ISD::VASHR: + case AArch64ISD::VLSHR: + case AArch64ISD::VSHL: return false; } return TargetLowering::canCreateUndefOrPoisonForTargetNode( diff --git a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp index af4debb93aefa..0d86a084d1983 100644 --- a/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/Target/AArch64/AArch64SelectionDAGTest.cpp @@ -526,6 +526,11 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_VASHR) { Known = DAG->computeKnownBits(Op1); EXPECT_EQ(Known.Zero, APInt(8, 0x00)); EXPECT_EQ(Known.One, APInt(8, 0xFF)); + + auto Fr1 = DAG->getFreeze(Op1); + Known = DAG->computeKnownBits(Fr1); + EXPECT_EQ(Known.Zero, APInt(8, 0x00)); + EXPECT_EQ(Known.One, APInt(8, 0xFF)); } // Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits. @@ -546,6 +551,11 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_VLSHR) { Known = DAG->computeKnownBits(Op1); EXPECT_EQ(Known.Zero, APInt(8, 0xFE)); EXPECT_EQ(Known.One, APInt(8, 0x1)); + + auto Fr1 = DAG->getFreeze(Op1); + Known = DAG->computeKnownBits(Fr1); + EXPECT_EQ(Known.Zero, APInt(8, 0xFE)); + EXPECT_EQ(Known.One, APInt(8, 0x1)); } // Piggy-backing on the AArch64 tests to verify SelectionDAG::computeKnownBits. @@ -566,6 +576,11 @@ TEST_F(AArch64SelectionDAGTest, ComputeKnownBits_VSHL) { Known = DAG->computeKnownBits(Op1); EXPECT_EQ(Known.Zero, APInt(8, 0x7F)); EXPECT_EQ(Known.One, APInt(8, 0x80)); + + auto Fr1 = DAG->getFreeze(Op1); + Known = DAG->computeKnownBits(Fr1); + EXPECT_EQ(Known.Zero, APInt(8, 0x7F)); + EXPECT_EQ(Known.One, APInt(8, 0x80)); } TEST_F(AArch64SelectionDAGTest, isSplatValue_Fixed_BUILD_VECTOR) {