From 5b4197cc56474ea40342e21683cd80732c170389 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 13 May 2025 15:02:30 +0100 Subject: [PATCH 1/4] [SwitchLowering] Support merging 0 and power-of-2 case. --- .../llvm/CodeGen/GlobalISel/IRTranslator.h | 14 +- .../llvm/CodeGen/SwitchLoweringUtils.h | 5 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 23 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 16 +- llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 35 ++ .../AArch64/switch-cases-to-branch-and.ll | 322 ++++++++---------- 6 files changed, 211 insertions(+), 204 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 3d7ccd55ee042..c6af880794bfa 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -405,13 +405,13 @@ class IRTranslator : public MachineFunctionPass { BranchProbability UnhandledProbs, SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough, bool FallthroughUnreachable); - bool lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, Value *Cond, - MachineBasicBlock *Fallthrough, - bool FallthroughUnreachable, - BranchProbability UnhandledProbs, - MachineBasicBlock *CurMBB, - MachineIRBuilder &MIB, - MachineBasicBlock *SwitchMBB); + bool lowerSwitchAndOrRangeWorkItem(SwitchCG::CaseClusterIt I, Value *Cond, + MachineBasicBlock *Fallthrough, + bool FallthroughUnreachable, + BranchProbability UnhandledProbs, + MachineBasicBlock *CurMBB, + MachineIRBuilder &MIB, + MachineBasicBlock *SwitchMBB); bool lowerBitTestWorkItem( SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB, diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h index 9f1d6f7b4f952..6b7cb8d9ce45a 100644 --- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h +++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h @@ -35,7 +35,8 @@ enum CaseClusterKind { /// A cluster of cases suitable 
for jump table lowering. CC_JumpTable, /// A cluster of cases suitable for bit test lowering. - CC_BitTests + CC_BitTests, + CC_And }; /// A cluster of case labels. @@ -141,6 +142,8 @@ struct CaseBlock { BranchProbability TrueProb, FalseProb; bool IsUnpredictable; + bool EmitAnd = false; + // Constructor for SelectionDAG. CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, const Value *cmpmiddle, MachineBasicBlock *truebb, diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 541269ab6bfce..a808a1232f8e9 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1058,18 +1058,15 @@ bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W, } return true; } -bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, - Value *Cond, - MachineBasicBlock *Fallthrough, - bool FallthroughUnreachable, - BranchProbability UnhandledProbs, - MachineBasicBlock *CurMBB, - MachineIRBuilder &MIB, - MachineBasicBlock *SwitchMBB) { +bool IRTranslator::lowerSwitchAndOrRangeWorkItem( + SwitchCG::CaseClusterIt I, Value *Cond, MachineBasicBlock *Fallthrough, + bool FallthroughUnreachable, BranchProbability UnhandledProbs, + MachineBasicBlock *CurMBB, MachineIRBuilder &MIB, + MachineBasicBlock *SwitchMBB) { using namespace SwitchCG; const Value *RHS, *LHS, *MHS; CmpInst::Predicate Pred; - if (I->Low == I->High) { + if (I->Low == I->High || I->Kind == CC_And) { // Check Cond == I->Low. Pred = CmpInst::ICMP_EQ; LHS = Cond; @@ -1087,6 +1084,7 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, // The false probability is the sum of all unhandled cases. 
CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, MIB.getDebugLoc(), I->Prob, UnhandledProbs); + CB.EmitAnd = I->Kind == CC_And; emitSwitchCase(CB, SwitchMBB, MIB); return true; @@ -1326,10 +1324,11 @@ bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W, } break; } + case CC_And: case CC_Range: { - if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough, - FallthroughUnreachable, UnhandledProbs, - CurMBB, MIB, SwitchMBB)) { + if (!lowerSwitchAndOrRangeWorkItem(I, Cond, Fallthrough, + FallthroughUnreachable, UnhandledProbs, + CurMBB, MIB, SwitchMBB)) { LLVM_DEBUG(dbgs() << "Failed to lower switch range"); return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 62ba801f69929..9dfcafc5b205d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2887,7 +2887,17 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType()); // Build the setcc now. - if (!CB.CmpMHS) { + if (CB.EmitAnd) { + SDLoc dl = getCurSDLoc(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), CB.CmpRHS->getType(), true); + SDValue C = DAG.getConstant(*cast<ConstantInt>(CB.CmpRHS), dl, VT); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue CondLHS = getValue(CB.CmpLHS); + SDValue And = DAG.getNode(ISD::AND, dl, C.getValueType(), CondLHS, C); + Cond = DAG.getSetCC(dl, MVT::i1, And, Zero, ISD::SETEQ); + } else if (!CB.CmpMHS) { // Fold "(X == true)" to X and "(X == false)" to !X to // handle common cases produced by branch lowering.
if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && @@ -12308,10 +12318,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } break; } + case CC_And: case CC_Range: { const Value *RHS, *LHS, *MHS; ISD::CondCode CC; - if (I->Low == I->High) { + if (I->Low == I->High || I->Kind == CC_And) { // Check Cond == I->Low. CC = ISD::SETEQ; LHS = Cond; @@ -12333,6 +12344,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, getCurSDLoc(), I->Prob, UnhandledProbs); + CB.EmitAnd = I->Kind == CC_And; if (CurMBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); else diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index 038c499fe236e..e534e27416c87 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -362,6 +362,41 @@ void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters, } } Clusters.resize(DstIndex); + + // Check if the clusters contain one checking for 0 and another one checking + // for a power-of-2 constant with matching destinations. Those clusters can be + // combined to a single ane with CC_And. 
+ unsigned ZeroIdx = -1; + for (const auto &[Idx, C] : enumerate(Clusters)) { + if (C.Kind != CC_Range || C.Low != C.High) + continue; + if (C.Low->isZero()) { + ZeroIdx = Idx; + break; + } + } + if (ZeroIdx == -1u) + return; + + unsigned Pow2Idx = -1; + for (const auto &[Idx, C] : enumerate(Clusters)) { + if (C.Kind != CC_Range || C.Low != C.High || C.MBB != Clusters[ZeroIdx].MBB) + continue; + if (C.Low->getValue().isPowerOf2()) { + Pow2Idx = Idx; + break; + } + } + if (Pow2Idx == -1u) + return; + + APInt Pow2 = Clusters[Pow2Idx].Low->getValue(); + APInt NewC = (Pow2 + 1) * -1; + Clusters[ZeroIdx].Low = ConstantInt::get(SI->getContext(), NewC); + Clusters[ZeroIdx].High = ConstantInt::get(SI->getContext(), NewC); + Clusters[ZeroIdx].Kind = CC_And; + Clusters[ZeroIdx].Prob += Clusters[Pow2Idx].Prob; + Clusters.erase(Clusters.begin() + Pow2Idx); } bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters, diff --git a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll index 775ab3fe110e0..271ecf0889993 100644 --- a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll +++ b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll @@ -5,57 +5,47 @@ define i32 @switch_with_matching_dests_0_and_pow2_3_cases(i8 %v) { ; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w9, #100 ; =0x64 -; CHECK-NEXT: mov w8, #20 ; =0x14 +; CHECK-NEXT: mov w8, #100 ; =0x64 +; CHECK-NEXT: mov w9, #223 ; =0xdf ; CHECK-NEXT: LBB0_1: ; %loop.header ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ands w10, w0, #0xff -; CHECK-NEXT: b.eq LBB0_6 +; CHECK-NEXT: tst w0, w9 +; CHECK-NEXT: b.eq LBB0_4 ; CHECK-NEXT: ; %bb.2: ; %loop.header ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: cmp w10, #32 -; CHECK-NEXT: b.eq LBB0_6 -; CHECK-NEXT: ; %bb.3: ; %loop.header -; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: and w10, w0, 
#0xff ; CHECK-NEXT: cmp w10, #124 -; CHECK-NEXT: b.eq LBB0_7 -; CHECK-NEXT: ; %bb.4: ; %loop.latch +; CHECK-NEXT: b.eq LBB0_5 +; CHECK-NEXT: ; %bb.3: ; %loop.latch ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: subs w9, w9, #1 +; CHECK-NEXT: subs w8, w8, #1 ; CHECK-NEXT: b.ne LBB0_1 -; CHECK-NEXT: ; %bb.5: -; CHECK-NEXT: mov w8, #20 ; =0x14 -; CHECK-NEXT: LBB0_6: ; %common.ret -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: LBB0_4: +; CHECK-NEXT: mov w0, #20 ; =0x14 ; CHECK-NEXT: ret -; CHECK-NEXT: LBB0_7: ; %e2 +; CHECK-NEXT: LBB0_5: ; %e2 ; CHECK-NEXT: mov w0, #30 ; =0x1e ; CHECK-NEXT: ret ; ; GISEL-LABEL: switch_with_matching_dests_0_and_pow2_3_cases: ; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: mov w8, w0 +; GISEL-NEXT: mov w8, #100 ; =0x64 +; GISEL-NEXT: and w9, w0, #0xff ; GISEL-NEXT: mov w0, #20 ; =0x14 -; GISEL-NEXT: mov w9, #100 ; =0x64 ; GISEL-NEXT: LBB0_1: ; %loop.header ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: ands w10, w8, #0xff -; GISEL-NEXT: b.eq LBB0_5 +; GISEL-NEXT: tbnz wzr, #0, LBB0_4 ; GISEL-NEXT: ; %bb.2: ; %loop.header ; GISEL-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; GISEL-NEXT: cmp w10, #32 +; GISEL-NEXT: cmp w9, #124 ; GISEL-NEXT: b.eq LBB0_5 -; GISEL-NEXT: ; %bb.3: ; %loop.header +; GISEL-NEXT: ; %bb.3: ; %loop.latch ; GISEL-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; GISEL-NEXT: cmp w10, #124 -; GISEL-NEXT: b.eq LBB0_6 -; GISEL-NEXT: ; %bb.4: ; %loop.latch -; GISEL-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; GISEL-NEXT: subs w9, w9, #1 +; GISEL-NEXT: subs w8, w8, #1 ; GISEL-NEXT: b.ne LBB0_1 -; GISEL-NEXT: LBB0_5: ; %common.ret +; GISEL-NEXT: LBB0_4: ; %common.ret ; GISEL-NEXT: ret -; GISEL-NEXT: LBB0_6: ; %e2 +; GISEL-NEXT: LBB0_5: ; %e2 ; GISEL-NEXT: mov w0, #30 ; =0x1e ; GISEL-NEXT: ret entry: @@ -84,59 +74,53 @@ e2: define i32 @switch_with_matching_dests_0_and_pow2_3_cases_swapped(i8 %v) { ; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases_swapped: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w9, #100 ; 
=0x64 -; CHECK-NEXT: mov w8, #20 ; =0x14 +; CHECK-NEXT: mov w8, #100 ; =0x64 +; CHECK-NEXT: mov w9, #223 ; =0xdf ; CHECK-NEXT: LBB1_1: ; %loop.header ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ands w10, w0, #0xff -; CHECK-NEXT: b.eq LBB1_6 +; CHECK-NEXT: tst w0, w9 +; CHECK-NEXT: b.eq LBB1_5 ; CHECK-NEXT: ; %bb.2: ; %loop.header ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: cmp w10, #32 -; CHECK-NEXT: b.eq LBB1_6 -; CHECK-NEXT: ; %bb.3: ; %loop.header -; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: and w10, w0, #0xff ; CHECK-NEXT: cmp w10, #124 -; CHECK-NEXT: b.eq LBB1_7 -; CHECK-NEXT: ; %bb.4: ; %loop.latch +; CHECK-NEXT: b.eq LBB1_6 +; CHECK-NEXT: ; %bb.3: ; %loop.latch ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: subs w9, w9, #1 +; CHECK-NEXT: subs w8, w8, #1 ; CHECK-NEXT: b.ne LBB1_1 -; CHECK-NEXT: ; %bb.5: -; CHECK-NEXT: mov w8, #10 ; =0xa -; CHECK-NEXT: LBB1_6: ; %common.ret -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ; %bb.4: +; CHECK-NEXT: mov w0, #10 ; =0xa +; CHECK-NEXT: ret +; CHECK-NEXT: LBB1_5: +; CHECK-NEXT: mov w0, #20 ; =0x14 ; CHECK-NEXT: ret -; CHECK-NEXT: LBB1_7: ; %e2 +; CHECK-NEXT: LBB1_6: ; %e2 ; CHECK-NEXT: mov w0, #30 ; =0x1e ; CHECK-NEXT: ret ; ; GISEL-LABEL: switch_with_matching_dests_0_and_pow2_3_cases_swapped: ; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: mov w9, #100 ; =0x64 -; GISEL-NEXT: mov w8, #20 ; =0x14 +; GISEL-NEXT: mov w8, #100 ; =0x64 +; GISEL-NEXT: and w9, w0, #0xff ; GISEL-NEXT: LBB1_1: ; %loop.header ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: ands w10, w0, #0xff -; GISEL-NEXT: b.eq LBB1_6 +; GISEL-NEXT: tbnz wzr, #0, LBB1_5 ; GISEL-NEXT: ; %bb.2: ; %loop.header ; GISEL-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; GISEL-NEXT: cmp w10, #32 +; GISEL-NEXT: cmp w9, #124 ; GISEL-NEXT: b.eq LBB1_6 -; GISEL-NEXT: ; %bb.3: ; %loop.header -; GISEL-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; GISEL-NEXT: cmp w10, #124 -; GISEL-NEXT: b.eq LBB1_7 -; GISEL-NEXT: ; 
%bb.4: ; %loop.latch +; GISEL-NEXT: ; %bb.3: ; %loop.latch ; GISEL-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; GISEL-NEXT: subs w9, w9, #1 +; GISEL-NEXT: subs w8, w8, #1 ; GISEL-NEXT: b.ne LBB1_1 -; GISEL-NEXT: ; %bb.5: -; GISEL-NEXT: mov w8, #10 ; =0xa -; GISEL-NEXT: LBB1_6: ; %common.ret -; GISEL-NEXT: mov w0, w8 +; GISEL-NEXT: ; %bb.4: +; GISEL-NEXT: mov w0, #10 ; =0xa +; GISEL-NEXT: ret +; GISEL-NEXT: LBB1_5: +; GISEL-NEXT: mov w0, #20 ; =0x14 ; GISEL-NEXT: ret -; GISEL-NEXT: LBB1_7: ; %e2 +; GISEL-NEXT: LBB1_6: ; %e2 ; GISEL-NEXT: mov w0, #30 ; =0x1e ; GISEL-NEXT: ret entry: @@ -168,65 +152,61 @@ e2: define i32 @switch_with_matching_dests_0_and_pow2_3_cases_with_phi(i8 %v, i1 %c) { ; CHECK-LABEL: switch_with_matching_dests_0_and_pow2_3_cases_with_phi: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: tbz w1, #0, LBB2_8 +; CHECK-NEXT: tbz w1, #0, LBB2_6 ; CHECK-NEXT: ; %bb.1: ; %loop.header.preheader -; CHECK-NEXT: mov w9, #100 ; =0x64 -; CHECK-NEXT: mov w8, #20 ; =0x14 +; CHECK-NEXT: mov w8, #100 ; =0x64 +; CHECK-NEXT: mov w9, #223 ; =0xdf ; CHECK-NEXT: LBB2_2: ; %loop.header ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ands w10, w0, #0xff +; CHECK-NEXT: tst w0, w9 ; CHECK-NEXT: b.eq LBB2_7 ; CHECK-NEXT: ; %bb.3: ; %loop.header ; CHECK-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: cmp w10, #32 -; CHECK-NEXT: b.eq LBB2_7 -; CHECK-NEXT: ; %bb.4: ; %loop.header -; CHECK-NEXT: ; in Loop: Header=BB2_2 Depth=1 +; CHECK-NEXT: and w10, w0, #0xff ; CHECK-NEXT: cmp w10, #124 -; CHECK-NEXT: b.eq LBB2_9 -; CHECK-NEXT: ; %bb.5: ; %loop.latch +; CHECK-NEXT: b.eq LBB2_8 +; CHECK-NEXT: ; %bb.4: ; %loop.latch ; CHECK-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; CHECK-NEXT: subs w9, w9, #1 +; CHECK-NEXT: subs w8, w8, #1 ; CHECK-NEXT: b.ne LBB2_2 -; CHECK-NEXT: ; %bb.6: -; CHECK-NEXT: mov w8, #10 ; =0xa -; CHECK-NEXT: LBB2_7: ; %common.ret -; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ; %bb.5: +; CHECK-NEXT: mov w0, #10 ; =0xa ; CHECK-NEXT: ret -; CHECK-NEXT: LBB2_8: +; 
CHECK-NEXT: LBB2_6: ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret -; CHECK-NEXT: LBB2_9: ; %e2 +; CHECK-NEXT: LBB2_7: +; CHECK-NEXT: mov w0, #20 ; =0x14 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB2_8: ; %e2 ; CHECK-NEXT: mov w0, #30 ; =0x1e ; CHECK-NEXT: ret ; ; GISEL-LABEL: switch_with_matching_dests_0_and_pow2_3_cases_with_phi: ; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: mov w8, wzr -; GISEL-NEXT: tbz w1, #0, LBB2_7 +; GISEL-NEXT: mov w8, w0 +; GISEL-NEXT: mov w0, wzr +; GISEL-NEXT: tbz w1, #0, LBB2_6 ; GISEL-NEXT: ; %bb.1: ; %loop.header.preheader ; GISEL-NEXT: mov w9, #100 ; =0x64 -; GISEL-NEXT: mov w8, #20 ; =0x14 +; GISEL-NEXT: and w8, w8, #0xff ; GISEL-NEXT: LBB2_2: ; %loop.header ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: ands w10, w0, #0xff -; GISEL-NEXT: b.eq LBB2_7 +; GISEL-NEXT: tbnz wzr, #0, LBB2_7 ; GISEL-NEXT: ; %bb.3: ; %loop.header ; GISEL-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GISEL-NEXT: cmp w10, #32 -; GISEL-NEXT: b.eq LBB2_7 -; GISEL-NEXT: ; %bb.4: ; %loop.header -; GISEL-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GISEL-NEXT: cmp w10, #124 +; GISEL-NEXT: cmp w8, #124 ; GISEL-NEXT: b.eq LBB2_8 -; GISEL-NEXT: ; %bb.5: ; %loop.latch +; GISEL-NEXT: ; %bb.4: ; %loop.latch ; GISEL-NEXT: ; in Loop: Header=BB2_2 Depth=1 ; GISEL-NEXT: subs w9, w9, #1 ; GISEL-NEXT: b.ne LBB2_2 -; GISEL-NEXT: ; %bb.6: -; GISEL-NEXT: mov w8, #10 ; =0xa -; GISEL-NEXT: LBB2_7: ; %common.ret -; GISEL-NEXT: mov w0, w8 +; GISEL-NEXT: ; %bb.5: +; GISEL-NEXT: mov w0, #10 ; =0xa +; GISEL-NEXT: LBB2_6: ; %common.ret +; GISEL-NEXT: ret +; GISEL-NEXT: LBB2_7: +; GISEL-NEXT: mov w0, #20 ; =0x14 ; GISEL-NEXT: ret ; GISEL-NEXT: LBB2_8: ; %e2 ; GISEL-NEXT: mov w0, #30 ; =0x1e @@ -361,43 +341,36 @@ define i32 @switch_in_loop_with_matching_dests_0_and_pow2_3_cases(ptr %start) { ; CHECK-NEXT: LBB4_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrb w9, [x8], #1 -; CHECK-NEXT: cbz w9, LBB4_4 +; CHECK-NEXT: tst w9, #0xffffffdf +; CHECK-NEXT: b.eq LBB4_4 
; CHECK-NEXT: ; %bb.2: ; %loop ; CHECK-NEXT: ; in Loop: Header=BB4_1 Depth=1 ; CHECK-NEXT: cmp w9, #124 -; CHECK-NEXT: b.eq LBB4_5 -; CHECK-NEXT: ; %bb.3: ; %loop -; CHECK-NEXT: ; in Loop: Header=BB4_1 Depth=1 -; CHECK-NEXT: cmp w9, #32 ; CHECK-NEXT: b.ne LBB4_1 +; CHECK-NEXT: ; %bb.3: ; %e2.loopexit +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret ; CHECK-NEXT: LBB4_4: ; %e1 ; CHECK-NEXT: mov w0, #-1 ; =0xffffffff ; CHECK-NEXT: ret -; CHECK-NEXT: LBB4_5: ; %e2.loopexit -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret ; ; GISEL-LABEL: switch_in_loop_with_matching_dests_0_and_pow2_3_cases: ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: add x8, x0, #1 ; GISEL-NEXT: LBB4_1: ; %loop ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: ldrb w9, [x8], #1 -; GISEL-NEXT: cbz w9, LBB4_4 +; GISEL-NEXT: tbnz wzr, #0, LBB4_4 ; GISEL-NEXT: ; %bb.2: ; %loop ; GISEL-NEXT: ; in Loop: Header=BB4_1 Depth=1 +; GISEL-NEXT: ldrb w9, [x8], #1 ; GISEL-NEXT: cmp w9, #124 -; GISEL-NEXT: b.eq LBB4_5 -; GISEL-NEXT: ; %bb.3: ; %loop -; GISEL-NEXT: ; in Loop: Header=BB4_1 Depth=1 -; GISEL-NEXT: cmp w9, #32 ; GISEL-NEXT: b.ne LBB4_1 +; GISEL-NEXT: ; %bb.3: ; %e2.loopexit +; GISEL-NEXT: mov w0, wzr +; GISEL-NEXT: ret ; GISEL-NEXT: LBB4_4: ; %e1 ; GISEL-NEXT: mov w0, #-1 ; =0xffffffff ; GISEL-NEXT: ret -; GISEL-NEXT: LBB4_5: ; %e2.loopexit -; GISEL-NEXT: mov w0, wzr -; GISEL-NEXT: ret entry: br label %loop @@ -581,13 +554,9 @@ define void @test_successor_with_loop_phi(ptr %A, ptr %B) { ; GISEL-NEXT: ldr w8, [x0] ; GISEL-NEXT: str wzr, [x0] ; GISEL-NEXT: mov x0, x1 -; GISEL-NEXT: cbz w8, LBB7_1 -; GISEL-NEXT: ; %bb.2: ; %loop -; GISEL-NEXT: ; in Loop: Header=BB7_1 Depth=1 -; GISEL-NEXT: cmp w8, #4 -; GISEL-NEXT: mov x0, x1 +; GISEL-NEXT: cmn w8, #5 ; GISEL-NEXT: b.eq LBB7_1 -; GISEL-NEXT: ; %bb.3: ; %exit +; GISEL-NEXT: ; %bb.2: ; %exit ; GISEL-NEXT: ret entry: br label %loop @@ -842,42 +811,38 @@ e1: define void @merge_with_stores(ptr %A, i16 %v) { ; CHECK-LABEL: merge_with_stores: ; CHECK: ; 
%bb.0: ; %entry -; CHECK-NEXT: and w8, w1, #0xffff -; CHECK-NEXT: sub w9, w8, #10 -; CHECK-NEXT: cmp w9, #2 -; CHECK-NEXT: b.lo LBB11_4 +; CHECK-NEXT: mov w8, #65533 ; =0xfffd +; CHECK-NEXT: tst w1, w8 +; CHECK-NEXT: b.eq LBB11_3 ; CHECK-NEXT: ; %bb.1: ; %entry -; CHECK-NEXT: cbz w8, LBB11_5 -; CHECK-NEXT: ; %bb.2: ; %entry +; CHECK-NEXT: and w8, w1, #0xffff +; CHECK-NEXT: sub w8, w8, #10 ; CHECK-NEXT: cmp w8, #2 -; CHECK-NEXT: b.eq LBB11_5 -; CHECK-NEXT: ; %bb.3: ; %default.dst -; CHECK-NEXT: strh wzr, [x0] -; CHECK-NEXT: ret -; CHECK-NEXT: LBB11_4: ; %other.dst +; CHECK-NEXT: b.hs LBB11_4 +; CHECK-NEXT: ; %bb.2: ; %other.dst ; CHECK-NEXT: mov w8, #1 ; =0x1 ; CHECK-NEXT: strh w8, [x0, #36] -; CHECK-NEXT: LBB11_5: ; %pow2.dst +; CHECK-NEXT: LBB11_3: ; %pow2.dst +; CHECK-NEXT: ret +; CHECK-NEXT: LBB11_4: ; %default.dst +; CHECK-NEXT: strh wzr, [x0] ; CHECK-NEXT: ret ; ; GISEL-LABEL: merge_with_stores: ; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: and w8, w1, #0xffff -; GISEL-NEXT: sub w9, w8, #10 -; GISEL-NEXT: cmp w9, #1 -; GISEL-NEXT: b.ls LBB11_4 +; GISEL-NEXT: tbnz wzr, #0, LBB11_3 ; GISEL-NEXT: ; %bb.1: ; %entry -; GISEL-NEXT: cbz w8, LBB11_5 -; GISEL-NEXT: ; %bb.2: ; %entry -; GISEL-NEXT: cmp w8, #2 -; GISEL-NEXT: b.eq LBB11_5 -; GISEL-NEXT: ; %bb.3: ; %default.dst -; GISEL-NEXT: strh wzr, [x0] -; GISEL-NEXT: ret -; GISEL-NEXT: LBB11_4: ; %other.dst +; GISEL-NEXT: mov w8, #-10 ; =0xfffffff6 +; GISEL-NEXT: add w8, w8, w1, uxth +; GISEL-NEXT: cmp w8, #1 +; GISEL-NEXT: b.hi LBB11_4 +; GISEL-NEXT: ; %bb.2: ; %other.dst ; GISEL-NEXT: mov w8, #1 ; =0x1 ; GISEL-NEXT: strh w8, [x0, #36] -; GISEL-NEXT: LBB11_5: ; %pow2.dst +; GISEL-NEXT: LBB11_3: ; %pow2.dst +; GISEL-NEXT: ret +; GISEL-NEXT: LBB11_4: ; %default.dst +; GISEL-NEXT: strh wzr, [x0] ; GISEL-NEXT: ret entry: switch i16 %v, label %default.dst [ @@ -903,63 +868,56 @@ pow2.dst: define void @switch_large_enough_for_clustering(i32 %x, ptr %dst) { ; CHECK-LABEL: switch_large_enough_for_clustering: ; CHECK: ; %bb.0: ; 
%entry -; CHECK-NEXT: cmp w0, #1 -; CHECK-NEXT: b.le LBB12_5 +; CHECK-NEXT: cmp w0, #3 +; CHECK-NEXT: b.gt LBB12_4 ; CHECK-NEXT: ; %bb.1: ; %entry -; CHECK-NEXT: cmp w0, #7 -; CHECK-NEXT: b.eq LBB12_9 -; CHECK-NEXT: ; %bb.2: ; %entry +; CHECK-NEXT: tst w0, #0xfffffffd +; CHECK-NEXT: b.ne LBB12_7 +; CHECK-NEXT: ; %bb.2: ; %succ.1 +; CHECK-NEXT: strb wzr, [x1] +; CHECK-NEXT: LBB12_3: ; %exit +; CHECK-NEXT: ret +; CHECK-NEXT: LBB12_4: ; %entry ; CHECK-NEXT: cmp w0, #4 -; CHECK-NEXT: b.eq LBB12_7 -; CHECK-NEXT: ; %bb.3: ; %entry -; CHECK-NEXT: cmp w0, #2 ; CHECK-NEXT: b.eq LBB12_8 -; CHECK-NEXT: LBB12_4: ; %exit +; CHECK-NEXT: ; %bb.5: ; %entry +; CHECK-NEXT: cmp w0, #7 +; CHECK-NEXT: b.ne LBB12_3 +; CHECK-NEXT: ; %bb.6: ; %succ.3 +; CHECK-NEXT: strh wzr, [x1] ; CHECK-NEXT: ret -; CHECK-NEXT: LBB12_5: ; %entry -; CHECK-NEXT: cbz w0, LBB12_8 -; CHECK-NEXT: ; %bb.6: ; %entry +; CHECK-NEXT: LBB12_7: ; %entry ; CHECK-NEXT: cmp w0, #1 -; CHECK-NEXT: b.ne LBB12_4 -; CHECK-NEXT: LBB12_7: ; %succ.2 +; CHECK-NEXT: b.ne LBB12_3 +; CHECK-NEXT: LBB12_8: ; %succ.2 ; CHECK-NEXT: str wzr, [x1] ; CHECK-NEXT: ret -; CHECK-NEXT: LBB12_8: ; %succ.1 -; CHECK-NEXT: strb wzr, [x1] -; CHECK-NEXT: ret -; CHECK-NEXT: LBB12_9: ; %succ.3 -; CHECK-NEXT: strh wzr, [x1] -; CHECK-NEXT: ret ; ; GISEL-LABEL: switch_large_enough_for_clustering: ; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: cmp w0, #2 -; GISEL-NEXT: b.lt LBB12_5 -; GISEL-NEXT: ; %bb.1: ; %entry -; GISEL-NEXT: cmp w0, #7 -; GISEL-NEXT: b.eq LBB12_9 -; GISEL-NEXT: ; %bb.2: ; %entry ; GISEL-NEXT: cmp w0, #4 -; GISEL-NEXT: b.eq LBB12_7 -; GISEL-NEXT: ; %bb.3: ; %entry -; GISEL-NEXT: cmp w0, #2 +; GISEL-NEXT: b.ge LBB12_4 +; GISEL-NEXT: ; %bb.1: ; %entry +; GISEL-NEXT: cmn w0, #3 +; GISEL-NEXT: b.ne LBB12_7 +; GISEL-NEXT: ; %bb.2: ; %succ.1 +; GISEL-NEXT: strb wzr, [x1] +; GISEL-NEXT: LBB12_3: ; %exit +; GISEL-NEXT: ret +; GISEL-NEXT: LBB12_4: ; %entry ; GISEL-NEXT: b.eq LBB12_8 -; GISEL-NEXT: LBB12_4: ; %exit +; GISEL-NEXT: ; %bb.5: ; %entry +; 
GISEL-NEXT: cmp w0, #7 +; GISEL-NEXT: b.ne LBB12_3 +; GISEL-NEXT: ; %bb.6: ; %succ.3 +; GISEL-NEXT: strh wzr, [x1] ; GISEL-NEXT: ret -; GISEL-NEXT: LBB12_5: ; %entry -; GISEL-NEXT: cbz w0, LBB12_8 -; GISEL-NEXT: ; %bb.6: ; %entry +; GISEL-NEXT: LBB12_7: ; %entry ; GISEL-NEXT: cmp w0, #1 -; GISEL-NEXT: b.ne LBB12_4 -; GISEL-NEXT: LBB12_7: ; %succ.2 +; GISEL-NEXT: b.ne LBB12_3 +; GISEL-NEXT: LBB12_8: ; %succ.2 ; GISEL-NEXT: str wzr, [x1] ; GISEL-NEXT: ret -; GISEL-NEXT: LBB12_8: ; %succ.1 -; GISEL-NEXT: strb wzr, [x1] -; GISEL-NEXT: ret -; GISEL-NEXT: LBB12_9: ; %succ.3 -; GISEL-NEXT: strh wzr, [x1] -; GISEL-NEXT: ret entry: switch i32 %x, label %exit [ i32 0, label %succ.1 From e60c54fcf61969cb4e66386389962d8112d152e8 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 28 May 2025 19:08:48 +0100 Subject: [PATCH 2/4] !fixup address latest comments, thanks! --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 ++- llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 9dfcafc5b205d..be6e3774452cc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -12322,7 +12322,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, case CC_Range: { const Value *RHS, *LHS, *MHS; ISD::CondCode CC; - if (I->Low == I->High || I->Kind == CC_And) { + if (I->Low == I->High) { // Check Cond == I->Low. CC = ISD::SETEQ; LHS = Cond; @@ -12330,6 +12330,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, MHS = nullptr; } else { // Check I->Low <= Cond <= I->High. 
+ assert(I->Kind != CC_And && "CC_And must be handled above"); CC = ISD::SETLE; LHS = I->Low; MHS = Cond; diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index e534e27416c87..d65f99813599f 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -365,7 +365,7 @@ void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters, // Check if the clusters contain one checking for 0 and another one checking // for a power-of-2 constant with matching destinations. Those clusters can be - // combined to a single ane with CC_And. + // combined to a single one with CC_And. unsigned ZeroIdx = -1; for (const auto &[Idx, C] : enumerate(Clusters)) { if (C.Kind != CC_Range || C.Low != C.High) @@ -391,7 +391,7 @@ void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters, return; APInt Pow2 = Clusters[Pow2Idx].Low->getValue(); - APInt NewC = (Pow2 + 1) * -1; + APInt NewC = ~Pow2; Clusters[ZeroIdx].Low = ConstantInt::get(SI->getContext(), NewC); Clusters[ZeroIdx].High = ConstantInt::get(SI->getContext(), NewC); Clusters[ZeroIdx].Kind = CC_And; From 31b25a60b70c8efad34e68c8e3b0c8b552020759 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 28 May 2025 19:18:21 +0100 Subject: [PATCH 3/4] !fixup add missing GISel implementation. --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 9 +++- .../AArch64/switch-cases-to-branch-and.ll | 50 +++++++++++-------- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index a808a1232f8e9..2374e33d16ee9 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -930,7 +930,14 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, const LLT i1Ty = LLT::scalar(1); // Build the compare. 
- if (!CB.CmpMHS) { + if (CB.EmitAnd) { + const LLT Ty = getLLTForType(*CB.CmpRHS->getType(), *DL); + Register CondLHS = getOrCreateVReg(*CB.CmpLHS); + Register C = getOrCreateVReg(*CB.CmpRHS); + Register And = MIB.buildAnd(Ty, CondLHS, C).getReg(0); + auto Zero = MIB.buildConstant(Ty, 0); + Cond = MIB.buildICmp(CmpInst::ICMP_EQ, i1Ty, And, Zero).getReg(0); + } else if (!CB.CmpMHS) { const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS); // For conditional branch lowering, we might try to do something silly like // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so, diff --git a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll index 271ecf0889993..60ebe915e888a 100644 --- a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll +++ b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll @@ -29,19 +29,22 @@ define i32 @switch_with_matching_dests_0_and_pow2_3_cases(i8 %v) { ; ; GISEL-LABEL: switch_with_matching_dests_0_and_pow2_3_cases: ; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: mov w8, #100 ; =0x64 -; GISEL-NEXT: and w9, w0, #0xff +; GISEL-NEXT: mov w8, w0 +; GISEL-NEXT: mov w9, #100 ; =0x64 +; GISEL-NEXT: mov w10, #223 ; =0xdf ; GISEL-NEXT: mov w0, #20 ; =0x14 ; GISEL-NEXT: LBB0_1: ; %loop.header ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: tbnz wzr, #0, LBB0_4 +; GISEL-NEXT: tst w8, w10 +; GISEL-NEXT: b.eq LBB0_4 ; GISEL-NEXT: ; %bb.2: ; %loop.header ; GISEL-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; GISEL-NEXT: cmp w9, #124 +; GISEL-NEXT: and w11, w8, #0xff +; GISEL-NEXT: cmp w11, #124 ; GISEL-NEXT: b.eq LBB0_5 ; GISEL-NEXT: ; %bb.3: ; %loop.latch ; GISEL-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; GISEL-NEXT: subs w8, w8, #1 +; GISEL-NEXT: subs w9, w9, #1 ; GISEL-NEXT: b.ne LBB0_1 ; GISEL-NEXT: LBB0_4: ; %common.ret ; GISEL-NEXT: ret @@ -102,13 +105,15 @@ define i32 @switch_with_matching_dests_0_and_pow2_3_cases_swapped(i8 %v) { ; GISEL-LABEL:
switch_with_matching_dests_0_and_pow2_3_cases_swapped: ; GISEL: ; %bb.0: ; %entry ; GISEL-NEXT: mov w8, #100 ; =0x64 -; GISEL-NEXT: and w9, w0, #0xff +; GISEL-NEXT: mov w9, #223 ; =0xdf ; GISEL-NEXT: LBB1_1: ; %loop.header ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: tbnz wzr, #0, LBB1_5 +; GISEL-NEXT: tst w0, w9 +; GISEL-NEXT: b.eq LBB1_5 ; GISEL-NEXT: ; %bb.2: ; %loop.header ; GISEL-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; GISEL-NEXT: cmp w9, #124 +; GISEL-NEXT: and w10, w0, #0xff +; GISEL-NEXT: cmp w10, #124 ; GISEL-NEXT: b.eq LBB1_6 ; GISEL-NEXT: ; %bb.3: ; %loop.latch ; GISEL-NEXT: ; in Loop: Header=BB1_1 Depth=1 @@ -184,26 +189,28 @@ define i32 @switch_with_matching_dests_0_and_pow2_3_cases_with_phi(i8 %v, i1 %c) ; ; GISEL-LABEL: switch_with_matching_dests_0_and_pow2_3_cases_with_phi: ; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: mov w8, w0 -; GISEL-NEXT: mov w0, wzr +; GISEL-NEXT: mov w8, wzr ; GISEL-NEXT: tbz w1, #0, LBB2_6 ; GISEL-NEXT: ; %bb.1: ; %loop.header.preheader -; GISEL-NEXT: mov w9, #100 ; =0x64 -; GISEL-NEXT: and w8, w8, #0xff +; GISEL-NEXT: mov w8, #100 ; =0x64 +; GISEL-NEXT: mov w9, #223 ; =0xdf ; GISEL-NEXT: LBB2_2: ; %loop.header ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: tbnz wzr, #0, LBB2_7 +; GISEL-NEXT: tst w0, w9 +; GISEL-NEXT: b.eq LBB2_7 ; GISEL-NEXT: ; %bb.3: ; %loop.header ; GISEL-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GISEL-NEXT: cmp w8, #124 +; GISEL-NEXT: and w10, w0, #0xff +; GISEL-NEXT: cmp w10, #124 ; GISEL-NEXT: b.eq LBB2_8 ; GISEL-NEXT: ; %bb.4: ; %loop.latch ; GISEL-NEXT: ; in Loop: Header=BB2_2 Depth=1 -; GISEL-NEXT: subs w9, w9, #1 +; GISEL-NEXT: subs w8, w8, #1 ; GISEL-NEXT: b.ne LBB2_2 ; GISEL-NEXT: ; %bb.5: -; GISEL-NEXT: mov w0, #10 ; =0xa +; GISEL-NEXT: mov w8, #10 ; =0xa ; GISEL-NEXT: LBB2_6: ; %common.ret +; GISEL-NEXT: mov w0, w8 ; GISEL-NEXT: ret ; GISEL-NEXT: LBB2_7: ; GISEL-NEXT: mov w0, #20 ; =0x14 @@ -359,10 +366,11 @@ define i32 
@switch_in_loop_with_matching_dests_0_and_pow2_3_cases(ptr %start) { ; GISEL-NEXT: add x8, x0, #1 ; GISEL-NEXT: LBB4_1: ; %loop ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: tbnz wzr, #0, LBB4_4 +; GISEL-NEXT: ldrb w9, [x8], #1 +; GISEL-NEXT: tst w9, #0xffffffdf +; GISEL-NEXT: b.eq LBB4_4 ; GISEL-NEXT: ; %bb.2: ; %loop ; GISEL-NEXT: ; in Loop: Header=BB4_1 Depth=1 -; GISEL-NEXT: ldrb w9, [x8], #1 ; GISEL-NEXT: cmp w9, #124 ; GISEL-NEXT: b.ne LBB4_1 ; GISEL-NEXT: ; %bb.3: ; %e2.loopexit @@ -554,7 +562,7 @@ define void @test_successor_with_loop_phi(ptr %A, ptr %B) { ; GISEL-NEXT: ldr w8, [x0] ; GISEL-NEXT: str wzr, [x0] ; GISEL-NEXT: mov x0, x1 -; GISEL-NEXT: cmn w8, #5 +; GISEL-NEXT: tst w8, #0xfffffffb ; GISEL-NEXT: b.eq LBB7_1 ; GISEL-NEXT: ; %bb.2: ; %exit ; GISEL-NEXT: ret @@ -830,7 +838,9 @@ define void @merge_with_stores(ptr %A, i16 %v) { ; ; GISEL-LABEL: merge_with_stores: ; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: tbnz wzr, #0, LBB11_3 +; GISEL-NEXT: mov w8, #65533 ; =0xfffd +; GISEL-NEXT: tst w1, w8 +; GISEL-NEXT: b.eq LBB11_3 ; GISEL-NEXT: ; %bb.1: ; %entry ; GISEL-NEXT: mov w8, #-10 ; =0xfffffff6 ; GISEL-NEXT: add w8, w8, w1, uxth From 3e52d80725a4880fc591f89dddef6360e4d23a36 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 29 May 2025 17:05:02 +0100 Subject: [PATCH 4/4] !fixup don't perform transform with large clusters. 
--- llvm/lib/CodeGen/SwitchLoweringUtils.cpp | 6 ++ .../AArch64/switch-cases-to-branch-and.ll | 75 ++++++++++--------- 2 files changed, 47 insertions(+), 34 deletions(-) diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index d65f99813599f..9c6448cb0e173 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -363,6 +363,12 @@ void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters, } Clusters.resize(DstIndex); + // Don't try to fold clusters checking for zero and a power-of-2 constant, if + // larger ranges may be lowered as balanced binary trees later on, which won't + // work correctly after applying the transform below. + if (Clusters.size() > 4) + return; + // Check if the clusters contain one checking for 0 and another one checking // for a power-of-2 constant with matching destinations. Those clusters can be // combined to a single one with CC_And. diff --git a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll index 60ebe915e888a..716b7c0ee860c 100644 --- a/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll +++ b/llvm/test/CodeGen/AArch64/switch-cases-to-branch-and.ll @@ -878,56 +878,63 @@ pow2.dst: define void @switch_large_enough_for_clustering(i32 %x, ptr %dst) { ; CHECK-LABEL: switch_large_enough_for_clustering: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: cmp w0, #3 -; CHECK-NEXT: b.gt LBB12_4 +; CHECK-NEXT: cmp w0, #1 +; CHECK-NEXT: b.le LBB12_5 ; CHECK-NEXT: ; %bb.1: ; %entry -; CHECK-NEXT: tst w0, #0xfffffffd -; CHECK-NEXT: b.ne LBB12_7 -; CHECK-NEXT: ; %bb.2: ; %succ.1 -; CHECK-NEXT: strb wzr, [x1] -; CHECK-NEXT: LBB12_3: ; %exit -; CHECK-NEXT: ret -; CHECK-NEXT: LBB12_4: ; %entry +; CHECK-NEXT: cmp w0, #7 +; CHECK-NEXT: b.eq LBB12_9 +; CHECK-NEXT: ; %bb.2: ; %entry ; CHECK-NEXT: cmp w0, #4 +; CHECK-NEXT: b.eq LBB12_7 +; CHECK-NEXT: ; %bb.3: ; %entry +; CHECK-NEXT: cmp 
w0, #2 ; CHECK-NEXT: b.eq LBB12_8 -; CHECK-NEXT: ; %bb.5: ; %entry -; CHECK-NEXT: cmp w0, #7 -; CHECK-NEXT: b.ne LBB12_3 -; CHECK-NEXT: ; %bb.6: ; %succ.3 -; CHECK-NEXT: strh wzr, [x1] +; CHECK-NEXT: LBB12_4: ; %exit ; CHECK-NEXT: ret -; CHECK-NEXT: LBB12_7: ; %entry +; CHECK-NEXT: LBB12_5: ; %entry +; CHECK-NEXT: cbz w0, LBB12_8 +; CHECK-NEXT: ; %bb.6: ; %entry ; CHECK-NEXT: cmp w0, #1 -; CHECK-NEXT: b.ne LBB12_3 -; CHECK-NEXT: LBB12_8: ; %succ.2 +; CHECK-NEXT: b.ne LBB12_4 +; CHECK-NEXT: LBB12_7: ; %succ.2 ; CHECK-NEXT: str wzr, [x1] ; CHECK-NEXT: ret +; CHECK-NEXT: LBB12_8: ; %succ.1 +; CHECK-NEXT: strb wzr, [x1] +; CHECK-NEXT: ret +; CHECK-NEXT: LBB12_9: ; %succ.3 +; CHECK-NEXT: strh wzr, [x1] +; CHECK-NEXT: ret ; ; GISEL-LABEL: switch_large_enough_for_clustering: ; GISEL: ; %bb.0: ; %entry -; GISEL-NEXT: cmp w0, #4 -; GISEL-NEXT: b.ge LBB12_4 +; GISEL-NEXT: cmp w0, #2 +; GISEL-NEXT: b.lt LBB12_5 ; GISEL-NEXT: ; %bb.1: ; %entry -; GISEL-NEXT: cmn w0, #3 -; GISEL-NEXT: b.ne LBB12_7 -; GISEL-NEXT: ; %bb.2: ; %succ.1 -; GISEL-NEXT: strb wzr, [x1] -; GISEL-NEXT: LBB12_3: ; %exit -; GISEL-NEXT: ret -; GISEL-NEXT: LBB12_4: ; %entry -; GISEL-NEXT: b.eq LBB12_8 -; GISEL-NEXT: ; %bb.5: ; %entry ; GISEL-NEXT: cmp w0, #7 -; GISEL-NEXT: b.ne LBB12_3 -; GISEL-NEXT: ; %bb.6: ; %succ.3 -; GISEL-NEXT: strh wzr, [x1] +; GISEL-NEXT: b.eq LBB12_9 +; GISEL-NEXT: ; %bb.2: ; %entry +; GISEL-NEXT: cmp w0, #4 +; GISEL-NEXT: b.eq LBB12_7 +; GISEL-NEXT: ; %bb.3: ; %entry +; GISEL-NEXT: cmp w0, #2 +; GISEL-NEXT: b.eq LBB12_8 +; GISEL-NEXT: LBB12_4: ; %exit ; GISEL-NEXT: ret -; GISEL-NEXT: LBB12_7: ; %entry +; GISEL-NEXT: LBB12_5: ; %entry +; GISEL-NEXT: cbz w0, LBB12_8 +; GISEL-NEXT: ; %bb.6: ; %entry ; GISEL-NEXT: cmp w0, #1 -; GISEL-NEXT: b.ne LBB12_3 -; GISEL-NEXT: LBB12_8: ; %succ.2 +; GISEL-NEXT: b.ne LBB12_4 +; GISEL-NEXT: LBB12_7: ; %succ.2 ; GISEL-NEXT: str wzr, [x1] ; GISEL-NEXT: ret +; GISEL-NEXT: LBB12_8: ; %succ.1 +; GISEL-NEXT: strb wzr, [x1] +; GISEL-NEXT: ret +; 
GISEL-NEXT: LBB12_9: ; %succ.3 +; GISEL-NEXT: strh wzr, [x1] +; GISEL-NEXT: ret entry: switch i32 %x, label %exit [ i32 0, label %succ.1