From 07c3871ae01b0c623bd83f113f354c9593c5abde Mon Sep 17 00:00:00 2001 From: PaperChalice Date: Sat, 9 Dec 2023 15:46:02 +0800 Subject: [PATCH] [CodeGen] Port `SelectOptimize` to new pass manager --- .../include/llvm/CodeGen/CodeGenPassBuilder.h | 6 +- .../llvm/CodeGen/MachinePassRegistry.def | 2 +- llvm/include/llvm/CodeGen/SelectOptimize.h | 34 ++++ llvm/lib/CodeGen/SelectOptimize.cpp | 174 ++++++++++++------ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/test/CodeGen/AArch64/O3-pipeline.ll | 3 + ...ert-highly-predictable-select-to-branch.ll | 4 + .../test/CodeGen/AArch64/selectopt-logical.ll | 1 + llvm/test/CodeGen/AArch64/selectopt.ll | 7 + llvm/test/CodeGen/X86/select-optimize.ll | 1 + 11 files changed, 171 insertions(+), 63 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/SelectOptimize.h diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h index bb139ef2eb351..8b0bd3a8f1b8a 100644 --- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h +++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h @@ -19,6 +19,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" @@ -30,6 +31,7 @@ #include "llvm/CodeGen/PreISelIntrinsicLowering.h" #include "llvm/CodeGen/ReplaceWithVeclib.h" #include "llvm/CodeGen/SafeStack.h" +#include "llvm/CodeGen/SelectOptimize.h" #include "llvm/CodeGen/UnreachableBlockElim.h" #include "llvm/CodeGen/WasmEHPrepare.h" #include "llvm/CodeGen/WinEHPrepare.h" @@ -469,6 +471,8 @@ Error CodeGenPassBuilder::buildPipeline( raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, CodeGenFileType FileType) const { AddIRPass addIRPass(MPM, Opt.DebugPM); + // `ProfileSummaryInfo` is always valid. + addIRPass(RequireAnalysisPass()); addISelPasses(addIRPass); AddMachinePass addPass(MFPM); @@ -656,7 +660,7 @@ void CodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const { // Convert conditional moves to conditional jumps when profitable. if (getOptLevel() != CodeGenOptLevel::None && !Opt.DisableSelectOptimize) - addPass(SelectOptimizePass()); + addPass(SelectOptimizePass(&TM)); } /// Turn exception handling constructs into something the code generators can diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def index e6e979a4582c7..27845b4cdd374 100644 --- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def +++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def @@ -53,6 +53,7 @@ FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true)) FUNCTION_PASS("replace-with-veclib", ReplaceWithVeclib, ()) FUNCTION_PASS("safe-stack", SafeStackPass, (TM)) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass, ()) +FUNCTION_PASS("select-optimize", SelectOptimizePass, (TM)) FUNCTION_PASS("tlshoist", TLSVariableHoistPass, ()) FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass, ()) FUNCTION_PASS("verify", VerifierPass, ()) @@ -128,7 +129,6 @@ DUMMY_FUNCTION_PASS("expandmemcmp", ExpandMemCmpPass, ()) DUMMY_FUNCTION_PASS("gc-info-printer", GCInfoPrinterPass, ()) DUMMY_FUNCTION_PASS("gc-lowering", GCLoweringPass, ()) DUMMY_FUNCTION_PASS("indirectbr-expand", IndirectBrExpandPass, ()) -DUMMY_FUNCTION_PASS("select-optimize", SelectOptimizePass, ()) DUMMY_FUNCTION_PASS("shadow-stack-gc-lowering", ShadowStackGCLoweringPass, ()) DUMMY_FUNCTION_PASS("sjljehprepare", SjLjEHPreparePass, ()) DUMMY_FUNCTION_PASS("stack-protector", StackProtectorPass, ()) diff --git a/llvm/include/llvm/CodeGen/SelectOptimize.h b/llvm/include/llvm/CodeGen/SelectOptimize.h new file mode 100644 index 0000000000000..37024a154145a --- /dev/null +++ b/llvm/include/llvm/CodeGen/SelectOptimize.h @@ -0,0 +1,34 @@ +//===--- llvm/CodeGen/SelectOptimize.h ---------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of the SelectOptimizePass class, +/// its corresponding pass name is `select-optimize`. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SELECTOPTIMIZE_H +#define LLVM_CODEGEN_SELECTOPTIMIZE_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class TargetMachine; + +class SelectOptimizePass : public PassInfoMixin { + const TargetMachine *TM; + +public: + explicit SelectOptimizePass(const TargetMachine *TM) : TM(TM) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_SELECTOPTIMIZE_H diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index 05413fb5d7582..1316919e65dac 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectOptimize.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -96,36 +97,22 @@ static cl::opt namespace { -class SelectOptimize : public FunctionPass { +class SelectOptimizeImpl { const TargetMachine *TM = nullptr; const TargetSubtargetInfo *TSI = nullptr; const TargetLowering *TLI = nullptr; const TargetTransformInfo *TTI = nullptr; const LoopInfo *LI = nullptr; - DominatorTree *DT = nullptr; - std::unique_ptr BFI; - std::unique_ptr BPI; + BlockFrequencyInfo *BFI; ProfileSummaryInfo *PSI = nullptr; OptimizationRemarkEmitter *ORE = nullptr; TargetSchedModel TSchedModel; public: - static char ID; - - SelectOptimize() : FunctionPass(ID) { - initializeSelectOptimizePass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } + SelectOptimizeImpl() = default; + SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){}; + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + bool runOnFunction(Function &F, Pass &P); private: // Select groups consist of consecutive select instructions with the same @@ -211,29 +198,94 @@ class SelectOptimize : public FunctionPass { // Returns true if the target architecture supports lowering a given select. bool isSelectKindSupported(SelectInst *SI); }; + +class SelectOptimize : public FunctionPass { + SelectOptimizeImpl Impl; + +public: + static char ID; + + SelectOptimize() : FunctionPass(ID) { + initializeSelectOptimizePass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + return Impl.runOnFunction(F, *this); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } +}; + } // namespace +PreservedAnalyses SelectOptimizePass::run(Function &F, + FunctionAnalysisManager &FAM) { + SelectOptimizeImpl Impl(TM); + return Impl.run(F, FAM); +} + char SelectOptimize::ID = 0; INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, false) FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); } -bool SelectOptimize::runOnFunction(Function &F) { - TM = &getAnalysis().getTM(); +PreservedAnalyses SelectOptimizeImpl::run(Function &F, + FunctionAnalysisManager &FAM) { + TSI = TM->getSubtargetImpl(F); + TLI = TSI->getTargetLowering(); + + // If none of the select types are supported then skip this pass. + // This is an optimization pass. Legality issues will be handled by + // instruction selection. + if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && + !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && + !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) + return PreservedAnalyses::all(); + + TTI = &FAM.getResult(F); + if (!TTI->enableSelectOptimize()) + return PreservedAnalyses::all(); + + PSI = FAM.getResult(F) + .getCachedResult(*F.getParent()); + assert(PSI && "This pass requires module analysis pass `profile-summary`!"); + BFI = &FAM.getResult(F); + + // When optimizing for size, selects are preferable over branches. + if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI)) + return PreservedAnalyses::all(); + + LI = &FAM.getResult(F); + ORE = &FAM.getResult(F); + TSchedModel.init(TSI); + + bool Changed = optimizeSelects(F); + return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} + +bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) { + TM = &P.getAnalysis().getTM(); TSI = TM->getSubtargetImpl(F); TLI = TSI->getTargetLowering(); - // If none of the select types is supported then skip this pass. + // If none of the select types are supported then skip this pass. // This is an optimization pass. Legality issues will be handled by // instruction selection. if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && @@ -241,27 +293,25 @@ bool SelectOptimize::runOnFunction(Function &F) { !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) return false; - TTI = &getAnalysis().getTTI(F); + TTI = &P.getAnalysis().getTTI(F); if (!TTI->enableSelectOptimize()) return false; - DT = &getAnalysis().getDomTree(); - LI = &getAnalysis().getLoopInfo(); - BPI.reset(new BranchProbabilityInfo(F, *LI)); - BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); - PSI = &getAnalysis().getPSI(); - ORE = &getAnalysis().getORE(); + LI = &P.getAnalysis().getLoopInfo(); + BFI = &P.getAnalysis().getBFI(); + PSI = &P.getAnalysis().getPSI(); + ORE = &P.getAnalysis().getORE(); TSchedModel.init(TSI); // When optimizing for size, selects are preferable over branches. - if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI.get())) + if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI)) return false; return optimizeSelects(F); } -bool SelectOptimize::optimizeSelects(Function &F) { +bool SelectOptimizeImpl::optimizeSelects(Function &F) { // Determine for which select groups it is profitable converting to branches. SelectGroups ProfSIGroups; // Base heuristics apply only to non-loops and outer loops. @@ -277,8 +327,8 @@ bool SelectOptimize::optimizeSelects(Function &F) { return !ProfSIGroups.empty(); } -void SelectOptimize::optimizeSelectsBase(Function &F, - SelectGroups &ProfSIGroups) { +void SelectOptimizeImpl::optimizeSelectsBase(Function &F, + SelectGroups &ProfSIGroups) { // Collect all the select groups. SelectGroups SIGroups; for (BasicBlock &BB : F) { @@ -293,8 +343,8 @@ void SelectOptimize::optimizeSelectsBase(Function &F, findProfitableSIGroupsBase(SIGroups, ProfSIGroups); } -void SelectOptimize::optimizeSelectsInnerLoops(Function &F, - SelectGroups &ProfSIGroups) { +void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F, + SelectGroups &ProfSIGroups) { SmallVector Loops(LI->begin(), LI->end()); // Need to check size on each iteration as we accumulate child loops. for (unsigned long i = 0; i < Loops.size(); ++i) @@ -331,7 +381,7 @@ getTrueOrFalseValue(SelectInst *SI, bool isTrue, return V; } -void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { +void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { for (SelectGroup &ASI : ProfSIGroups) { // The code transformation here is a modified version of the sinking // transformation in CodeGenPrepare::optimizeSelectInst with a more @@ -531,8 +581,8 @@ static bool isSpecialSelect(SelectInst *SI) { return false; } -void SelectOptimize::collectSelectGroups(BasicBlock &BB, - SelectGroups &SIGroups) { +void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB, + SelectGroups &SIGroups) { BasicBlock::iterator BBIt = BB.begin(); while (BBIt != BB.end()) { Instruction *I = &*BBIt++; @@ -565,8 +615,8 @@ void SelectOptimize::collectSelectGroups(BasicBlock &BB, } } -void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups, - SelectGroups &ProfSIGroups) { +void SelectOptimizeImpl::findProfitableSIGroupsBase( + SelectGroups &SIGroups, SelectGroups &ProfSIGroups) { for (SelectGroup &ASI : SIGroups) { ++NumSelectOptAnalyzed; if (isConvertToBranchProfitableBase(ASI)) @@ -580,14 +630,14 @@ static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE, ORE->emit(Rem); } -void SelectOptimize::findProfitableSIGroupsInnerLoops( +void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops( const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) { NumSelectOptAnalyzed += SIGroups.size(); // For each select group in an inner-most loop, // a branch is more preferable than a select/conditional-move if: // i) conversion to branches for all the select groups of the loop satisfies // loop-level heuristics including reducing the loop's critical path by - // some threshold (see SelectOptimize::checkLoopHeuristics); and + // some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and // ii) the total cost of the select group is cheaper with a branch compared // to its predicated version. The cost is in terms of latency and the cost // of a select group is the cost of its most expensive select instruction @@ -627,7 +677,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops( } } -bool SelectOptimize::isConvertToBranchProfitableBase( +bool SelectOptimizeImpl::isConvertToBranchProfitableBase( const SmallVector &ASI) { SelectInst *SI = ASI.front(); LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n"); @@ -635,7 +685,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase( OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI); // Skip cold basic blocks. Better to optimize for size for cold blocks. - if (PSI->isColdBlock(SI->getParent(), BFI.get())) { + if (PSI->isColdBlock(SI->getParent(), BFI)) { ++NumSelectColdBB; ORmiss << "Not converted to branch because of cold basic block. "; EmitAndPrintRemark(ORE, ORmiss); @@ -678,7 +728,7 @@ static InstructionCost divideNearest(InstructionCost Numerator, return (Numerator + (Denominator / 2)) / Denominator; } -bool SelectOptimize::hasExpensiveColdOperand( +bool SelectOptimizeImpl::hasExpensiveColdOperand( const SmallVector &ASI) { bool ColdOperand = false; uint64_t TrueWeight, FalseWeight, TotalWeight; @@ -752,9 +802,10 @@ static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) { // (sufficiently-accurate in practice), we populate this set with the // instructions of the backwards dependence slice that only have one-use and // form an one-use chain that leads to the source instruction. -void SelectOptimize::getExclBackwardsSlice(Instruction *I, - std::stack &Slice, - Instruction *SI, bool ForSinking) { +void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I, + std::stack &Slice, + Instruction *SI, + bool ForSinking) { SmallPtrSet Visited; std::queue Worklist; Worklist.push(I); @@ -798,7 +849,7 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I, } } -bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) { +bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectInst *SI) { uint64_t TrueWeight, FalseWeight; if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) { uint64_t Max = std::max(TrueWeight, FalseWeight); @@ -812,8 +863,8 @@ bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) { return false; } -bool SelectOptimize::checkLoopHeuristics(const Loop *L, - const CostInfo LoopCost[2]) { +bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L, + const CostInfo LoopCost[2]) { // Loop-level checks to determine if a non-predicated version (with branches) // of the loop is more profitable than its predicated version. @@ -881,7 +932,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L, // and non-predicated version of the given loop. // Returns false if unable to compute these costs due to invalid cost of loop // instruction(s). -bool SelectOptimize::computeLoopCosts( +bool SelectOptimizeImpl::computeLoopCosts( const Loop *L, const SelectGroups &SIGroups, DenseMap &InstCostMap, CostInfo *LoopCost) { LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop " @@ -969,7 +1020,7 @@ bool SelectOptimize::computeLoopCosts( } SmallPtrSet -SelectOptimize::getSIset(const SelectGroups &SIGroups) { +SelectOptimizeImpl::getSIset(const SelectGroups &SIGroups) { SmallPtrSet SIset; for (const SelectGroup &ASI : SIGroups) for (const SelectInst *SI : ASI) @@ -977,7 +1028,8 @@ SelectOptimize::getSIset(const SelectGroups &SIGroups) { return SIset; } -std::optional SelectOptimize::computeInstCost(const Instruction *I) { +std::optional +SelectOptimizeImpl::computeInstCost(const Instruction *I) { InstructionCost ICost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); if (auto OC = ICost.getValue()) @@ -986,8 +1038,8 @@ std::optional SelectOptimize::computeInstCost(const Instruction *I) { } ScaledNumber -SelectOptimize::getMispredictionCost(const SelectInst *SI, - const Scaled64 CondCost) { +SelectOptimizeImpl::getMispredictionCost(const SelectInst *SI, + const Scaled64 CondCost) { uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; // Account for the default misprediction rate when using a branch @@ -1012,8 +1064,8 @@ SelectOptimize::getMispredictionCost(const SelectInst *SI, // Returns the cost of a branch when the prediction is correct. // TrueCost * TrueProbability + FalseCost * FalseProbability. ScaledNumber -SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, - const SelectInst *SI) { +SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, + const SelectInst *SI) { Scaled64 PredPathCost; uint64_t TrueWeight, FalseWeight; if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) { @@ -1033,7 +1085,7 @@ SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, return PredPathCost; } -bool SelectOptimize::isSelectKindSupported(SelectInst *SI) { +bool SelectOptimizeImpl::isSelectKindSupported(SelectInst *SI) { bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); if (VectorCond) return false; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index f26450e941870..1d5fb122a1ae9 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -79,6 +79,7 @@ #include "llvm/CodeGen/HardwareLoops.h" #include "llvm/CodeGen/InterleavedAccess.h" #include "llvm/CodeGen/SafeStack.h" +#include "llvm/CodeGen/SelectOptimize.h" #include "llvm/CodeGen/TypePromotion.h" #include "llvm/CodeGen/WasmEHPrepare.h" #include "llvm/CodeGen/WinEHPrepare.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 56449906eb656..fe8e0e2c8c9b8 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -396,6 +396,7 @@ FUNCTION_PASS("safe-stack", SafeStackPass(TM)) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("scalarizer", ScalarizerPass()) FUNCTION_PASS("sccp", SCCPPass()) +FUNCTION_PASS("select-optimize", SelectOptimizePass(TM)) FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass()) FUNCTION_PASS("sink", SinkingPass()) diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index f5c1c3c291cb5..638f26298ee26 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -66,6 +66,9 @@ ; CHECK-NEXT: Expand reduction intrinsics ; CHECK-NEXT: Natural Loop Information ; CHECK-NEXT: TLS Variable Hoist +; CHECK-NEXT: Post-Dominator Tree Construction +; CHECK-NEXT: Branch Probability Analysis +; CHECK-NEXT: Block Frequency Analysis ; CHECK-NEXT: Lazy Branch Probability Analysis ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter diff --git a/llvm/test/CodeGen/AArch64/convert-highly-predictable-select-to-branch.ll b/llvm/test/CodeGen/AArch64/convert-highly-predictable-select-to-branch.ll index 156ec400d5e7f..22fa8b1005335 100644 --- a/llvm/test/CodeGen/AArch64/convert-highly-predictable-select-to-branch.ll +++ b/llvm/test/CodeGen/AArch64/convert-highly-predictable-select-to-branch.ll @@ -3,6 +3,10 @@ ; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=neoverse-n1 -S < %s | FileCheck %s ; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -S < %s | FileCheck %s ; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=cortex-a72 -S < %s | FileCheck %s +; RUN: opt -passes='require,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=generic -S < %s | FileCheck %s --check-prefix=CHECK-GENERIC +; RUN: opt -passes='require,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=neoverse-n1 -S < %s | FileCheck %s +; RUN: opt -passes='require,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -S < %s | FileCheck %s +; RUN: opt -passes='require,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=cortex-a72 -S < %s | FileCheck %s ; Test has not predictable select, which should not be transformed to a branch define i32 @test1(i32 %a) { diff --git a/llvm/test/CodeGen/AArch64/selectopt-logical.ll b/llvm/test/CodeGen/AArch64/selectopt-logical.ll index 635922e272c4f..b8cc896b61d41 100644 --- a/llvm/test/CodeGen/AArch64/selectopt-logical.ll +++ b/llvm/test/CodeGen/AArch64/selectopt-logical.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -S < %s | FileCheck %s +; RUN: opt -passes='require,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -S < %s | FileCheck %s define i32 @test(ptr nocapture noundef readnone %x, i32 noundef %iters) { ; CHECK-LABEL: @test( diff --git a/llvm/test/CodeGen/AArch64/selectopt.ll b/llvm/test/CodeGen/AArch64/selectopt.ll index 46ac585b6d555..8922eba0406cc 100644 --- a/llvm/test/CodeGen/AArch64/selectopt.ll +++ b/llvm/test/CodeGen/AArch64/selectopt.ll @@ -6,6 +6,13 @@ ; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=neoverse-n1 -S < %s | FileCheck %s --check-prefix=CHECKOO ; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=cortex-a710 -S < %s | FileCheck %s --check-prefix=CHECKOO ; RUN: opt -select-optimize -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -S < %s | FileCheck %s --check-prefix=CHECKOO +; RUN: opt -passes='require,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=generic -S < %s | FileCheck %s --check-prefix=CHECKOO +; RUN: opt -passes='require,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=cortex-a55 -S < %s | FileCheck %s --check-prefix=CHECKII +; RUN: opt -passes='require,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=cortex-a510 -S < %s | FileCheck %s --check-prefix=CHECKII +; RUN: opt -passes='require,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=cortex-a72 -S < %s | FileCheck %s --check-prefix=CHECKOO +; RUN: opt -passes='require,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=neoverse-n1 -S < %s | FileCheck %s --check-prefix=CHECKOO +; RUN: opt -passes='require,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=cortex-a710 -S < %s | FileCheck %s --check-prefix=CHECKOO +; RUN: opt -passes='require,function(select-optimize)' -mtriple=aarch64-linux-gnu -mcpu=neoverse-v2 -S < %s | FileCheck %s --check-prefix=CHECKOO %struct.st = type { i32, i64, ptr, ptr, i16, ptr, ptr, i64, i64 } diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll index a53dd36d813e3..6e44065440875 100644 --- a/llvm/test/CodeGen/X86/select-optimize.ll +++ b/llvm/test/CodeGen/X86/select-optimize.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -mtriple=x86_64-unknown-unknown -select-optimize -S < %s | FileCheck %s +; RUN: opt -mtriple=x86_64-unknown-unknown -passes='require,function(select-optimize)' -S < %s | FileCheck %s ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Test base heuristic 1: