diff --git a/llvm/include/llvm/CodeGen/SSAIfConv.h b/llvm/include/llvm/CodeGen/SSAIfConv.h new file mode 100644 index 0000000000000..07e47319af094 --- /dev/null +++ b/llvm/include/llvm/CodeGen/SSAIfConv.h @@ -0,0 +1,166 @@ +//===- llvm/CodeGen/SSAIfConv.h - SSAIfConv ----------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The SSAIfConv class performs if-conversion on SSA form machine code after +// determining if it is possible. The class contains no heuristics; external +// code should be used to determine when if-conversion is a good idea. +// +// SSAIfConv can convert both triangles and diamonds: +// +// Triangle: Head Diamond: Head +// | \ / \_ +// | \ / | +// | [TF]BB FBB TBB +// | / \ / +// | / \ / +// Tail Tail +// +// Instructions in the conditional blocks TBB and/or FBB are spliced into the +// Head block, and phis in the Tail block are converted to select instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" + +#ifndef LLVM_CODEGEN_SSA_IF_CONV_H +#define LLVM_CODEGEN_SSA_IF_CONV_H +namespace llvm { +class SSAIfConv { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + MachineTraceMetrics *Traces; + +public: + /// The block containing the conditional branch. + MachineBasicBlock *Head; + + /// The block containing phis after the if-then-else. 
+ MachineBasicBlock *Tail; + + /// The 'true' conditional block as determined by analyzeBranch. + MachineBasicBlock *TBB; + + /// The 'false' conditional block as determined by analyzeBranch. + MachineBasicBlock *FBB; + + /// isTriangle - When there is no 'else' block, either TBB or FBB will be + /// equal to Tail. + bool isTriangle() const { return TBB == Tail || FBB == Tail; } + + /// Returns the Tail predecessor for the True side. + MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; } + + /// Returns the Tail predecessor for the False side. + MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; } + + /// Information about each phi in the Tail block. + struct PHIInfo { + MachineInstr *PHI; + unsigned TReg = 0, FReg = 0; + // Latencies from Cond+Branch, TReg, and FReg to DstReg. + int CondCycles = 0, TCycles = 0, FCycles = 0; + + PHIInfo(MachineInstr *phi) : PHI(phi) {} + }; + + SmallVector PHIs; + + /// The branch condition determined by analyzeBranch. + SmallVector Cond; + + struct PredicationStrategyBase { + virtual bool canConvertIf(MachineBasicBlock *Head, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, MachineBasicBlock *Tail, + ArrayRef Cond) { + return true; + } + virtual bool canPredicateInstr(const MachineInstr &I) = 0; + /// Apply cost model and heuristics to the if-conversion in IfConv. + /// Return true if the conversion is a good idea. 
+ virtual bool shouldConvertIf(SSAIfConv &) = 0; + virtual void predicateBlock(MachineBasicBlock *MBB, + ArrayRef Cond, + bool Reverse) = 0; + virtual ~PredicationStrategyBase() = default; + }; + + PredicationStrategyBase &Predicate; + +public: + SSAIfConv(PredicationStrategyBase &Predicate, MachineFunction &MF, + MachineDominatorTree *DomTree, MachineLoopInfo *Loops, + MachineTraceMetrics *Traces = nullptr); + + bool run(); + + MachineTraceMetrics::Ensemble *getEnsemble(MachineTraceStrategy S); + +private: + /// Instructions in Head that define values used by the conditional blocks. + /// The hoisted instructions must be inserted after these instructions. + SmallPtrSet InsertAfter; + + /// Register units clobbered by the conditional blocks. + BitVector ClobberedRegUnits; + + // Scratch pad for findInsertionPoint. + SparseSet LiveRegUnits; + + /// Insertion point in Head for speculatively executed instructions from TBB + /// and FBB. + MachineBasicBlock::iterator InsertionPoint; + + /// Return true if all non-terminator instructions in MBB can be safely + /// predicated. + bool canPredicateInstrs(MachineBasicBlock *MBB); + + /// Scan through instruction dependencies and update the InsertAfter set. + /// Return false if any dependency is incompatible with if conversion. + bool InstrDependenciesAllowIfConv(MachineInstr *I); + + /// Find a valid insertion point in Head. + bool findInsertionPoint(); + + /// Replace PHI instructions in Tail with selects. + void replacePHIInstrs(); + + /// Insert selects and rewrite PHI operands to use them. + void rewritePHIOperands(); + + /// canConvertIf - If the sub-CFG headed by MBB can be if-converted, + /// initialize the internal state, and return true. + bool canConvertIf(MachineBasicBlock *MBB); + + /// convertIf - If-convert the last block passed to canConvertIf(), assuming + /// it is possible. Add any blocks that are to be erased to RemoveBlocks. 
+ void convertIf(SmallVectorImpl &RemoveBlocks); + + /// Attempt repeated if-conversion on MBB, return true if successful. + bool tryConvertIf(MachineBasicBlock *); + + /// Invalidate MachineTraceMetrics before if-conversion. + void invalidateTraces(); + + /// Update the dominator tree after if-conversion erased some blocks. + void updateDomTree(ArrayRef Removed); + + /// Update LoopInfo after if-conversion. + void updateLoops(ArrayRef Removed); +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_SSA_IF_CONV_H diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index ae12ce1170f70..b74705848eeaf 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -219,6 +219,7 @@ add_llvm_component_library(LLVMCodeGen SlotIndexes.cpp SpillPlacement.cpp SplitKit.cpp + SSAIfConv.cpp StackColoring.cpp StackFrameLayoutAnalysisPass.cpp StackMapLivenessAnalysis.cpp diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index 0de8112fb72c8..7effb37260b5a 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -15,787 +15,30 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SparseSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/SSAIfConv.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include 
"llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "early-ifcvt" -// Absolute maximum number of instructions allowed per speculated block. -// This bypasses all other heuristics, so it should be set fairly high. -static cl::opt -BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, - cl::desc("Maximum number of instructions per speculated block.")); - -// Stress testing mode - disable heuristics. -static cl::opt Stress("stress-early-ifcvt", cl::Hidden, - cl::desc("Turn all knobs to 11")); - -STATISTIC(NumDiamondsSeen, "Number of diamonds"); -STATISTIC(NumDiamondsConv, "Number of diamonds converted"); -STATISTIC(NumTrianglesSeen, "Number of triangles"); -STATISTIC(NumTrianglesConv, "Number of triangles converted"); - -//===----------------------------------------------------------------------===// -// SSAIfConv -//===----------------------------------------------------------------------===// -// -// The SSAIfConv class performs if-conversion on SSA form machine code after -// determining if it is possible. The class contains no heuristics; external -// code should be used to determine when if-conversion is a good idea. -// -// SSAIfConv can convert both triangles and diamonds: -// -// Triangle: Head Diamond: Head -// | \ / \_ -// | \ / | -// | [TF]BB FBB TBB -// | / \ / -// | / \ / -// Tail Tail -// -// Instructions in the conditional blocks TBB and/or FBB are spliced into the -// Head block, and phis in the Tail block are converted to select instructions. -// -namespace { -class SSAIfConv { - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; - -public: - /// The block containing the conditional branch. - MachineBasicBlock *Head; - - /// The block containing phis after the if-then-else. 
- MachineBasicBlock *Tail; - - /// The 'true' conditional block as determined by analyzeBranch. - MachineBasicBlock *TBB; - - /// The 'false' conditional block as determined by analyzeBranch. - MachineBasicBlock *FBB; - - /// isTriangle - When there is no 'else' block, either TBB or FBB will be - /// equal to Tail. - bool isTriangle() const { return TBB == Tail || FBB == Tail; } - - /// Returns the Tail predecessor for the True side. - MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; } - - /// Returns the Tail predecessor for the False side. - MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; } - - /// Information about each phi in the Tail block. - struct PHIInfo { - MachineInstr *PHI; - unsigned TReg = 0, FReg = 0; - // Latencies from Cond+Branch, TReg, and FReg to DstReg. - int CondCycles = 0, TCycles = 0, FCycles = 0; - - PHIInfo(MachineInstr *phi) : PHI(phi) {} - }; - - SmallVector PHIs; - - /// The branch condition determined by analyzeBranch. - SmallVector Cond; - -private: - /// Instructions in Head that define values used by the conditional blocks. - /// The hoisted instructions must be inserted after these instructions. - SmallPtrSet InsertAfter; - - /// Register units clobbered by the conditional blocks. - BitVector ClobberedRegUnits; - - // Scratch pad for findInsertionPoint. - SparseSet LiveRegUnits; - - /// Insertion point in Head for speculatively executed instructions form TBB - /// and FBB. - MachineBasicBlock::iterator InsertionPoint; - - /// Return true if all non-terminator instructions in MBB can be safely - /// speculated. - bool canSpeculateInstrs(MachineBasicBlock *MBB); - - /// Return true if all non-terminator instructions in MBB can be safely - /// predicated. - bool canPredicateInstrs(MachineBasicBlock *MBB); - - /// Scan through instruction dependencies and update InsertAfter array. - /// Return false if any dependency is incompatible with if conversion. 
- bool InstrDependenciesAllowIfConv(MachineInstr *I); - - /// Predicate all instructions of the basic block with current condition - /// except for terminators. Reverse the condition if ReversePredicate is set. - void PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate); - - /// Find a valid insertion point in Head. - bool findInsertionPoint(); - - /// Replace PHI instructions in Tail with selects. - void replacePHIInstrs(); - - /// Insert selects and rewrite PHI operands to use them. - void rewritePHIOperands(); - -public: - /// runOnMachineFunction - Initialize per-function data structures. - void runOnMachineFunction(MachineFunction &MF) { - TII = MF.getSubtarget().getInstrInfo(); - TRI = MF.getSubtarget().getRegisterInfo(); - MRI = &MF.getRegInfo(); - LiveRegUnits.clear(); - LiveRegUnits.setUniverse(TRI->getNumRegUnits()); - ClobberedRegUnits.clear(); - ClobberedRegUnits.resize(TRI->getNumRegUnits()); - } - - /// canConvertIf - If the sub-CFG headed by MBB can be if-converted, - /// initialize the internal state, and return true. - /// If predicate is set try to predicate the block otherwise try to - /// speculatively execute it. - bool canConvertIf(MachineBasicBlock *MBB, bool Predicate = false); - - /// convertIf - If-convert the last block passed to canConvertIf(), assuming - /// it is possible. Add any blocks that are to be erased to RemoveBlocks. - void convertIf(SmallVectorImpl &RemoveBlocks, - bool Predicate = false); -}; -} // end anonymous namespace - - -/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely -/// be speculated. The terminators are not considered. -/// -/// If instructions use any values that are defined in the head basic block, -/// the defining instructions are added to InsertAfter. -/// -/// Any clobbered regunits are added to ClobberedRegUnits. -/// -bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { - // Reject any live-in physregs. 
It's probably CPSR/EFLAGS, and very hard to - // get right. - if (!MBB->livein_empty()) { - LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); - return false; - } - - unsigned InstrCount = 0; - - // Check all instructions, except the terminators. It is assumed that - // terminators never have side effects or define any used register values. - for (MachineInstr &MI : - llvm::make_range(MBB->begin(), MBB->getFirstTerminator())) { - if (MI.isDebugInstr()) - continue; - - if (++InstrCount > BlockInstrLimit && !Stress) { - LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " - << BlockInstrLimit << " instructions.\n"); - return false; - } - - // There shouldn't normally be any phis in a single-predecessor block. - if (MI.isPHI()) { - LLVM_DEBUG(dbgs() << "Can't hoist: " << MI); - return false; - } - - // Don't speculate loads. Note that it may be possible and desirable to - // speculate GOT or constant pool loads that are guaranteed not to trap, - // but we don't support that for now. - if (MI.mayLoad()) { - LLVM_DEBUG(dbgs() << "Won't speculate load: " << MI); - return false; - } - - // We never speculate stores, so an AA pointer isn't necessary. - bool DontMoveAcrossStore = true; - if (!MI.isSafeToMove(DontMoveAcrossStore)) { - LLVM_DEBUG(dbgs() << "Can't speculate: " << MI); - return false; - } - - // Check for any dependencies on Head instructions. - if (!InstrDependenciesAllowIfConv(&MI)) - return false; - } - return true; -} - -/// Check that there is no dependencies preventing if conversion. -/// -/// If instruction uses any values that are defined in the head basic block, -/// the defining instructions are added to InsertAfter. 
-bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) { - for (const MachineOperand &MO : I->operands()) { - if (MO.isRegMask()) { - LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I); - return false; - } - if (!MO.isReg()) - continue; - Register Reg = MO.getReg(); - - // Remember clobbered regunits. - if (MO.isDef() && Reg.isPhysical()) - for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) - ClobberedRegUnits.set(Unit); - - if (!MO.readsReg() || !Reg.isVirtual()) - continue; - MachineInstr *DefMI = MRI->getVRegDef(Reg); - if (!DefMI || DefMI->getParent() != Head) - continue; - if (InsertAfter.insert(DefMI).second) - LLVM_DEBUG(dbgs() << printMBBReference(*I->getParent()) << " depends on " - << *DefMI); - if (DefMI->isTerminator()) { - LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); - return false; - } - } - return true; -} - -/// canPredicateInstrs - Returns true if all the instructions in MBB can safely -/// be predicates. The terminators are not considered. -/// -/// If instructions use any values that are defined in the head basic block, -/// the defining instructions are added to InsertAfter. -/// -/// Any clobbered regunits are added to ClobberedRegUnits. -/// -bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) { - // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to - // get right. - if (!MBB->livein_empty()) { - LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); - return false; - } - - unsigned InstrCount = 0; - - // Check all instructions, except the terminators. It is assumed that - // terminators never have side effects or define any used register values. 
- for (MachineBasicBlock::iterator I = MBB->begin(), - E = MBB->getFirstTerminator(); - I != E; ++I) { - if (I->isDebugInstr()) - continue; - - if (++InstrCount > BlockInstrLimit && !Stress) { - LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " - << BlockInstrLimit << " instructions.\n"); - return false; - } - - // There shouldn't normally be any phis in a single-predecessor block. - if (I->isPHI()) { - LLVM_DEBUG(dbgs() << "Can't predicate: " << *I); - return false; - } - - // Check that instruction is predicable - if (!TII->isPredicable(*I)) { - LLVM_DEBUG(dbgs() << "Isn't predicable: " << *I); - return false; - } - - // Check that instruction is not already predicated. - if (TII->isPredicated(*I) && !TII->canPredicatePredicatedInstr(*I)) { - LLVM_DEBUG(dbgs() << "Is already predicated: " << *I); - return false; - } - - // Check for any dependencies on Head instructions. - if (!InstrDependenciesAllowIfConv(&(*I))) - return false; - } - return true; -} - -// Apply predicate to all instructions in the machine block. -void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) { - auto Condition = Cond; - if (ReversePredicate) { - bool CanRevCond = !TII->reverseBranchCondition(Condition); - assert(CanRevCond && "Reversed predicate is not supported"); - (void)CanRevCond; - } - // Terminators don't need to be predicated as they will be removed. - for (MachineBasicBlock::iterator I = MBB->begin(), - E = MBB->getFirstTerminator(); - I != E; ++I) { - if (I->isDebugInstr()) - continue; - TII->PredicateInstruction(*I, Condition); - } -} - -/// Find an insertion point in Head for the speculated instructions. The -/// insertion point must be: -/// -/// 1. Before any terminators. -/// 2. After any instructions in InsertAfter. -/// 3. Not have any clobbered regunits live. -/// -/// This function sets InsertionPoint and returns true when successful, it -/// returns false if no valid insertion point could be found. 
-/// -bool SSAIfConv::findInsertionPoint() { - // Keep track of live regunits before the current position. - // Only track RegUnits that are also in ClobberedRegUnits. - LiveRegUnits.clear(); - SmallVector Reads; - MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); - MachineBasicBlock::iterator I = Head->end(); - MachineBasicBlock::iterator B = Head->begin(); - while (I != B) { - --I; - // Some of the conditional code depends in I. - if (InsertAfter.count(&*I)) { - LLVM_DEBUG(dbgs() << "Can't insert code after " << *I); - return false; - } - - // Update live regunits. - for (const MachineOperand &MO : I->operands()) { - // We're ignoring regmask operands. That is conservatively correct. - if (!MO.isReg()) - continue; - Register Reg = MO.getReg(); - if (!Reg.isPhysical()) - continue; - // I clobbers Reg, so it isn't live before I. - if (MO.isDef()) - for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) - LiveRegUnits.erase(Unit); - // Unless I reads Reg. - if (MO.readsReg()) - Reads.push_back(Reg.asMCReg()); - } - // Anything read by I is live before I. - while (!Reads.empty()) - for (MCRegUnit Unit : TRI->regunits(Reads.pop_back_val())) - if (ClobberedRegUnits.test(Unit)) - LiveRegUnits.insert(Unit); - - // We can't insert before a terminator. - if (I != FirstTerm && I->isTerminator()) - continue; - - // Some of the clobbered registers are live before I, not a valid insertion - // point. - if (!LiveRegUnits.empty()) { - LLVM_DEBUG({ - dbgs() << "Would clobber"; - for (unsigned LRU : LiveRegUnits) - dbgs() << ' ' << printRegUnit(LRU, TRI); - dbgs() << " live before " << *I; - }); - continue; - } - - // This is a valid insertion point. - InsertionPoint = I; - LLVM_DEBUG(dbgs() << "Can insert before " << *I); - return true; - } - LLVM_DEBUG(dbgs() << "No legal insertion point found.\n"); - return false; -} - - - -/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is -/// a potential candidate for if-conversion. 
Fill out the internal state. -/// -bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) { - Head = MBB; - TBB = FBB = Tail = nullptr; - - if (Head->succ_size() != 2) - return false; - MachineBasicBlock *Succ0 = Head->succ_begin()[0]; - MachineBasicBlock *Succ1 = Head->succ_begin()[1]; - - // Canonicalize so Succ0 has MBB as its single predecessor. - if (Succ0->pred_size() != 1) - std::swap(Succ0, Succ1); - - if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1) - return false; - - Tail = Succ0->succ_begin()[0]; - - // This is not a triangle. - if (Tail != Succ1) { - // Check for a diamond. We won't deal with any critical edges. - if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 || - Succ1->succ_begin()[0] != Tail) - return false; - LLVM_DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> " - << printMBBReference(*Succ0) << "/" - << printMBBReference(*Succ1) << " -> " - << printMBBReference(*Tail) << '\n'); - - // Live-in physregs are tricky to get right when speculating code. - if (!Tail->livein_empty()) { - LLVM_DEBUG(dbgs() << "Tail has live-ins.\n"); - return false; - } - } else { - LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> " - << printMBBReference(*Succ0) << " -> " - << printMBBReference(*Tail) << '\n'); - } - - // This is a triangle or a diamond. - // Skip if we cannot predicate and there are no phis skip as there must be - // side effects that can only be handled with predication. - if (!Predicate && (Tail->empty() || !Tail->front().isPHI())) { - LLVM_DEBUG(dbgs() << "No phis in tail.\n"); - return false; - } - - // The branch we're looking to eliminate must be analyzable. - Cond.clear(); - if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) { - LLVM_DEBUG(dbgs() << "Branch not analyzable.\n"); - return false; - } - - // This is weird, probably some sort of degenerate CFG. 
- if (!TBB) { - LLVM_DEBUG(dbgs() << "analyzeBranch didn't find conditional branch.\n"); - return false; - } - - // Make sure the analyzed branch is conditional; one of the successors - // could be a landing pad. (Empty landing pads can be generated on Windows.) - if (Cond.empty()) { - LLVM_DEBUG(dbgs() << "analyzeBranch found an unconditional branch.\n"); - return false; - } - - // analyzeBranch doesn't set FBB on a fall-through branch. - // Make sure it is always set. - FBB = TBB == Succ0 ? Succ1 : Succ0; - - // Any phis in the tail block must be convertible to selects. - PHIs.clear(); - MachineBasicBlock *TPred = getTPred(); - MachineBasicBlock *FPred = getFPred(); - for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); - I != E && I->isPHI(); ++I) { - PHIs.push_back(&*I); - PHIInfo &PI = PHIs.back(); - // Find PHI operands corresponding to TPred and FPred. - for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) { - if (PI.PHI->getOperand(i+1).getMBB() == TPred) - PI.TReg = PI.PHI->getOperand(i).getReg(); - if (PI.PHI->getOperand(i+1).getMBB() == FPred) - PI.FReg = PI.PHI->getOperand(i).getReg(); - } - assert(Register::isVirtualRegister(PI.TReg) && "Bad PHI"); - assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI"); - - // Get target information. - if (!TII->canInsertSelect(*Head, Cond, PI.PHI->getOperand(0).getReg(), - PI.TReg, PI.FReg, PI.CondCycles, PI.TCycles, - PI.FCycles)) { - LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI); - return false; - } - } - - // Check that the conditional instructions can be speculated. 
- InsertAfter.clear(); - ClobberedRegUnits.reset(); - if (Predicate) { - if (TBB != Tail && !canPredicateInstrs(TBB)) - return false; - if (FBB != Tail && !canPredicateInstrs(FBB)) - return false; - } else { - if (TBB != Tail && !canSpeculateInstrs(TBB)) - return false; - if (FBB != Tail && !canSpeculateInstrs(FBB)) - return false; - } - - // Try to find a valid insertion point for the speculated instructions in the - // head basic block. - if (!findInsertionPoint()) - return false; - - if (isTriangle()) - ++NumTrianglesSeen; - else - ++NumDiamondsSeen; - return true; -} - -/// \return true iff the two registers are known to have the same value. -static bool hasSameValue(const MachineRegisterInfo &MRI, - const TargetInstrInfo *TII, Register TReg, - Register FReg) { - if (TReg == FReg) - return true; - - if (!TReg.isVirtual() || !FReg.isVirtual()) - return false; - - const MachineInstr *TDef = MRI.getUniqueVRegDef(TReg); - const MachineInstr *FDef = MRI.getUniqueVRegDef(FReg); - if (!TDef || !FDef) - return false; - - // If there are side-effects, all bets are off. - if (TDef->hasUnmodeledSideEffects()) - return false; - - // If the instruction could modify memory, or there may be some intervening - // store between the two, we can't consider them to be equal. - if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad()) - return false; - - // We also can't guarantee that they are the same if, for example, the - // instructions are both a copy from a physical reg, because some other - // instruction may have modified the value in that reg between the two - // defining insts. - if (any_of(TDef->uses(), [](const MachineOperand &MO) { - return MO.isReg() && MO.getReg().isPhysical(); - })) - return false; - - // Check whether the two defining instructions produce the same value(s). - if (!TII->produceSameValue(*TDef, *FDef, &MRI)) - return false; - - // Further, check that the two defs come from corresponding operands. 
- int TIdx = TDef->findRegisterDefOperandIdx(TReg, /*TRI=*/nullptr); - int FIdx = FDef->findRegisterDefOperandIdx(FReg, /*TRI=*/nullptr); - if (TIdx == -1 || FIdx == -1) - return false; - - return TIdx == FIdx; -} - -/// replacePHIInstrs - Completely replace PHI instructions with selects. -/// This is possible when the only Tail predecessors are the if-converted -/// blocks. -void SSAIfConv::replacePHIInstrs() { - assert(Tail->pred_size() == 2 && "Cannot replace PHIs"); - MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); - assert(FirstTerm != Head->end() && "No terminators"); - DebugLoc HeadDL = FirstTerm->getDebugLoc(); - - // Convert all PHIs to select instructions inserted before FirstTerm. - for (PHIInfo &PI : PHIs) { - LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); - Register DstReg = PI.PHI->getOperand(0).getReg(); - if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) { - // We do not need the select instruction if both incoming values are - // equal, but we do need a COPY. - BuildMI(*Head, FirstTerm, HeadDL, TII->get(TargetOpcode::COPY), DstReg) - .addReg(PI.TReg); - } else { - TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, - PI.FReg); - } - LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); - PI.PHI->eraseFromParent(); - PI.PHI = nullptr; - } -} - -/// rewritePHIOperands - When there are additional Tail predecessors, insert -/// select instructions in Head and rewrite PHI operands to use the selects. -/// Keep the PHI instructions in Tail to handle the other predecessors. -void SSAIfConv::rewritePHIOperands() { - MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); - assert(FirstTerm != Head->end() && "No terminators"); - DebugLoc HeadDL = FirstTerm->getDebugLoc(); - - // Convert all PHIs to select instructions inserted before FirstTerm. 
- for (PHIInfo &PI : PHIs) { - unsigned DstReg = 0; - - LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); - if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) { - // We do not need the select instruction if both incoming values are - // equal. - DstReg = PI.TReg; - } else { - Register PHIDst = PI.PHI->getOperand(0).getReg(); - DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); - TII->insertSelect(*Head, FirstTerm, HeadDL, - DstReg, Cond, PI.TReg, PI.FReg); - LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); - } - - // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. - for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { - MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB(); - if (MBB == getTPred()) { - PI.PHI->getOperand(i-1).setMBB(Head); - PI.PHI->getOperand(i-2).setReg(DstReg); - } else if (MBB == getFPred()) { - PI.PHI->removeOperand(i-1); - PI.PHI->removeOperand(i-2); - } - } - LLVM_DEBUG(dbgs() << " --> " << *PI.PHI); - } -} - -/// convertIf - Execute the if conversion after canConvertIf has determined the -/// feasibility. -/// -/// Any basic blocks that need to be erased will be added to RemoveBlocks. -/// -void SSAIfConv::convertIf(SmallVectorImpl &RemoveBlocks, - bool Predicate) { - assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); - - // Update statistics. - if (isTriangle()) - ++NumTrianglesConv; - else - ++NumDiamondsConv; - - // Move all instructions into Head, except for the terminators. - if (TBB != Tail) { - if (Predicate) - PredicateBlock(TBB, /*ReversePredicate=*/false); - Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); - } - if (FBB != Tail) { - if (Predicate) - PredicateBlock(FBB, /*ReversePredicate=*/true); - Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); - } - // Are there extra Tail predecessors? 
- bool ExtraPreds = Tail->pred_size() != 2; - if (ExtraPreds) - rewritePHIOperands(); - else - replacePHIInstrs(); - - // Fix up the CFG, temporarily leave Head without any successors. - Head->removeSuccessor(TBB); - Head->removeSuccessor(FBB, true); - if (TBB != Tail) - TBB->removeSuccessor(Tail, true); - if (FBB != Tail) - FBB->removeSuccessor(Tail, true); - - // Fix up Head's terminators. - // It should become a single branch or a fallthrough. - DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); - TII->removeBranch(*Head); - - // Mark the now empty conditional blocks for removal and move them to the end. - // It is likely that Head can fall - // through to Tail, and we can join the two blocks. - if (TBB != Tail) { - RemoveBlocks.push_back(TBB); - if (TBB != &TBB->getParent()->back()) - TBB->moveAfter(&TBB->getParent()->back()); - } - if (FBB != Tail) { - RemoveBlocks.push_back(FBB); - if (FBB != &FBB->getParent()->back()) - FBB->moveAfter(&FBB->getParent()->back()); - } - - assert(Head->succ_empty() && "Additional head successors?"); - if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) { - // Splice Tail onto the end of Head. - LLVM_DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) - << " into head " << printMBBReference(*Head) << '\n'); - Head->splice(Head->end(), Tail, - Tail->begin(), Tail->end()); - Head->transferSuccessorsAndUpdatePHIs(Tail); - RemoveBlocks.push_back(Tail); - if (Tail != &Tail->getParent()->back()) - Tail->moveAfter(&Tail->getParent()->back()); - } else { - // We need a branch to Tail, let code placement work it out later. 
- LLVM_DEBUG(dbgs() << "Converting to unconditional branch.\n"); - SmallVector EmptyCond; - TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL); - Head->addSuccessor(Tail); - } - LLVM_DEBUG(dbgs() << *Head); -} - -//===----------------------------------------------------------------------===// -// EarlyIfConverter Pass -//===----------------------------------------------------------------------===// - namespace { -class EarlyIfConverter : public MachineFunctionPass { - const TargetInstrInfo *TII = nullptr; - const TargetRegisterInfo *TRI = nullptr; - MCSchedModel SchedModel; - MachineRegisterInfo *MRI = nullptr; - MachineDominatorTree *DomTree = nullptr; - MachineLoopInfo *Loops = nullptr; - MachineTraceMetrics *Traces = nullptr; - MachineTraceMetrics::Ensemble *MinInstr = nullptr; - SSAIfConv IfConv; - -public: +struct EarlyIfConverter : MachineFunctionPass { static char ID; EarlyIfConverter() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { return "Early If-Conversion"; } - -private: - bool tryConvertIf(MachineBasicBlock*); - void invalidateTraces(); - bool shouldConvertIf(); }; -} // end anonymous namespace char EarlyIfConverter::ID = 0; -char &llvm::EarlyIfConverterID = EarlyIfConverter::ID; - -INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE, - "Early If Converter", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) -INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE, - "Early If Converter", false, false) void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); @@ -808,53 +51,13 @@ void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -namespace { -/// Update the dominator tree after if-conversion 
erased some blocks. -void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv, - ArrayRef Removed) { - // convertIf can remove TBB, FBB, and Tail can be merged into Head. - // TBB and FBB should not dominate any blocks. - // Tail children should be transferred to Head. - MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head); - for (auto *B : Removed) { - MachineDomTreeNode *Node = DomTree->getNode(B); - assert(Node != HeadNode && "Cannot erase the head node"); - while (Node->getNumChildren()) { - assert(Node->getBlock() == IfConv.Tail && "Unexpected children"); - DomTree->changeImmediateDominator(Node->back(), HeadNode); - } - DomTree->eraseNode(B); - } -} - -/// Update LoopInfo after if-conversion. -void updateLoops(MachineLoopInfo *Loops, - ArrayRef Removed) { - // If-conversion doesn't change loop structure, and it doesn't mess with back - // edges, so updating LoopInfo is simply removing the dead blocks. - for (auto *B : Removed) - Loops->removeBlock(B); -} -} // namespace - -/// Invalidate MachineTraceMetrics before if-conversion. -void EarlyIfConverter::invalidateTraces() { - Traces->verifyAnalysis(); - Traces->invalidate(IfConv.Head); - Traces->invalidate(IfConv.Tail); - Traces->invalidate(IfConv.TBB); - Traces->invalidate(IfConv.FBB); - Traces->verifyAnalysis(); -} - // Adjust cycles with downward saturation. -static unsigned adjCycles(unsigned Cyc, int Delta) { +unsigned adjCycles(unsigned Cyc, int Delta) { if (Delta < 0 && Cyc + Delta > Cyc) return 0; return Cyc + Delta; } -namespace { /// Helper class to simplify emission of cycle counts into optimization remarks. struct Cycles { const char *Key; @@ -863,16 +66,59 @@ struct Cycles { template Remark &operator<<(Remark &R, Cycles C) { return R << ore::NV(C.Key, C.Value) << (C.Value == 1 ? " cycle" : " cycles"); } -} // anonymous namespace -/// Apply cost model and heuristics to the if-conversion in IfConv. -/// Return true if the conversion is a good idea. 
-/// -bool EarlyIfConverter::shouldConvertIf() { - // Stress testing mode disables all cost considerations. - if (Stress) +struct SpeculateStrategy : SSAIfConv::PredicationStrategyBase { + MachineLoopInfo *Loops = nullptr; + const MCSchedModel &SchedModel; + MachineTraceMetrics *Traces = nullptr; + + SpeculateStrategy(MachineLoopInfo *Loops, const MCSchedModel &SchedModel, + MachineTraceMetrics *Traces = nullptr) + : Loops(Loops), SchedModel(SchedModel), Traces(Traces) {} + + bool canConvertIf(MachineBasicBlock *Head, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, MachineBasicBlock *Tail, + ArrayRef Cond) override { + // This is a triangle or a diamond. + // Speculation cannot predicate, so skip when the tail has no phis: the + // blocks must then have side effects that only predication can handle. + if (Tail->empty() || !Tail->front().isPHI()) { + LLVM_DEBUG(dbgs() << "No phis in tail.\n"); + return false; + } + return true; + } + + bool canPredicateInstr(const MachineInstr &I) override { + // Don't speculate loads. Note that it may be possible and desirable to + // speculate GOT or constant pool loads that are guaranteed not to trap, + // but we don't support that for now. + if (I.mayLoad()) { + LLVM_DEBUG(dbgs() << "Won't speculate load: " << I); + return false; + } + + // We never speculate stores, so an AA pointer isn't necessary. + bool DontMoveAcrossStore = true; + if (!I.isSafeToMove(DontMoveAcrossStore)) { + LLVM_DEBUG(dbgs() << "Can't speculate: " << I); + return false; + } return true; + } + + bool shouldConvertIf(SSAIfConv &IfConv) override; + + void predicateBlock(MachineBasicBlock *MBB, ArrayRef Cond, + bool Reverse) + override { /* do nothing, everything is speculatable and it's valid to + move the instructions into the head */ + } + ~SpeculateStrategy() override = default; +}; + +bool SpeculateStrategy::shouldConvertIf(SSAIfConv &IfConv) { // Do not try to if-convert if the condition has a high chance of being // predictable. 
MachineLoop *CurrentLoop = Loops->getLoopFor(IfConv.Head); @@ -888,6 +134,7 @@ bool EarlyIfConverter::shouldConvertIf() { if (Register::isPhysicalRegister(Reg)) return false; + MachineRegisterInfo *MRI = &IfConv.Head->getParent()->getRegInfo(); MachineInstr *Def = MRI->getVRegDef(Reg); return CurrentLoop->isLoopInvariant(*Def) || all_of(Def->operands(), [&](MachineOperand &Op) { @@ -905,8 +152,8 @@ bool EarlyIfConverter::shouldConvertIf() { })) return false; - if (!MinInstr) - MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount); + auto *MinInstr = IfConv.getEnsemble(MachineTraceStrategy::TS_MinInstrCount); + assert(MinInstr && "Speculation cost model needs MachineTraceMetrics"); MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred()); MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred()); @@ -1059,24 +306,6 @@ bool EarlyIfConverter::shouldConvertIf() { return ShouldConvert; } -/// Attempt repeated if-conversion on MBB, return true if successful. -/// -bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { - bool Changed = false; - while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { - // If-convert MBB and update analyses. 
- invalidateTraces(); - SmallVector RemoveBlocks; - IfConv.convertIf(RemoveBlocks); - Changed = true; - updateDomTree(DomTree, IfConv, RemoveBlocks); - for (MachineBasicBlock *MBB : RemoveBlocks) - MBB->eraseFromParent(); - updateLoops(Loops, RemoveBlocks); - } - return Changed; -} - bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" << "********** Function: " << MF.getName() << '\n'); @@ -1088,69 +317,39 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { if (!STI.enableEarlyIfConversion()) return false; - TII = STI.getInstrInfo(); - TRI = STI.getRegisterInfo(); - SchedModel = STI.getSchedModel(); - MRI = &MF.getRegInfo(); - DomTree = &getAnalysis().getDomTree(); - Loops = &getAnalysis().getLI(); - Traces = &getAnalysis(); - MinInstr = nullptr; + const MCSchedModel &SchedModel = STI.getSchedModel(); + auto *DomTree = &getAnalysis().getDomTree(); + auto *Loops = &getAnalysis().getLI(); + MachineTraceMetrics *Traces = &getAnalysis(); - bool Changed = false; - IfConv.runOnMachineFunction(MF); - - // Visit blocks in dominator tree post-order. The post-order enables nested - // if-conversion in a single pass. The tryConvertIf() function may erase - // blocks, but only blocks dominated by the head block. This makes it safe to - // update the dominator tree while the post-order iterator is still active. 
- for (auto *DomNode : post_order(DomTree)) - if (tryConvertIf(DomNode->getBlock())) - Changed = true; - - return Changed; + SpeculateStrategy Speculate(Loops, SchedModel); + SSAIfConv IfConv(Speculate, MF, DomTree, Loops, Traces); + return IfConv.run(); } +} // end anonymous namespace -//===----------------------------------------------------------------------===// -// EarlyIfPredicator Pass -//===----------------------------------------------------------------------===// +char &llvm::EarlyIfConverterID = EarlyIfConverter::ID; -namespace { -class EarlyIfPredicator : public MachineFunctionPass { - const TargetInstrInfo *TII = nullptr; - const TargetRegisterInfo *TRI = nullptr; - TargetSchedModel SchedModel; - MachineRegisterInfo *MRI = nullptr; - MachineDominatorTree *DomTree = nullptr; - MachineBranchProbabilityInfo *MBPI = nullptr; - MachineLoopInfo *Loops = nullptr; - SSAIfConv IfConv; +INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE, "Early If Converter", false, + false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) +INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE, "Early If Converter", false, + false) +#undef DEBUG_TYPE -public: +#define DEBUG_TYPE "early-if-predicator" +namespace { +struct EarlyIfPredicator : MachineFunctionPass { static char ID; EarlyIfPredicator() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { return "Early If-predicator"; } - -protected: - bool tryConvertIf(MachineBasicBlock *); - bool shouldConvertIf(); }; -} // end anonymous namespace - -#undef DEBUG_TYPE -#define DEBUG_TYPE "early-if-predicator" char EarlyIfPredicator::ID = 0; -char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID; - -INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", - 
false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) -INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false, - false) void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); @@ -1161,8 +360,53 @@ void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -/// Apply the target heuristic to decide if the transformation is profitable. -bool EarlyIfPredicator::shouldConvertIf() { +struct PredicatorStrategy : SSAIfConv::PredicationStrategyBase { + const TargetInstrInfo *TII = nullptr; + TargetSchedModel &SchedModel; + MachineBranchProbabilityInfo *MBPI = nullptr; + PredicatorStrategy(const TargetInstrInfo *TII, TargetSchedModel &SchedModel, + MachineBranchProbabilityInfo *MBPI) + : TII(TII), SchedModel(SchedModel), MBPI(MBPI) {} + + bool canPredicateInstr(const MachineInstr &I) override { + // Check that instruction is predicable + if (!TII->isPredicable(I)) { + LLVM_DEBUG(dbgs() << "Isn't predicable: " << I); + return false; + } + + // Check that instruction is not already predicated. + if (TII->isPredicated(I) && !TII->canPredicatePredicatedInstr(I)) { + LLVM_DEBUG(dbgs() << "Is already predicated: " << I); + return false; + } + return true; + } + + bool shouldConvertIf(SSAIfConv &IfConv) override; + + void predicateBlock(MachineBasicBlock *MBB, ArrayRef Cond, + bool Reverse) override { + SmallVector Condition(Cond.begin(), Cond.end()); + if (Reverse) { + bool CanRevCond = !TII->reverseBranchCondition(Condition); + assert(CanRevCond && "Reversed predicate is not supported"); + (void)CanRevCond; + } + // Terminators don't need to be predicated as they will be removed. 
+ for (MachineBasicBlock::iterator I = MBB->begin(), + E = MBB->getFirstTerminator(); + I != E; ++I) { + if (I->isDebugInstr()) + continue; + TII->PredicateInstruction(*I, Condition); + } + } + + ~PredicatorStrategy() override = default; +}; + +bool PredicatorStrategy::shouldConvertIf(SSAIfConv &IfConv) { auto TrueProbability = MBPI->getEdgeProbability(IfConv.Head, IfConv.TBB); if (IfConv.isTriangle()) { MachineBasicBlock &IfBlock = @@ -1200,23 +444,6 @@ bool EarlyIfPredicator::shouldConvertIf() { FCycle, FExtra, TrueProbability); } -/// Attempt repeated if-conversion on MBB, return true if successful. -/// -bool EarlyIfPredicator::tryConvertIf(MachineBasicBlock *MBB) { - bool Changed = false; - while (IfConv.canConvertIf(MBB, /*Predicate*/ true) && shouldConvertIf()) { - // If-convert MBB and update analyses. - SmallVector RemoveBlocks; - IfConv.convertIf(RemoveBlocks, /*Predicate*/ true); - Changed = true; - updateDomTree(DomTree, IfConv, RemoveBlocks); - for (MachineBasicBlock *MBB : RemoveBlocks) - MBB->eraseFromParent(); - updateLoops(Loops, RemoveBlocks); - } - return Changed; -} - bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** EARLY IF-PREDICATOR **********\n" << "********** Function: " << MF.getName() << '\n'); @@ -1224,24 +451,25 @@ bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) { return false; const TargetSubtargetInfo &STI = MF.getSubtarget(); - TII = STI.getInstrInfo(); - TRI = STI.getRegisterInfo(); - MRI = &MF.getRegInfo(); + auto *TII = STI.getInstrInfo(); + TargetSchedModel SchedModel; SchedModel.init(&STI); - DomTree = &getAnalysis().getDomTree(); - Loops = &getAnalysis().getLI(); - MBPI = &getAnalysis().getMBPI(); - - bool Changed = false; - IfConv.runOnMachineFunction(MF); + auto *DomTree = &getAnalysis().getDomTree(); + auto *Loops = &getAnalysis().getLI(); + auto *MBPI = + &getAnalysis().getMBPI(); + + PredicatorStrategy Predicate(TII, SchedModel, MBPI); + SSAIfConv 
IfConv(Predicate, MF, DomTree, Loops); + return IfConv.run(); +} - // Visit blocks in dominator tree post-order. The post-order enables nested - // if-conversion in a single pass. The tryConvertIf() function may erase - // blocks, but only blocks dominated by the head block. This makes it safe to - // update the dominator tree while the post-order iterator is still active. - for (auto *DomNode : post_order(DomTree)) - if (tryConvertIf(DomNode->getBlock())) - Changed = true; +} // end anonymous namespace +char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID; - return Changed; -} +INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false, + false) \ No newline at end of file diff --git a/llvm/lib/CodeGen/SSAIfConv.cpp b/llvm/lib/CodeGen/SSAIfConv.cpp new file mode 100644 index 0000000000000..b55993d672eff --- /dev/null +++ b/llvm/lib/CodeGen/SSAIfConv.cpp @@ -0,0 +1,570 @@ +#include "llvm/CodeGen/SSAIfConv.h" + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "early-ifcvt" + +using namespace llvm; + +// Stress testing mode - disable heuristics. +static cl::opt Stress("stress-early-ifcvt", cl::Hidden, + cl::desc("Turn all knobs to 11")); + +// Absolute maximum number of instructions allowed per speculated block. +// This bypasses all other heuristics, so it should be set fairly high. 
+static cl::opt BlockInstrLimit( + "early-ifcvt-limit", cl::init(30), cl::Hidden, + cl::desc("Maximum number of instructions per speculated block.")); + +STATISTIC(NumDiamondsSeen, "Number of diamonds"); +STATISTIC(NumDiamondsConv, "Number of diamonds converted"); +STATISTIC(NumTrianglesSeen, "Number of triangles"); +STATISTIC(NumTrianglesConv, "Number of triangles converted"); + +SSAIfConv::SSAIfConv(PredicationStrategyBase &Predicate, MachineFunction &MF, + MachineDominatorTree *DomTree, MachineLoopInfo *Loops, + MachineTraceMetrics *Traces) + : DomTree(DomTree), Loops(Loops), Traces(Traces), Predicate(Predicate) { + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + LiveRegUnits.clear(); + LiveRegUnits.setUniverse(TRI->getNumRegUnits()); + ClobberedRegUnits.clear(); + ClobberedRegUnits.resize(TRI->getNumRegUnits()); +} + +MachineTraceMetrics::Ensemble *SSAIfConv::getEnsemble(MachineTraceStrategy S) { + return Traces ? Traces->getEnsemble(S) : nullptr; +} + +/// Check that there are no dependencies preventing if-conversion. +/// +/// If the instruction uses any values that are defined in the head basic block, +/// the defining instructions are added to InsertAfter. +bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) { + for (const MachineOperand &MO : I->operands()) { + if (MO.isRegMask()) { + LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I); + return false; + } + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + + // Remember clobbered regunits. 
+ if (MO.isDef() && Reg.isPhysical()) + for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) + ClobberedRegUnits.set(Unit); + + if (!MO.readsReg() || !Reg.isVirtual()) + continue; + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (!DefMI || DefMI->getParent() != Head) + continue; + if (InsertAfter.insert(DefMI).second) + LLVM_DEBUG(dbgs() << printMBBReference(*I->getParent()) << " depends on " + << *DefMI); + if (DefMI->isTerminator()) { + LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); + return false; + } + } + return true; +} + +/// canPredicateInstrs - Returns true if all the instructions in MBB can safely +/// be predicated. The terminators are not considered. +/// +/// If instructions use any values that are defined in the head basic block, +/// the defining instructions are added to InsertAfter. +/// +/// Any clobbered regunits are added to ClobberedRegUnits. +/// +bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) { + // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to + // get right. + if (!MBB->livein_empty()) { + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n"); + return false; + } + + unsigned InstrCount = 0; + + // Check all instructions, except the terminators. It is assumed that + // terminators never have side effects or define any used register values. + for (MachineBasicBlock::iterator I = MBB->begin(), + E = MBB->getFirstTerminator(); + I != E; ++I) { + if (I->isDebugInstr()) + continue; + + if (++InstrCount > BlockInstrLimit && !Stress) { + LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than " + << BlockInstrLimit << " instructions.\n"); + return false; + } + + // There shouldn't normally be any phis in a single-predecessor block. + if (I->isPHI()) { + LLVM_DEBUG(dbgs() << "Can't predicate: " << *I); + return false; + } + + if (!Predicate.canPredicateInstr(*I)) + return false; + + // Check for any dependencies on Head instructions. 
+ if (!InstrDependenciesAllowIfConv(&(*I))) + return false; + } + return true; +} + +/// Find an insertion point in Head for the speculated instructions. The +/// insertion point must be: +/// +/// 1. Before any terminators. +/// 2. After any instructions in InsertAfter. +/// 3. Not have any clobbered regunits live. +/// +/// This function sets InsertionPoint and returns true when successful, it +/// returns false if no valid insertion point could be found. +/// +bool SSAIfConv::findInsertionPoint() { + // Keep track of live regunits before the current position. + // Only track RegUnits that are also in ClobberedRegUnits. + LiveRegUnits.clear(); + SmallVector Reads; + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + MachineBasicBlock::iterator I = Head->end(); + MachineBasicBlock::iterator B = Head->begin(); + while (I != B) { + --I; + // Some of the conditional code depends on I. + if (InsertAfter.count(&*I)) { + LLVM_DEBUG(dbgs() << "Can't insert code after " << *I); + return false; + } + + // Update live regunits. + for (const MachineOperand &MO : I->operands()) { + // We're ignoring regmask operands. That is conservatively correct. + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (!Reg.isPhysical()) + continue; + // I clobbers Reg, so it isn't live before I. + if (MO.isDef()) + for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) + LiveRegUnits.erase(Unit); + // Unless I reads Reg. + if (MO.readsReg()) + Reads.push_back(Reg.asMCReg()); + } + // Anything read by I is live before I. + while (!Reads.empty()) + for (MCRegUnit Unit : TRI->regunits(Reads.pop_back_val())) + if (ClobberedRegUnits.test(Unit)) + LiveRegUnits.insert(Unit); + + // We can't insert before a terminator. + if (I != FirstTerm && I->isTerminator()) + continue; + + // Some of the clobbered registers are live before I, not a valid insertion + // point. 
+ if (!LiveRegUnits.empty()) { + LLVM_DEBUG({ + dbgs() << "Would clobber"; + for (unsigned LRU : LiveRegUnits) + dbgs() << ' ' << printRegUnit(LRU, TRI); + dbgs() << " live before " << *I; + }); + continue; + } + + // This is a valid insertion point. + InsertionPoint = I; + LLVM_DEBUG(dbgs() << "Can insert before " << *I); + return true; + } + LLVM_DEBUG(dbgs() << "No legal insertion point found.\n"); + return false; +} + +/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is +/// a potential candidate for if-conversion. Fill out the internal state. +/// +bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { + Head = MBB; + TBB = FBB = Tail = nullptr; + + if (Head->succ_size() != 2) + return false; + MachineBasicBlock *Succ0 = Head->succ_begin()[0]; + MachineBasicBlock *Succ1 = Head->succ_begin()[1]; + + // Canonicalize so Succ0 has MBB as its single predecessor. + if (Succ0->pred_size() != 1) + std::swap(Succ0, Succ1); + + if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1) + return false; + + Tail = Succ0->succ_begin()[0]; + + // This is not a triangle. + if (Tail != Succ1) { + // Check for a diamond. We won't deal with any critical edges. + if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 || + Succ1->succ_begin()[0] != Tail) + return false; + LLVM_DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> " + << printMBBReference(*Succ0) << "/" + << printMBBReference(*Succ1) << " -> " + << printMBBReference(*Tail) << '\n'); + + // Live-in physregs are tricky to get right when speculating code. + if (!Tail->livein_empty()) { + LLVM_DEBUG(dbgs() << "Tail has live-ins.\n"); + return false; + } + } else { + LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> " + << printMBBReference(*Succ0) << " -> " + << printMBBReference(*Tail) << '\n'); + } + + // The branch we're looking to eliminate must be analyzable. 
+ Cond.clear(); + if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) { + LLVM_DEBUG(dbgs() << "Branch not analyzable.\n"); + return false; + } + + // This is weird, probably some sort of degenerate CFG. + if (!TBB) { + LLVM_DEBUG(dbgs() << "analyzeBranch didn't find conditional branch.\n"); + return false; + } + + // Make sure the analyzed branch is conditional; one of the successors + // could be a landing pad. (Empty landing pads can be generated on Windows.) + if (Cond.empty()) { + LLVM_DEBUG(dbgs() << "analyzeBranch found an unconditional branch.\n"); + return false; + } + + // analyzeBranch doesn't set FBB on a fall-through branch. + // Make sure it is always set. + FBB = TBB == Succ0 ? Succ1 : Succ0; + + // Consult the strategy only now that TBB, FBB and Cond are all valid. + if (!Predicate.canConvertIf(Head, TBB, FBB, Tail, Cond)) + return false; + + // Any phis in the tail block must be convertible to selects. + PHIs.clear(); + MachineBasicBlock *TPred = getTPred(); + MachineBasicBlock *FPred = getFPred(); + for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); + I != E && I->isPHI(); ++I) { + PHIs.push_back(&*I); + PHIInfo &PI = PHIs.back(); + // Find PHI operands corresponding to TPred and FPred. + for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) { + if (PI.PHI->getOperand(i + 1).getMBB() == TPred) + PI.TReg = PI.PHI->getOperand(i).getReg(); + if (PI.PHI->getOperand(i + 1).getMBB() == FPred) + PI.FReg = PI.PHI->getOperand(i).getReg(); + } + assert(Register::isVirtualRegister(PI.TReg) && "Bad PHI"); + assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI"); + + // Get target information. + if (!TII->canInsertSelect(*Head, Cond, PI.PHI->getOperand(0).getReg(), + PI.TReg, PI.FReg, PI.CondCycles, PI.TCycles, + PI.FCycles)) { + LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI); + return false; + } + } + + // Check that the conditional instructions can be speculated. 
+ InsertAfter.clear(); + ClobberedRegUnits.reset(); + for (MachineBasicBlock *MBB : {TBB, FBB}) + if (MBB != Tail && !canPredicateInstrs(MBB)) + return false; + + // Try to find a valid insertion point for the speculated instructions in the + // head basic block. + if (!findInsertionPoint()) + return false; + + if (isTriangle()) + ++NumTrianglesSeen; + else + ++NumDiamondsSeen; + return true; +} + +/// \return true iff the two registers are known to have the same value. +static bool hasSameValue(const MachineRegisterInfo &MRI, + const TargetInstrInfo *TII, Register TReg, + Register FReg) { + if (TReg == FReg) + return true; + if (!TReg.isVirtual() || !FReg.isVirtual()) + return false; + + const MachineInstr *TDef = MRI.getUniqueVRegDef(TReg); + const MachineInstr *FDef = MRI.getUniqueVRegDef(FReg); + if (!TDef || !FDef) + return false; + + // If there are side-effects, all bets are off. + if (TDef->hasUnmodeledSideEffects()) + return false; + + // If the instruction could modify memory, or there may be some intervening + // store between the two, we can't consider them to be equal. + if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad()) + return false; + + // We also can't guarantee that they are the same if, for example, the + // instructions are both a copy from a physical reg, because some other + // instruction may have modified the value in that reg between the two + // defining insts. + if (any_of(TDef->uses(), [](const MachineOperand &MO) { + return MO.isReg() && MO.getReg().isPhysical(); + })) + return false; + + // Check whether the two defining instructions produce the same value(s). + if (!TII->produceSameValue(*TDef, *FDef, &MRI)) + return false; + + // Further, check that the two defs come from corresponding operands. 
+ int TIdx = TDef->findRegisterDefOperandIdx(TReg, /*TRI=*/nullptr); + int FIdx = FDef->findRegisterDefOperandIdx(FReg, /*TRI=*/nullptr); + if (TIdx == -1 || FIdx == -1) + return false; + + return TIdx == FIdx; +} + +/// replacePHIInstrs - Completely replace PHI instructions with selects. +/// This is possible when the only Tail predecessors are the if-converted +/// blocks. +void SSAIfConv::replacePHIInstrs() { + assert(Tail->pred_size() == 2 && "Cannot replace PHIs"); + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. + for (PHIInfo &PI : PHIs) { + LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); + Register DstReg = PI.PHI->getOperand(0).getReg(); + if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) { + // We do not need the select instruction if both incoming values are + // equal, but we do need a COPY. + BuildMI(*Head, FirstTerm, HeadDL, TII->get(TargetOpcode::COPY), DstReg) + .addReg(PI.TReg); + } else { + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, + PI.FReg); + } + LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); + PI.PHI->eraseFromParent(); + PI.PHI = nullptr; + } +} + +/// rewritePHIOperands - When there are additional Tail predecessors, insert +/// select instructions in Head and rewrite PHI operands to use the selects. +/// Keep the PHI instructions in Tail to handle the other predecessors. +void SSAIfConv::rewritePHIOperands() { + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. 
+ for (PHIInfo &PI : PHIs) { + unsigned DstReg = 0; + + LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI); + if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) { + // We do not need the select instruction if both incoming values are + // equal. + DstReg = PI.TReg; + } else { + Register PHIDst = PI.PHI->getOperand(0).getReg(); + DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, + PI.FReg); + LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); + } + + // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. + for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { + MachineBasicBlock *MBB = PI.PHI->getOperand(i - 1).getMBB(); + if (MBB == getTPred()) { + PI.PHI->getOperand(i - 1).setMBB(Head); + PI.PHI->getOperand(i - 2).setReg(DstReg); + } else if (MBB == getFPred()) { + PI.PHI->removeOperand(i - 1); + PI.PHI->removeOperand(i - 2); + } + } + LLVM_DEBUG(dbgs() << " --> " << *PI.PHI); + } +} + +/// convertIf - Execute the if conversion after canConvertIf has determined the +/// feasibility. +/// +/// Any basic blocks that need to be erased will be added to RemoveBlocks. +/// +void SSAIfConv::convertIf(SmallVectorImpl &RemoveBlocks) { + assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); + + // Update statistics. + if (isTriangle()) + ++NumTrianglesConv; + else + ++NumDiamondsConv; + + // Move all instructions into Head, except for the terminators. + for (MachineBasicBlock *MBB : {TBB, FBB}) { + if (MBB != Tail) { + // reverse the condition for the false bb + Predicate.predicateBlock(MBB, Cond, MBB == FBB); + Head->splice(InsertionPoint, MBB, MBB->begin(), + MBB->getFirstTerminator()); + } + } + + // Are there extra Tail predecessors? + bool ExtraPreds = Tail->pred_size() != 2; + if (ExtraPreds) + rewritePHIOperands(); + else + replacePHIInstrs(); + + // Fix up the CFG, temporarily leave Head without any successors. 
+ Head->removeSuccessor(TBB); + Head->removeSuccessor(FBB, true); + if (TBB != Tail) + TBB->removeSuccessor(Tail, true); + if (FBB != Tail) + FBB->removeSuccessor(Tail, true); + + // Fix up Head's terminators. + // It should become a single branch or a fallthrough. + DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); + TII->removeBranch(*Head); + + // Mark the now empty conditional blocks for removal and move them to the end. + // It is likely that Head can fall + // through to Tail, and we can join the two blocks. + if (TBB != Tail) { + RemoveBlocks.push_back(TBB); + if (TBB != &TBB->getParent()->back()) + TBB->moveAfter(&TBB->getParent()->back()); + } + if (FBB != Tail) { + RemoveBlocks.push_back(FBB); + if (FBB != &FBB->getParent()->back()) + FBB->moveAfter(&FBB->getParent()->back()); + } + + assert(Head->succ_empty() && "Additional head successors?"); + if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) { + // Splice Tail onto the end of Head. + LLVM_DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail) + << " into head " << printMBBReference(*Head) << '\n'); + Head->splice(Head->end(), Tail, Tail->begin(), Tail->end()); + Head->transferSuccessorsAndUpdatePHIs(Tail); + RemoveBlocks.push_back(Tail); + if (Tail != &Tail->getParent()->back()) + Tail->moveAfter(&Tail->getParent()->back()); + } else { + // We need a branch to Tail, let code placement work it out later. + LLVM_DEBUG(dbgs() << "Converting to unconditional branch.\n"); + SmallVector EmptyCond; + TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL); + Head->addSuccessor(Tail); + } + LLVM_DEBUG(dbgs() << *Head); +} + +void SSAIfConv::updateDomTree(ArrayRef Removed) { + // convertIf can remove TBB, FBB, and Tail can be merged into Head. + // TBB and FBB should not dominate any blocks. + // Tail children should be transferred to Head. 
+ MachineDomTreeNode *HeadNode = DomTree->getNode(Head); + for (auto *B : Removed) { + MachineDomTreeNode *Node = DomTree->getNode(B); + assert(Node != HeadNode && "Cannot erase the head node"); + while (Node->getNumChildren()) { + assert(Node->getBlock() == Tail && "Unexpected children"); + DomTree->changeImmediateDominator(Node->back(), HeadNode); + } + DomTree->eraseNode(B); + } +} + +void SSAIfConv::updateLoops(ArrayRef Removed) { + // If-conversion doesn't change loop structure, and it doesn't mess with back + // edges, so updating LoopInfo is simply removing the dead blocks. + for (auto *B : Removed) + Loops->removeBlock(B); +} + +void SSAIfConv::invalidateTraces() { + if (!Traces) + return; + Traces->verifyAnalysis(); + Traces->invalidate(Head); + Traces->invalidate(Tail); + Traces->invalidate(TBB); + Traces->invalidate(FBB); + Traces->verifyAnalysis(); +} + +// Visit blocks in dominator tree post-order. The post-order enables nested +// if-conversion in a single pass. The tryConvertIf() function may erase +// blocks, but only blocks dominated by the head block. This makes it safe to +// update the dominator tree while the post-order iterator is still active. +bool SSAIfConv::run() { + bool Changed = false; + for (auto *DomNode : post_order(DomTree)) + if (tryConvertIf(DomNode->getBlock())) + Changed = true; + return Changed; +} + +bool SSAIfConv::tryConvertIf(MachineBasicBlock *MBB) { + bool Changed = false; + while (canConvertIf(MBB) && (Stress || Predicate.shouldConvertIf(*this))) { + // If-convert MBB and update analyses. 
+ invalidateTraces(); + SmallVector<MachineBasicBlock *, 4> RemoveBlocks; + convertIf(RemoveBlocks); + Changed = true; + updateDomTree(RemoveBlocks); + for (MachineBasicBlock *Dead : RemoveBlocks) + Dead->eraseFromParent(); + updateLoops(RemoveBlocks); + } + return Changed; +} \ No newline at end of file diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 8d6e022e1e4d4..8f53adc46908d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -369,6 +369,9 @@ extern char &AMDGPUCodeGenPrepareID; void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); extern char &AMDGPURemoveIncompatibleFunctionsID; +void initializeAMDGPUIfConverterPass(PassRegistry &); +extern char &AMDGPUIfConverterID; + void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &); extern char &AMDGPULateCodeGenPrepareLegacyID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIfConverter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIfConverter.cpp new file mode 100644 index 0000000000000..828d97d652eca --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUIfConverter.cpp @@ -0,0 +1,184 @@ +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SSAIfConv.h" + +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "amdgpu-if-cvt" + +namespace { +unsigned getReversedVCMPXOpcode(unsigned Opcode) { + // TODO: placeholder, only LT_I32 -> GE_I32 is mapped; anything else aborts. + switch (Opcode) { + case AMDGPU::V_CMPX_LT_I32_nosdst_e64: + return AMDGPU::V_CMPX_GE_I32_nosdst_e64; + default: + errs() << "unhandled: " << Opcode << "\n"; + llvm_unreachable("unhandled vcmp opcode"); + } +} + +bool needsExecPredication(const SIInstrInfo *TII, const MachineInstr &I) { + return TII->isVALU(I) || TII->isVMEM(I); +} + +struct ExecPredicate : SSAIfConv::PredicationStrategyBase { + const SIInstrInfo *TII; + const SIRegisterInfo *RegInfo; + 
MachineInstr *Cmp = nullptr; + + ExecPredicate(const SIInstrInfo *TII) + : TII(TII), RegInfo(&TII->getRegisterInfo()) {} + + bool canConvertIf(MachineBasicBlock *Head, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, MachineBasicBlock *Tail, + ArrayRef<MachineOperand> Cond) override { + + // Check that the cmp is just before the branch and that it is promotable to + // v_cmpx. On success the compare is remembered in Cmp for predicateBlock. + const unsigned SupportedBranchOpc[]{ + AMDGPU::S_CBRANCH_SCC0, AMDGPU::S_CBRANCH_SCC1, AMDGPU::S_CBRANCH_VCCNZ, + AMDGPU::S_CBRANCH_VCCZ}; + + MachineInstr &CBranch = *Head->getFirstInstrTerminator(); + if (!llvm::is_contained(SupportedBranchOpc, CBranch.getOpcode())) + return false; + + auto CmpInstr = std::next(CBranch.getReverseIterator()); + if (CmpInstr == Head->instr_rend()) + return false; + + Register SCCorVCC = Cond[1].getReg(); + bool ModifiesConditionReg = CmpInstr->modifiesRegister(SCCorVCC, RegInfo); + if (!ModifiesConditionReg) + return false; + + Cmp = &*CmpInstr; + + unsigned CmpOpc = Cmp->getOpcode(); + if (TII->isSALU(*Cmp)) + CmpOpc = TII->getVALUOp(*Cmp); + if (AMDGPU::getVCMPXOpFromVCMP(CmpOpc) == -1) { + LLVM_DEBUG(dbgs() << "unhandled branch " << *Cmp << "\n"); + return false; + } + + return true; + } + + bool canPredicateInstr(const MachineInstr &I) override { + + // TODO: relax this condition, if exec is masked, check that it goes back to + // normal + // TODO: what about scc or vcc? Are they taken into account in the MBB + // live-ins? 
+ MCRegister Exec = RegInfo->getExec(); + bool ModifiesExec = I.modifiesRegister(Exec, RegInfo); + if (ModifiesExec) + return false; + + if (needsExecPredication(TII, I)) + return true; + + bool DontMoveAcrossStore = true; + bool IsSpeculatable = I.isDereferenceableInvariantLoad() || + I.isSafeToMove(DontMoveAcrossStore); + if (IsSpeculatable) + return true; + + return false; + } + + bool shouldConvertIf(SSAIfConv &IfConv) override { + // TODO: cost model + return true; + } + + void predicateBlock(MachineBasicBlock *MBB, ArrayRef<MachineOperand> Cond, + bool Reverse) override { + // Save exec. + MachineFunction &MF = *MBB->getParent(); + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + + Register ExecBackup = MFI->getSGPRForEXECCopy(); + + const DebugLoc &CmpLoc = Cmp->getDebugLoc(); + + auto FirstInstruction = MBB->begin(); + const bool IsSCCLive = + false; // assume not since the live-ins are supposed to be empty + TII->insertScratchExecCopy(MF, *MBB, FirstInstruction, CmpLoc, ExecBackup, + IsSCCLive); + + // Mask exec with the v_cmpx form of the compare found by canConvertIf. + unsigned CmpOpc = Cmp->getOpcode(); + if (TII->isSALU(*Cmp)) + CmpOpc = TII->getVALUOp(*Cmp); + + CmpOpc = AMDGPU::getVCMPXOpFromVCMP(CmpOpc); + if (Reverse) + CmpOpc = getReversedVCMPXOpcode(CmpOpc); + + // TODO: handle this properly. The second block may kill those registers. 
+ Cmp->getOperand(0).setIsKill(false); + Cmp->getOperand(1).setIsKill(false); + + auto VCmpX = BuildMI(*MBB, FirstInstruction, CmpLoc, TII->get(CmpOpc)); + VCmpX->addOperand(Cmp->getOperand(0)); + VCmpX->addOperand(Cmp->getOperand(1)); + + // Restore exec at the end of the predicated block. + TII->restoreExec(MF, *MBB, MBB->end(), DebugLoc(), ExecBackup); + } + + ~ExecPredicate() override = default; +}; + +const char PassName[] = "AMDGPU If Conversion"; + +struct AMDGPUIfConverter : MachineFunctionPass { + static char ID; + AMDGPUIfConverter() : MachineFunctionPass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + StringRef getPassName() const override { return PassName; } +}; + +char AMDGPUIfConverter::ID = 0; + +void AMDGPUIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addPreserved<MachineDominatorTreeWrapperPass>(); + AU.addRequired<MachineLoopInfoWrapperPass>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool AMDGPUIfConverter::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + const auto &STI = MF.getSubtarget<GCNSubtarget>(); + if (!STI.hasGFX10_3Insts()) + return false; + + const SIInstrInfo *TII = STI.getInstrInfo(); + auto *DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); + auto *Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); + + ExecPredicate Predicate(TII); + SSAIfConv IfConv(Predicate, MF, DomTree, Loops); + return IfConv.run(); +} +} // namespace +char &llvm::AMDGPUIfConverterID = AMDGPUIfConverter::ID; +INITIALIZE_PASS_BEGIN(AMDGPUIfConverter, DEBUG_TYPE, PassName, false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) +INITIALIZE_PASS_END(AMDGPUIfConverter, DEBUG_TYPE, PassName, false, false) \ No newline at end of file diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 9c9c505139373..a7f30f502bb50 100644 --- 
a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -203,11 +203,6 @@ static VGPRRegisterRegAlloc fastRegAllocVGPR( "fast", "fast register allocator", createFastVGPRRegisterAllocator); } // anonymous namespace -static cl::opt -EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, - cl::desc("Run early if-conversion"), - cl::init(false)); - static cl::opt OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), @@ -447,6 +442,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPURewriteUndefForPHILegacyPass(*PR); initializeAMDGPUUnifyMetadataPass(*PR); initializeSIAnnotateControlFlowLegacyPass(*PR); + initializeAMDGPUIfConverterPass(*PR); initializeAMDGPUInsertSingleUseVDSTPass(*PR); initializeAMDGPUInsertDelayAluPass(*PR); initializeSIInsertHardClausesPass(*PR); @@ -1285,9 +1281,7 @@ void GCNPassConfig::addMachineSSAOptimization() { } bool GCNPassConfig::addILPOpts() { - if (EnableEarlyIfConversion) - addPass(&EarlyIfConverterID); - + addPass(&AMDGPUIfConverterID); TargetPassConfig::addILPOpts(); return false; } diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index e813653158e5d..a33ecbfd691a2 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -59,6 +59,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUGlobalISelDivergenceLowering.cpp AMDGPUGlobalISelUtils.cpp AMDGPUHSAMetadataStreamer.cpp + AMDGPUIfConverter.cpp AMDGPUInsertDelayAlu.cpp AMDGPUInstCombineIntrinsic.cpp AMDGPUInstrInfo.cpp diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-if-cvt.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-if-cvt.ll new file mode 100644 index 0000000000000..24b4d9b5de27f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-if-cvt.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc 
-mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +define amdgpu_kernel void @scalar_cmp(i32 noundef %value, ptr addrspace(8) nocapture writeonly %res, i32 noundef %v_offset, i32 noundef %0, i32 noundef %flag) { +; GCN-LABEL: scalar_cmp: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dword s0, s[2:3], 0x4c +; GCN-NEXT: s_or_saveexec_b32 s105, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_cmpx_ge_i32_e64 s0, 1 +; GCN-NEXT: s_clause 0x2 +; GCN-NEXT: s_load_dword s4, s[2:3], 0x24 +; GCN-NEXT: s_load_dword s5, s[2:3], 0x44 +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x34 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GCN-NEXT: s_endpgm +entry: + %cmp = icmp sgt i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + ret void +} + +define amdgpu_kernel void @vec_cmp(i32 noundef %value, ptr addrspace(8) nocapture writeonly %res, i32 noundef %v_offset, i32 noundef %0, i32 noundef %flag) { +; GCN-LABEL: vec_cmp: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dword s0, s[2:3], 0x4c +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_cmp_gt_u32_e32 vcc_lo, s0, v0 +; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo +; GCN-NEXT: s_cbranch_execz .LBB1_2 +; GCN-NEXT: ; %bb.1: ; %if.then +; GCN-NEXT: s_clause 0x2 +; GCN-NEXT: s_load_dword s4, s[2:3], 0x24 +; GCN-NEXT: s_load_dword s5, s[2:3], 0x44 +; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x34 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen +; GCN-NEXT: .LBB1_2: ; %if.end +; GCN-NEXT: s_endpgm +entry: + %1 = tail call noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x() + %cmp = icmp ult i32 %1, %flag + br 
i1 %cmp, label %if.then, label %if.end + +if.then: + tail call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %value, ptr addrspace(8) %res, i32 %v_offset, i32 0, i32 0) + br label %if.end + +if.end: + ret void +} + +declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture writeonly, i32, i32, i32 immarg) +declare i32 @llvm.amdgcn.workitem.id.x()