diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 1caed93b1b668..14cdcc956aa5d 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -131,7 +131,7 @@ class SelectInstToUnfold { explicit operator bool() const { return SI && SIUse; } }; -void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, +void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, std::vector *NewSIsToUnfold, std::vector *NewBBs); @@ -142,6 +142,7 @@ class DFAJumpThreading { : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {} bool run(Function &F); + bool LoopInfoBroken; private: void @@ -157,7 +158,7 @@ class DFAJumpThreading { std::vector NewSIsToUnfold; std::vector NewBBs; - unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs); + unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs); // Put newly discovered select instructions into the work list. for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold) @@ -201,7 +202,7 @@ void createBasicBlockAndSinkSelectInst( /// created basic blocks into \p NewBBs. /// /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. -void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, +void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, std::vector *NewSIsToUnfold, std::vector *NewBBs) { SelectInst *SI = SIToUnfold.getInst(); @@ -307,6 +308,12 @@ void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT}, {DominatorTree::Insert, StartBlock, FT}}); + // Preserve loop info + if (Loop *L = LI->getLoopFor(SI->getParent())) { + for (BasicBlock *NewBB : *NewBBs) + L->addBasicBlockToLoop(NewBB, *LI); + } + // The select is now dead. assert(SI->use_empty() && "Select must be dead now"); SI->eraseFromParent(); @@ -522,9 +529,10 @@ struct MainSwitch { }; struct AllSwitchPaths { - AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE) - : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), - ORE(ORE) {} + AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE, + LoopInfo *LI) + : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE), + LI(LI) {} std::vector &getThreadingPaths() { return TPaths; } unsigned getNumThreadingPaths() { return TPaths.size(); } @@ -596,6 +604,12 @@ struct AllSwitchPaths { Visited.insert(BB); + // Stop if we have reached the BB out of loop, since its successors have no + // impact on the DFA. + // TODO: Do we need to stop exploring if BB is the outer loop of the switch? + if (!LI->getLoopFor(BB)) + return Res; + // Some blocks have multiple edges to the same successor, and this set // is used to prevent a duplicate path from being generated SmallSet Successors; @@ -737,6 +751,7 @@ struct AllSwitchPaths { BasicBlock *SwitchBlock; OptimizationRemarkEmitter *ORE; std::vector TPaths; + LoopInfo *LI; }; struct TransformDFA { @@ -1283,6 +1298,7 @@ bool DFAJumpThreading::run(Function &F) { SmallVector ThreadableLoops; bool MadeChanges = false; + LoopInfoBroken = false; for (BasicBlock &BB : F) { auto *SI = dyn_cast(BB.getTerminator()); @@ -1304,7 +1320,7 @@ bool DFAJumpThreading::run(Function &F) { if (!Switch.getSelectInsts().empty()) MadeChanges = true; - AllSwitchPaths SwitchPaths(&Switch, ORE); + AllSwitchPaths SwitchPaths(&Switch, ORE, LI); SwitchPaths.run(); if (SwitchPaths.getNumThreadingPaths() > 0) { @@ -1315,10 +1331,15 @@ bool DFAJumpThreading::run(Function &F) { // strict requirement but it can cause buggy behavior if there is an // overlap of blocks in different opportunities. There is a lot of room to // experiment with catching more opportunities here. + // NOTE: To release this contraint, we must handle LoopInfo invalidation break; } } +#ifdef NDEBUG + LI->verify(*DT); +#endif + SmallPtrSet EphValues; if (ThreadableLoops.size() > 0) CodeMetrics::collectEphemeralValues(&F, AC, EphValues); @@ -1327,6 +1348,7 @@ bool DFAJumpThreading::run(Function &F) { TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); Transform.run(); MadeChanges = true; + LoopInfoBroken = true; } #ifdef EXPENSIVE_CHECKS @@ -1347,11 +1369,13 @@ PreservedAnalyses DFAJumpThreadingPass::run(Function &F, LoopInfo &LI = AM.getResult(F); TargetTransformInfo &TTI = AM.getResult(F); OptimizationRemarkEmitter ORE(&F); - - if (!DFAJumpThreading(&AC, &DT, &LI, &TTI, &ORE).run(F)) + DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE); + if (!ThreadImpl.run(F)) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); + if (!ThreadImpl.LoopInfoBroken) + PA.preserve(); return PA; }