diff --git a/llvm/lib/Transforms/Coroutines/CMakeLists.txt b/llvm/lib/Transforms/Coroutines/CMakeLists.txt index ddc4caa82b0e9..c6508174a7f10 100644 --- a/llvm/lib/Transforms/Coroutines/CMakeLists.txt +++ b/llvm/lib/Transforms/Coroutines/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_component_library(LLVMCoroutines CoroFrame.cpp CoroSplit.cpp SuspendCrossingInfo.cpp + SpillUtils.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/Coroutines diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 7a631a646f3f8..c6027a7de2f80 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -16,27 +16,22 @@ //===----------------------------------------------------------------------===// #include "CoroInternal.h" +#include "SpillUtils.h" #include "SuspendCrossingInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/Analysis/StackLifetime.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/IR/CFG.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/OptimizedStructLayout.h" -#include "llvm/Support/circular_raw_ostream.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -175,24 +170,17 @@ template <> struct GraphTraits { namespace { class FrameTypeBuilder; // Mapping from the to-be-spilled value to all the users that need reload. -using SpillInfo = SmallMapVector, 8>; -struct AllocaInfo { - AllocaInst *Alloca; - DenseMap> Aliases; - bool MayWriteBeforeCoroBegin; - AllocaInfo(AllocaInst *Alloca, - DenseMap> Aliases, - bool MayWriteBeforeCoroBegin) - : Alloca(Alloca), Aliases(std::move(Aliases)), - MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {} -}; struct FrameDataInfo { // All the values (that are not allocas) that needs to be spilled to the // frame. - SpillInfo Spills; + coro::SpillInfo &Spills; // Allocas contains all values defined as allocas that need to live in the // frame. 
- SmallVector Allocas; + SmallVectorImpl &Allocas; + + FrameDataInfo(coro::SpillInfo &Spills, + SmallVectorImpl &Allocas) + : Spills(Spills), Allocas(Allocas) {} SmallVector getAllDefs() const { SmallVector Defs; @@ -271,7 +259,7 @@ struct FrameDataInfo { } // namespace #ifndef NDEBUG -static void dumpSpills(StringRef Title, const SpillInfo &Spills) { +static void dumpSpills(StringRef Title, const coro::SpillInfo &Spills) { dbgs() << "------------- " << Title << " --------------\n"; for (const auto &E : Spills) { E.first->dump(); @@ -290,7 +278,7 @@ static void dumpRemats( } } -static void dumpAllocas(const SmallVectorImpl &Allocas) { +static void dumpAllocas(const SmallVectorImpl &Allocas) { dbgs() << "------------- Allocas --------------\n"; for (const auto &A : Allocas) { A.Alloca->dump(); @@ -538,7 +526,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F, return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps( StackLifetimeAnalyzer.getLiveRange(AI2)); }; - auto GetAllocaSize = [&](const AllocaInfo &A) { + auto GetAllocaSize = [&](const coro::AllocaInfo &A) { std::optional RetSize = A.Alloca->getAllocationSize(DL); assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n"); assert(!RetSize->isScalable() && "Scalable vectors are not yet supported"); @@ -1119,367 +1107,6 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, return FrameTy; } -// We use a pointer use visitor to track how an alloca is being used. -// The goal is to be able to answer the following three questions: -// 1. Should this alloca be allocated on the frame instead. -// 2. Could the content of the alloca be modified prior to CoroBegn, which would -// require copying the data from alloca to the frame after CoroBegin. -// 3. Is there any alias created for this alloca prior to CoroBegin, but used -// after CoroBegin. In that case, we will need to recreate the alias after -// CoroBegin based off the frame. To answer question 1, we track two things: -// a. List of all BasicBlocks that use this alloca or any of the aliases of -// the alloca. In the end, we check if there exists any two basic blocks that -// cross suspension points. If so, this alloca must be put on the frame. b. -// Whether the alloca or any alias of the alloca is escaped at some point, -// either by storing the address somewhere, or the address is used in a -// function call that might capture. If it's ever escaped, this alloca must be -// put on the frame conservatively. -// To answer quetion 2, we track through the variable MayWriteBeforeCoroBegin. -// Whenever a potential write happens, either through a store instruction, a -// function call or any of the memory intrinsics, we check whether this -// instruction is prior to CoroBegin. To answer question 3, we track the offsets -// of all aliases created for the alloca prior to CoroBegin but used after -// CoroBegin. std::optional is used to be able to represent the case when the -// offset is unknown (e.g. when you have a PHINode that takes in different -// offset values). We cannot handle unknown offsets and will assert. This is the -// potential issue left out. An ideal solution would likely require a -// significant redesign. 
-namespace { -struct AllocaUseVisitor : PtrUseVisitor { - using Base = PtrUseVisitor; - AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT, - const coro::Shape &CoroShape, - const SuspendCrossingInfo &Checker, - bool ShouldUseLifetimeStartInfo) - : PtrUseVisitor(DL), DT(DT), CoroShape(CoroShape), Checker(Checker), - ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) { - for (AnyCoroSuspendInst *SuspendInst : CoroShape.CoroSuspends) - CoroSuspendBBs.insert(SuspendInst->getParent()); - } - - void visit(Instruction &I) { - Users.insert(&I); - Base::visit(I); - // If the pointer is escaped prior to CoroBegin, we have to assume it would - // be written into before CoroBegin as well. - if (PI.isEscaped() && - !DT.dominates(CoroShape.CoroBegin, PI.getEscapingInst())) { - MayWriteBeforeCoroBegin = true; - } - } - // We need to provide this overload as PtrUseVisitor uses a pointer based - // visiting function. - void visit(Instruction *I) { return visit(*I); } - - void visitPHINode(PHINode &I) { - enqueueUsers(I); - handleAlias(I); - } - - void visitSelectInst(SelectInst &I) { - enqueueUsers(I); - handleAlias(I); - } - - void visitStoreInst(StoreInst &SI) { - // Regardless whether the alias of the alloca is the value operand or the - // pointer operand, we need to assume the alloca is been written. - handleMayWrite(SI); - - if (SI.getValueOperand() != U->get()) - return; - - // We are storing the pointer into a memory location, potentially escaping. - // As an optimization, we try to detect simple cases where it doesn't - // actually escape, for example: - // %ptr = alloca .. - // %addr = alloca .. - // store %ptr, %addr - // %x = load %addr - // .. - // If %addr is only used by loading from it, we could simply treat %x as - // another alias of %ptr, and not considering %ptr being escaped. - auto IsSimpleStoreThenLoad = [&]() { - auto *AI = dyn_cast(SI.getPointerOperand()); - // If the memory location we are storing to is not an alloca, it - // could be an alias of some other memory locations, which is difficult - // to analyze. - if (!AI) - return false; - // StoreAliases contains aliases of the memory location stored into. - SmallVector StoreAliases = {AI}; - while (!StoreAliases.empty()) { - Instruction *I = StoreAliases.pop_back_val(); - for (User *U : I->users()) { - // If we are loading from the memory location, we are creating an - // alias of the original pointer. - if (auto *LI = dyn_cast(U)) { - enqueueUsers(*LI); - handleAlias(*LI); - continue; - } - // If we are overriding the memory location, the pointer certainly - // won't escape. - if (auto *S = dyn_cast(U)) - if (S->getPointerOperand() == I) - continue; - if (auto *II = dyn_cast(U)) - if (II->isLifetimeStartOrEnd()) - continue; - // BitCastInst creats aliases of the memory location being stored - // into. - if (auto *BI = dyn_cast(U)) { - StoreAliases.push_back(BI); - continue; - } - return false; - } - } - - return true; - }; - - if (!IsSimpleStoreThenLoad()) - PI.setEscaped(&SI); - } - - // All mem intrinsics modify the data. - void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(MI); } - - void visitBitCastInst(BitCastInst &BC) { - Base::visitBitCastInst(BC); - handleAlias(BC); - } - - void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { - Base::visitAddrSpaceCastInst(ASC); - handleAlias(ASC); - } - - void visitGetElementPtrInst(GetElementPtrInst &GEPI) { - // The base visitor will adjust Offset accordingly. 
- Base::visitGetElementPtrInst(GEPI); - handleAlias(GEPI); - } - - void visitIntrinsicInst(IntrinsicInst &II) { - // When we found the lifetime markers refers to a - // subrange of the original alloca, ignore the lifetime - // markers to avoid misleading the analysis. - if (!IsOffsetKnown || !Offset.isZero()) - return Base::visitIntrinsicInst(II); - switch (II.getIntrinsicID()) { - default: - return Base::visitIntrinsicInst(II); - case Intrinsic::lifetime_start: - LifetimeStarts.insert(&II); - LifetimeStartBBs.push_back(II.getParent()); - break; - case Intrinsic::lifetime_end: - LifetimeEndBBs.insert(II.getParent()); - break; - } - } - - void visitCallBase(CallBase &CB) { - for (unsigned Op = 0, OpCount = CB.arg_size(); Op < OpCount; ++Op) - if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op)) - PI.setEscaped(&CB); - handleMayWrite(CB); - } - - bool getShouldLiveOnFrame() const { - if (!ShouldLiveOnFrame) - ShouldLiveOnFrame = computeShouldLiveOnFrame(); - return *ShouldLiveOnFrame; - } - - bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; } - - DenseMap> getAliasesCopy() const { - assert(getShouldLiveOnFrame() && "This method should only be called if the " - "alloca needs to live on the frame."); - for (const auto &P : AliasOffetMap) - if (!P.second) - report_fatal_error("Unable to handle an alias with unknown offset " - "created before CoroBegin."); - return AliasOffetMap; - } - -private: - const DominatorTree &DT; - const coro::Shape &CoroShape; - const SuspendCrossingInfo &Checker; - // All alias to the original AllocaInst, created before CoroBegin and used - // after CoroBegin. Each entry contains the instruction and the offset in the - // original Alloca. They need to be recreated after CoroBegin off the frame. - DenseMap> AliasOffetMap{}; - SmallPtrSet Users{}; - SmallPtrSet LifetimeStarts{}; - SmallVector LifetimeStartBBs{}; - SmallPtrSet LifetimeEndBBs{}; - SmallPtrSet CoroSuspendBBs{}; - bool MayWriteBeforeCoroBegin{false}; - bool ShouldUseLifetimeStartInfo{true}; - - mutable std::optional ShouldLiveOnFrame{}; - - bool computeShouldLiveOnFrame() const { - // If lifetime information is available, we check it first since it's - // more precise. We look at every pair of lifetime.start intrinsic and - // every basic block that uses the pointer to see if they cross suspension - // points. The uses cover both direct uses as well as indirect uses. - if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) { - // If there is no explicit lifetime.end, then assume the address can - // cross suspension points. - if (LifetimeEndBBs.empty()) - return true; - - // If there is a path from a lifetime.start to a suspend without a - // corresponding lifetime.end, then the alloca's lifetime persists - // beyond that suspension point and the alloca must go on the frame. - llvm::SmallVector Worklist(LifetimeStartBBs); - if (isManyPotentiallyReachableFromMany(Worklist, CoroSuspendBBs, - &LifetimeEndBBs, &DT)) - return true; - - // Addresses are guaranteed to be identical after every lifetime.start so - // we cannot use the local stack if the address escaped and there is a - // suspend point between lifetime markers. This should also cover the - // case of a single lifetime.start intrinsic in a loop with suspend point. 
- if (PI.isEscaped()) { - for (auto *A : LifetimeStarts) { - for (auto *B : LifetimeStarts) { - if (Checker.hasPathOrLoopCrossingSuspendPoint(A->getParent(), - B->getParent())) - return true; - } - } - } - return false; - } - // FIXME: Ideally the isEscaped check should come at the beginning. - // However there are a few loose ends that need to be fixed first before - // we can do that. We need to make sure we are not over-conservative, so - // that the data accessed in-between await_suspend and symmetric transfer - // is always put on the stack, and also data accessed after coro.end is - // always put on the stack (esp the return object). To fix that, we need - // to: - // 1) Potentially treat sret as nocapture in calls - // 2) Special handle the return object and put it on the stack - // 3) Utilize lifetime.end intrinsic - if (PI.isEscaped()) - return true; - - for (auto *U1 : Users) - for (auto *U2 : Users) - if (Checker.isDefinitionAcrossSuspend(*U1, U2)) - return true; - - return false; - } - - void handleMayWrite(const Instruction &I) { - if (!DT.dominates(CoroShape.CoroBegin, &I)) - MayWriteBeforeCoroBegin = true; - } - - bool usedAfterCoroBegin(Instruction &I) { - for (auto &U : I.uses()) - if (DT.dominates(CoroShape.CoroBegin, U)) - return true; - return false; - } - - void handleAlias(Instruction &I) { - // We track all aliases created prior to CoroBegin but used after. - // These aliases may need to be recreated after CoroBegin if the alloca - // need to live on the frame. - if (DT.dominates(CoroShape.CoroBegin, &I) || !usedAfterCoroBegin(I)) - return; - - if (!IsOffsetKnown) { - AliasOffetMap[&I].reset(); - } else { - auto Itr = AliasOffetMap.find(&I); - if (Itr == AliasOffetMap.end()) { - AliasOffetMap[&I] = Offset; - } else if (Itr->second && *Itr->second != Offset) { - // If we have seen two different possible values for this alias, we set - // it to empty. - AliasOffetMap[&I].reset(); - } - } - } -}; -} // namespace - -// We need to make room to insert a spill after initial PHIs, but before -// catchswitch instruction. Placing it before violates the requirement that -// catchswitch, like all other EHPads must be the first nonPHI in a block. -// -// Split away catchswitch into a separate block and insert in its place: -// -// cleanuppad cleanupret. -// -// cleanupret instruction will act as an insert point for the spill. -static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) { - BasicBlock *CurrentBlock = CatchSwitch->getParent(); - BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch); - CurrentBlock->getTerminator()->eraseFromParent(); - - auto *CleanupPad = - CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock); - auto *CleanupRet = - CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock); - return CleanupRet; -} - -static BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape, - Value *Def, - const DominatorTree &DT) { - BasicBlock::iterator InsertPt; - if (auto *Arg = dyn_cast(Def)) { - // For arguments, we will place the store instruction right after - // the coroutine frame pointer instruction, i.e. coro.begin. - InsertPt = Shape.getInsertPtAfterFramePtr(); - - // If we're spilling an Argument, make sure we clear 'nocapture' - // from the coroutine function. - Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture); - } else if (auto *CSI = dyn_cast(Def)) { - // Don't spill immediately after a suspend; splitting assumes - // that the suspend will be followed by a branch. 
- InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHIIt(); - } else { - auto *I = cast(Def); - if (!DT.dominates(Shape.CoroBegin, I)) { - // If it is not dominated by CoroBegin, then spill should be - // inserted immediately after CoroFrame is computed. - InsertPt = Shape.getInsertPtAfterFramePtr(); - } else if (auto *II = dyn_cast(I)) { - // If we are spilling the result of the invoke instruction, split - // the normal edge and insert the spill in the new block. - auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest()); - InsertPt = NewBB->getTerminator()->getIterator(); - } else if (isa(I)) { - // Skip the PHINodes and EH pads instructions. - BasicBlock *DefBlock = I->getParent(); - if (auto *CSI = dyn_cast(DefBlock->getTerminator())) - InsertPt = splitBeforeCatchSwitch(CSI)->getIterator(); - else - InsertPt = DefBlock->getFirstInsertionPt(); - } else { - assert(!I->isTerminator() && "unexpected terminator"); - // For all other values, the spill is placed immediately after - // the definition. - InsertPt = I->getNextNode()->getIterator(); - } - } - - return InsertPt; -} - // Replace all alloca and SSA values that are accessed across suspend points // with GetElementPointer from coroutine frame + loads and stores. Create an // AllocaSpillBB that will become the new entry block for the resume parts of @@ -1564,7 +1191,7 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) { auto SpillAlignment = Align(FrameData.getAlign(Def)); // Create a store instruction storing the value into the // coroutine frame. - BasicBlock::iterator InsertPt = getSpillInsertionPt(Shape, Def, DT); + BasicBlock::iterator InsertPt = coro::getSpillInsertionPt(Shape, Def, DT); Type *ByValTy = nullptr; if (auto *Arg = dyn_cast(Def)) { @@ -2015,13 +1642,6 @@ bool coro::defaultMaterializable(Instruction &V) { isa(&V) || isa(&V) || isa(&V)); } -// Check for structural coroutine intrinsics that should not be spilled into -// the coroutine frame. -static bool isCoroutineStructureIntrinsic(Instruction &I) { - return isa(&I) || isa(&I) || - isa(&I); -} - // For each instruction identified as materializable across the suspend point, // and its associated DAG of other rematerializable instructions, // recreate the DAG of instructions after the suspend point. @@ -2125,45 +1745,6 @@ static bool isSuspendBlock(BasicBlock *BB) { return isa(BB->front()); } -typedef SmallPtrSet VisitedBlocksSet; - -/// Does control flow starting at the given block ever reach a suspend -/// instruction before reaching a block in VisitedOrFreeBBs? -static bool isSuspendReachableFrom(BasicBlock *From, - VisitedBlocksSet &VisitedOrFreeBBs) { - // Eagerly try to add this block to the visited set. If it's already - // there, stop recursing; this path doesn't reach a suspend before - // either looping or reaching a freeing block. - if (!VisitedOrFreeBBs.insert(From).second) - return false; - - // We assume that we'll already have split suspends into their own blocks. - if (isSuspendBlock(From)) - return true; - - // Recurse on the successors. - for (auto *Succ : successors(From)) { - if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs)) - return true; - } - - return false; -} - -/// Is the given alloca "local", i.e. bounded in lifetime to not cross a -/// suspend point? -static bool isLocalAlloca(CoroAllocaAllocInst *AI) { - // Seed the visited set with all the basic blocks containing a free - // so that we won't pass them up. 
- VisitedBlocksSet VisitedOrFreeBBs; - for (auto *User : AI->users()) { - if (auto FI = dyn_cast(User)) - VisitedOrFreeBBs.insert(FI->getParent()); - } - - return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs); -} - /// After we split the coroutine, will the given basic block be along /// an obvious exit path for the resumption function? static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB, @@ -2240,33 +1821,6 @@ static void lowerLocalAllocas(ArrayRef LocalAllocas, } } -/// Turn the given coro.alloca.alloc call into a dynamic allocation. -/// This happens during the all-instructions iteration, so it must not -/// delete the call. -static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI, - coro::Shape &Shape, - SmallVectorImpl &DeadInsts) { - IRBuilder<> Builder(AI); - auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr); - - for (User *U : AI->users()) { - if (isa(U)) { - U->replaceAllUsesWith(Alloc); - } else { - auto FI = cast(U); - Builder.SetInsertPoint(FI); - Shape.emitDealloc(Builder, Alloc, nullptr); - } - DeadInsts.push_back(cast(U)); - } - - // Push this on last so that it gets deleted after all the others. - DeadInsts.push_back(AI); - - // Return the new allocation value so that we can check for needed spills. - return cast(Alloc); -} - /// Get the current swifterror value. static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy, coro::Shape &Shape) { @@ -2425,51 +1979,6 @@ static void eliminateSwiftError(Function &F, coro::Shape &Shape) { } } -/// Async and Retcon{Once} conventions assume that all spill uses can be sunk -/// after the coro.begin intrinsic. -static void sinkSpillUsesAfterCoroBegin(Function &F, - const FrameDataInfo &FrameData, - CoroBeginInst *CoroBegin) { - DominatorTree Dom(F); - - SmallSetVector ToMove; - SmallVector Worklist; - - // Collect all users that precede coro.begin. - for (auto *Def : FrameData.getAllDefs()) { - for (User *U : Def->users()) { - auto Inst = cast(U); - if (Inst->getParent() != CoroBegin->getParent() || - Dom.dominates(CoroBegin, Inst)) - continue; - if (ToMove.insert(Inst)) - Worklist.push_back(Inst); - } - } - // Recursively collect users before coro.begin. - while (!Worklist.empty()) { - auto *Def = Worklist.pop_back_val(); - for (User *U : Def->users()) { - auto Inst = cast(U); - if (Dom.dominates(CoroBegin, Inst)) - continue; - if (ToMove.insert(Inst)) - Worklist.push_back(Inst); - } - } - - // Sort by dominance. - SmallVector InsertionList(ToMove.begin(), ToMove.end()); - llvm::sort(InsertionList, [&Dom](Instruction *A, Instruction *B) -> bool { - // If a dominates b it should precede (<) b. - return Dom.dominates(A, B); - }); - - Instruction *InsertPt = CoroBegin->getNextNode(); - for (Instruction *Inst : InsertionList) - Inst->moveBefore(InsertPt); -} - /// For each local variable that all of its user are only used inside one of /// suspended region, we sink their lifetime.start markers to the place where /// after the suspend block. Doing so minimizes the lifetime of each variable, @@ -2552,38 +2061,6 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape, } } -static void collectFrameAlloca(AllocaInst *AI, coro::Shape &Shape, - const SuspendCrossingInfo &Checker, - SmallVectorImpl &Allocas, - const DominatorTree &DT) { - if (Shape.CoroSuspends.empty()) - return; - - // The PromiseAlloca will be specially handled since it needs to be in a - // fixed position in the frame. 
- if (AI == Shape.SwitchLowering.PromiseAlloca) - return; - - // The __coro_gro alloca should outlive the promise, make sure we - // keep it outside the frame. - if (AI->hasMetadata(LLVMContext::MD_coro_outside_frame)) - return; - - // The code that uses lifetime.start intrinsic does not work for functions - // with loops without exit. Disable it on ABIs we know to generate such - // code. - bool ShouldUseLifetimeStartInfo = - (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon && - Shape.ABI != coro::ABI::RetconOnce); - AllocaUseVisitor Visitor{AI->getDataLayout(), DT, Shape, Checker, - ShouldUseLifetimeStartInfo}; - Visitor.visitPtr(*AI); - if (!Visitor.getShouldLiveOnFrame()) - return; - Allocas.emplace_back(AI, Visitor.getAliasesCopy(), - Visitor.getMayWriteBeforeCoroBegin()); -} - static std::optional> salvageDebugInfoImpl(SmallDenseMap &ArgToAllocaMap, bool UseEntryValue, Function *F, Value *Storage, @@ -2754,7 +2231,7 @@ static void doRematerializations( if (F.hasOptNone()) return; - SpillInfo Spills; + coro::SpillInfo Spills; // See if there are materializable instructions across suspend points // We record these as the starting point to also identify materializable @@ -2866,90 +2343,32 @@ void coro::buildCoroutineFrame( doRematerializations(F, Checker, MaterializableCallback); const DominatorTree DT(F); - FrameDataInfo FrameData; - SmallVector LocalAllocas; - SmallVector DeadInstructions; if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon && Shape.ABI != coro::ABI::RetconOnce) sinkLifetimeStartMarkers(F, Shape, Checker, DT); - // Collect the spills for arguments and other not-materializable values. - for (Argument &A : F.args()) - for (User *U : A.users()) - if (Checker.isDefinitionAcrossSuspend(A, U)) - FrameData.Spills[&A].push_back(cast(U)); - - for (Instruction &I : instructions(F)) { - // Values returned from coroutine structure intrinsics should not be part - // of the Coroutine Frame. - if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin) - continue; - - // Handle alloca.alloc specially here. - if (auto AI = dyn_cast(&I)) { - // Check whether the alloca's lifetime is bounded by suspend points. - if (isLocalAlloca(AI)) { - LocalAllocas.push_back(AI); - continue; - } - - // If not, do a quick rewrite of the alloca and then add spills of - // the rewritten value. The rewrite doesn't invalidate anything in - // Spills because the other alloca intrinsics have no other operands - // besides AI, and it doesn't invalidate the iteration because we delay - // erasing AI. - auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions); - - for (User *U : Alloc->users()) { - if (Checker.isDefinitionAcrossSuspend(*Alloc, U)) - FrameData.Spills[Alloc].push_back(cast(U)); - } - continue; - } - - // Ignore alloca.get; we process this as part of coro.alloca.alloc. - if (isa(I)) - continue; - - if (auto *AI = dyn_cast(&I)) { - collectFrameAlloca(AI, Shape, Checker, FrameData.Allocas, DT); - continue; - } - - for (User *U : I.users()) - if (Checker.isDefinitionAcrossSuspend(I, U)) { - // We cannot spill a token. - if (I.getType()->isTokenTy()) - report_fatal_error( - "token definition is separated from the use by a suspend point"); - FrameData.Spills[&I].push_back(cast(U)); - } - } + // All values (that are not allocas) that needs to be spilled to the frame. + coro::SpillInfo Spills; + // All values defined as allocas that need to live in the frame. 
+ SmallVector Allocas; - LLVM_DEBUG(dumpAllocas(FrameData.Allocas)); + // Collect the spills for arguments and other not-materializable values. + coro::collectSpillsFromArgs(Spills, F, Checker); + SmallVector DeadInstructions; + SmallVector LocalAllocas; + coro::collectSpillsAndAllocasFromInsts(Spills, Allocas, DeadInstructions, + LocalAllocas, F, Checker, DT, Shape); + coro::collectSpillsFromDbgInfo(Spills, F, Checker); - // We don't want the layout of coroutine frame to be affected - // by debug information. So we only choose to salvage DbgValueInst for - // whose value is already in the frame. - // We would handle the dbg.values for allocas specially - for (auto &Iter : FrameData.Spills) { - auto *V = Iter.first; - SmallVector DVIs; - SmallVector DVRs; - findDbgValues(DVIs, V, &DVRs); - for (DbgValueInst *DVI : DVIs) - if (Checker.isDefinitionAcrossSuspend(*V, DVI)) - FrameData.Spills[V].push_back(DVI); - // Add the instructions which carry debug info that is in the frame. - for (DbgVariableRecord *DVR : DVRs) - if (Checker.isDefinitionAcrossSuspend(*V, DVR->Marker->MarkedInstr)) - FrameData.Spills[V].push_back(DVR->Marker->MarkedInstr); - } + LLVM_DEBUG(dumpAllocas(Allocas)); + LLVM_DEBUG(dumpSpills("Spills", Spills)); - LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills)); if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async) - sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin); + sinkSpillUsesAfterCoroBegin(DT, Shape.CoroBegin, Spills, Allocas); + + // Build frame + FrameDataInfo FrameData(Spills, Allocas); Shape.FrameTy = buildFrameType(F, Shape, FrameData); Shape.FramePtr = Shape.CoroBegin; // For now, this works for C++ programs only. diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.cpp b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp new file mode 100644 index 0000000000000..d71b0a336d471 --- /dev/null +++ b/llvm/lib/Transforms/Coroutines/SpillUtils.cpp @@ -0,0 +1,631 @@ +//===- SpillUtils.cpp - Utilities for checking for spills ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SpillUtils.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/PtrUseVisitor.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +namespace llvm { + +namespace coro { + +namespace { + +typedef SmallPtrSet VisitedBlocksSet; + +static bool isSuspendBlock(BasicBlock *BB) { + return isa(BB->front()); +} + +// Check for structural coroutine intrinsics that should not be spilled into +// the coroutine frame. +static bool isCoroutineStructureIntrinsic(Instruction &I) { + return isa(&I) || isa(&I) || + isa(&I); +} + +/// Does control flow starting at the given block ever reach a suspend +/// instruction before reaching a block in VisitedOrFreeBBs? +static bool isSuspendReachableFrom(BasicBlock *From, + VisitedBlocksSet &VisitedOrFreeBBs) { + // Eagerly try to add this block to the visited set. If it's already + // there, stop recursing; this path doesn't reach a suspend before + // either looping or reaching a freeing block. 
+ if (!VisitedOrFreeBBs.insert(From).second) + return false; + + // We assume that we'll already have split suspends into their own blocks. + if (isSuspendBlock(From)) + return true; + + // Recurse on the successors. + for (auto *Succ : successors(From)) { + if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs)) + return true; + } + + return false; +} + +/// Is the given alloca "local", i.e. bounded in lifetime to not cross a +/// suspend point? +static bool isLocalAlloca(CoroAllocaAllocInst *AI) { + // Seed the visited set with all the basic blocks containing a free + // so that we won't pass them up. + VisitedBlocksSet VisitedOrFreeBBs; + for (auto *User : AI->users()) { + if (auto FI = dyn_cast(User)) + VisitedOrFreeBBs.insert(FI->getParent()); + } + + return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs); +} + +/// Turn the given coro.alloca.alloc call into a dynamic allocation. +/// This happens during the all-instructions iteration, so it must not +/// delete the call. +static Instruction * +lowerNonLocalAlloca(CoroAllocaAllocInst *AI, const coro::Shape &Shape, + SmallVectorImpl &DeadInsts) { + IRBuilder<> Builder(AI); + auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr); + + for (User *U : AI->users()) { + if (isa(U)) { + U->replaceAllUsesWith(Alloc); + } else { + auto FI = cast(U); + Builder.SetInsertPoint(FI); + Shape.emitDealloc(Builder, Alloc, nullptr); + } + DeadInsts.push_back(cast(U)); + } + + // Push this on last so that it gets deleted after all the others. + DeadInsts.push_back(AI); + + // Return the new allocation value so that we can check for needed spills. + return cast(Alloc); +} + +// We need to make room to insert a spill after initial PHIs, but before +// catchswitch instruction. Placing it before violates the requirement that +// catchswitch, like all other EHPads must be the first nonPHI in a block. +// +// Split away catchswitch into a separate block and insert in its place: +// +// cleanuppad cleanupret. +// +// cleanupret instruction will act as an insert point for the spill. +static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) { + BasicBlock *CurrentBlock = CatchSwitch->getParent(); + BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch); + CurrentBlock->getTerminator()->eraseFromParent(); + + auto *CleanupPad = + CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock); + auto *CleanupRet = + CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock); + return CleanupRet; +} + +// We use a pointer use visitor to track how an alloca is being used. +// The goal is to be able to answer the following three questions: +// 1. Should this alloca be allocated on the frame instead. +// 2. Could the content of the alloca be modified prior to CoroBegn, which would +// require copying the data from alloca to the frame after CoroBegin. +// 3. Is there any alias created for this alloca prior to CoroBegin, but used +// after CoroBegin. In that case, we will need to recreate the alias after +// CoroBegin based off the frame. To answer question 1, we track two things: +// a. List of all BasicBlocks that use this alloca or any of the aliases of +// the alloca. In the end, we check if there exists any two basic blocks that +// cross suspension points. If so, this alloca must be put on the frame. b. +// Whether the alloca or any alias of the alloca is escaped at some point, +// either by storing the address somewhere, or the address is used in a +// function call that might capture. 
If it's ever escaped, this alloca must be +// put on the frame conservatively. +// To answer quetion 2, we track through the variable MayWriteBeforeCoroBegin. +// Whenever a potential write happens, either through a store instruction, a +// function call or any of the memory intrinsics, we check whether this +// instruction is prior to CoroBegin. To answer question 3, we track the offsets +// of all aliases created for the alloca prior to CoroBegin but used after +// CoroBegin. std::optional is used to be able to represent the case when the +// offset is unknown (e.g. when you have a PHINode that takes in different +// offset values). We cannot handle unknown offsets and will assert. This is the +// potential issue left out. An ideal solution would likely require a +// significant redesign. +namespace { +struct AllocaUseVisitor : PtrUseVisitor { + using Base = PtrUseVisitor; + AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT, + const coro::Shape &CoroShape, + const SuspendCrossingInfo &Checker, + bool ShouldUseLifetimeStartInfo) + : PtrUseVisitor(DL), DT(DT), CoroShape(CoroShape), Checker(Checker), + ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) { + for (AnyCoroSuspendInst *SuspendInst : CoroShape.CoroSuspends) + CoroSuspendBBs.insert(SuspendInst->getParent()); + } + + void visit(Instruction &I) { + Users.insert(&I); + Base::visit(I); + // If the pointer is escaped prior to CoroBegin, we have to assume it would + // be written into before CoroBegin as well. + if (PI.isEscaped() && + !DT.dominates(CoroShape.CoroBegin, PI.getEscapingInst())) { + MayWriteBeforeCoroBegin = true; + } + } + // We need to provide this overload as PtrUseVisitor uses a pointer based + // visiting function. + void visit(Instruction *I) { return visit(*I); } + + void visitPHINode(PHINode &I) { + enqueueUsers(I); + handleAlias(I); + } + + void visitSelectInst(SelectInst &I) { + enqueueUsers(I); + handleAlias(I); + } + + void visitStoreInst(StoreInst &SI) { + // Regardless whether the alias of the alloca is the value operand or the + // pointer operand, we need to assume the alloca is been written. + handleMayWrite(SI); + + if (SI.getValueOperand() != U->get()) + return; + + // We are storing the pointer into a memory location, potentially escaping. + // As an optimization, we try to detect simple cases where it doesn't + // actually escape, for example: + // %ptr = alloca .. + // %addr = alloca .. + // store %ptr, %addr + // %x = load %addr + // .. + // If %addr is only used by loading from it, we could simply treat %x as + // another alias of %ptr, and not considering %ptr being escaped. + auto IsSimpleStoreThenLoad = [&]() { + auto *AI = dyn_cast(SI.getPointerOperand()); + // If the memory location we are storing to is not an alloca, it + // could be an alias of some other memory locations, which is difficult + // to analyze. + if (!AI) + return false; + // StoreAliases contains aliases of the memory location stored into. + SmallVector StoreAliases = {AI}; + while (!StoreAliases.empty()) { + Instruction *I = StoreAliases.pop_back_val(); + for (User *U : I->users()) { + // If we are loading from the memory location, we are creating an + // alias of the original pointer. + if (auto *LI = dyn_cast(U)) { + enqueueUsers(*LI); + handleAlias(*LI); + continue; + } + // If we are overriding the memory location, the pointer certainly + // won't escape. 
+ if (auto *S = dyn_cast(U)) + if (S->getPointerOperand() == I) + continue; + if (auto *II = dyn_cast(U)) + if (II->isLifetimeStartOrEnd()) + continue; + // BitCastInst creats aliases of the memory location being stored + // into. + if (auto *BI = dyn_cast(U)) { + StoreAliases.push_back(BI); + continue; + } + return false; + } + } + + return true; + }; + + if (!IsSimpleStoreThenLoad()) + PI.setEscaped(&SI); + } + + // All mem intrinsics modify the data. + void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(MI); } + + void visitBitCastInst(BitCastInst &BC) { + Base::visitBitCastInst(BC); + handleAlias(BC); + } + + void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { + Base::visitAddrSpaceCastInst(ASC); + handleAlias(ASC); + } + + void visitGetElementPtrInst(GetElementPtrInst &GEPI) { + // The base visitor will adjust Offset accordingly. + Base::visitGetElementPtrInst(GEPI); + handleAlias(GEPI); + } + + void visitIntrinsicInst(IntrinsicInst &II) { + // When we found the lifetime markers refers to a + // subrange of the original alloca, ignore the lifetime + // markers to avoid misleading the analysis. + if (!IsOffsetKnown || !Offset.isZero()) + return Base::visitIntrinsicInst(II); + switch (II.getIntrinsicID()) { + default: + return Base::visitIntrinsicInst(II); + case Intrinsic::lifetime_start: + LifetimeStarts.insert(&II); + LifetimeStartBBs.push_back(II.getParent()); + break; + case Intrinsic::lifetime_end: + LifetimeEndBBs.insert(II.getParent()); + break; + } + } + + void visitCallBase(CallBase &CB) { + for (unsigned Op = 0, OpCount = CB.arg_size(); Op < OpCount; ++Op) + if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op)) + PI.setEscaped(&CB); + handleMayWrite(CB); + } + + bool getShouldLiveOnFrame() const { + if (!ShouldLiveOnFrame) + ShouldLiveOnFrame = computeShouldLiveOnFrame(); + return *ShouldLiveOnFrame; + } + + bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; } + + DenseMap> getAliasesCopy() const { + assert(getShouldLiveOnFrame() && "This method should only be called if the " + "alloca needs to live on the frame."); + for (const auto &P : AliasOffetMap) + if (!P.second) + report_fatal_error("Unable to handle an alias with unknown offset " + "created before CoroBegin."); + return AliasOffetMap; + } + +private: + const DominatorTree &DT; + const coro::Shape &CoroShape; + const SuspendCrossingInfo &Checker; + // All alias to the original AllocaInst, created before CoroBegin and used + // after CoroBegin. Each entry contains the instruction and the offset in the + // original Alloca. They need to be recreated after CoroBegin off the frame. + DenseMap> AliasOffetMap{}; + SmallPtrSet Users{}; + SmallPtrSet LifetimeStarts{}; + SmallVector LifetimeStartBBs{}; + SmallPtrSet LifetimeEndBBs{}; + SmallPtrSet CoroSuspendBBs{}; + bool MayWriteBeforeCoroBegin{false}; + bool ShouldUseLifetimeStartInfo{true}; + + mutable std::optional ShouldLiveOnFrame{}; + + bool computeShouldLiveOnFrame() const { + // If lifetime information is available, we check it first since it's + // more precise. We look at every pair of lifetime.start intrinsic and + // every basic block that uses the pointer to see if they cross suspension + // points. The uses cover both direct uses as well as indirect uses. + if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) { + // If there is no explicit lifetime.end, then assume the address can + // cross suspension points. 
+ if (LifetimeEndBBs.empty()) + return true; + + // If there is a path from a lifetime.start to a suspend without a + // corresponding lifetime.end, then the alloca's lifetime persists + // beyond that suspension point and the alloca must go on the frame. + llvm::SmallVector Worklist(LifetimeStartBBs); + if (isManyPotentiallyReachableFromMany(Worklist, CoroSuspendBBs, + &LifetimeEndBBs, &DT)) + return true; + + // Addresses are guaranteed to be identical after every lifetime.start so + // we cannot use the local stack if the address escaped and there is a + // suspend point between lifetime markers. This should also cover the + // case of a single lifetime.start intrinsic in a loop with suspend point. + if (PI.isEscaped()) { + for (auto *A : LifetimeStarts) { + for (auto *B : LifetimeStarts) { + if (Checker.hasPathOrLoopCrossingSuspendPoint(A->getParent(), + B->getParent())) + return true; + } + } + } + return false; + } + // FIXME: Ideally the isEscaped check should come at the beginning. + // However there are a few loose ends that need to be fixed first before + // we can do that. We need to make sure we are not over-conservative, so + // that the data accessed in-between await_suspend and symmetric transfer + // is always put on the stack, and also data accessed after coro.end is + // always put on the stack (esp the return object). To fix that, we need + // to: + // 1) Potentially treat sret as nocapture in calls + // 2) Special handle the return object and put it on the stack + // 3) Utilize lifetime.end intrinsic + if (PI.isEscaped()) + return true; + + for (auto *U1 : Users) + for (auto *U2 : Users) + if (Checker.isDefinitionAcrossSuspend(*U1, U2)) + return true; + + return false; + } + + void handleMayWrite(const Instruction &I) { + if (!DT.dominates(CoroShape.CoroBegin, &I)) + MayWriteBeforeCoroBegin = true; + } + + bool usedAfterCoroBegin(Instruction &I) { + for (auto &U : I.uses()) + if (DT.dominates(CoroShape.CoroBegin, U)) + return true; + return false; + } + + void handleAlias(Instruction &I) { + // We track all aliases created prior to CoroBegin but used after. + // These aliases may need to be recreated after CoroBegin if the alloca + // need to live on the frame. + if (DT.dominates(CoroShape.CoroBegin, &I) || !usedAfterCoroBegin(I)) + return; + + if (!IsOffsetKnown) { + AliasOffetMap[&I].reset(); + } else { + auto Itr = AliasOffetMap.find(&I); + if (Itr == AliasOffetMap.end()) { + AliasOffetMap[&I] = Offset; + } else if (Itr->second && *Itr->second != Offset) { + // If we have seen two different possible values for this alias, we set + // it to empty. + AliasOffetMap[&I].reset(); + } + } + } +}; +} // namespace + +static void collectFrameAlloca(AllocaInst *AI, const coro::Shape &Shape, + const SuspendCrossingInfo &Checker, + SmallVectorImpl &Allocas, + const DominatorTree &DT) { + if (Shape.CoroSuspends.empty()) + return; + + // The PromiseAlloca will be specially handled since it needs to be in a + // fixed position in the frame. + if (AI == Shape.SwitchLowering.PromiseAlloca) + return; + + // The __coro_gro alloca should outlive the promise, make sure we + // keep it outside the frame. + if (AI->hasMetadata(LLVMContext::MD_coro_outside_frame)) + return; + + // The code that uses lifetime.start intrinsic does not work for functions + // with loops without exit. Disable it on ABIs we know to generate such + // code. 
+ bool ShouldUseLifetimeStartInfo = + (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon && + Shape.ABI != coro::ABI::RetconOnce); + AllocaUseVisitor Visitor{AI->getDataLayout(), DT, Shape, Checker, + ShouldUseLifetimeStartInfo}; + Visitor.visitPtr(*AI); + if (!Visitor.getShouldLiveOnFrame()) + return; + Allocas.emplace_back(AI, Visitor.getAliasesCopy(), + Visitor.getMayWriteBeforeCoroBegin()); +} + +} // namespace + +void collectSpillsFromArgs(SpillInfo &Spills, Function &F, + const SuspendCrossingInfo &Checker) { + // Collect the spills for arguments and other not-materializable values. + for (Argument &A : F.args()) + for (User *U : A.users()) + if (Checker.isDefinitionAcrossSuspend(A, U)) + Spills[&A].push_back(cast(U)); +} + +void collectSpillsAndAllocasFromInsts( + SpillInfo &Spills, SmallVector &Allocas, + SmallVector &DeadInstructions, + SmallVector &LocalAllocas, Function &F, + const SuspendCrossingInfo &Checker, const DominatorTree &DT, + const coro::Shape &Shape) { + + for (Instruction &I : instructions(F)) { + // Values returned from coroutine structure intrinsics should not be part + // of the Coroutine Frame. + if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin) + continue; + + // Handle alloca.alloc specially here. + if (auto AI = dyn_cast(&I)) { + // Check whether the alloca's lifetime is bounded by suspend points. + if (isLocalAlloca(AI)) { + LocalAllocas.push_back(AI); + continue; + } + + // If not, do a quick rewrite of the alloca and then add spills of + // the rewritten value. The rewrite doesn't invalidate anything in + // Spills because the other alloca intrinsics have no other operands + // besides AI, and it doesn't invalidate the iteration because we delay + // erasing AI. + auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions); + + for (User *U : Alloc->users()) { + if (Checker.isDefinitionAcrossSuspend(*Alloc, U)) + Spills[Alloc].push_back(cast(U)); + } + continue; + } + + // Ignore alloca.get; we process this as part of coro.alloca.alloc. + if (isa(I)) + continue; + + if (auto *AI = dyn_cast(&I)) { + collectFrameAlloca(AI, Shape, Checker, Allocas, DT); + continue; + } + + for (User *U : I.users()) + if (Checker.isDefinitionAcrossSuspend(I, U)) { + // We cannot spill a token. + if (I.getType()->isTokenTy()) + report_fatal_error( + "token definition is separated from the use by a suspend point"); + Spills[&I].push_back(cast(U)); + } + } +} + +void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F, + const SuspendCrossingInfo &Checker) { + // We don't want the layout of coroutine frame to be affected + // by debug information. So we only choose to salvage DbgValueInst for + // whose value is already in the frame. + // We would handle the dbg.values for allocas specially + for (auto &Iter : Spills) { + auto *V = Iter.first; + SmallVector DVIs; + SmallVector DVRs; + findDbgValues(DVIs, V, &DVRs); + for (DbgValueInst *DVI : DVIs) + if (Checker.isDefinitionAcrossSuspend(*V, DVI)) + Spills[V].push_back(DVI); + // Add the instructions which carry debug info that is in the frame. + for (DbgVariableRecord *DVR : DVRs) + if (Checker.isDefinitionAcrossSuspend(*V, DVR->Marker->MarkedInstr)) + Spills[V].push_back(DVR->Marker->MarkedInstr); + } +} + +/// Async and Retcon{Once} conventions assume that all spill uses can be sunk +/// after the coro.begin intrinsic. 
+void sinkSpillUsesAfterCoroBegin(const DominatorTree &Dom, + CoroBeginInst *CoroBegin, + coro::SpillInfo &Spills, + SmallVectorImpl &Allocas) { + SmallSetVector ToMove; + SmallVector Worklist; + + // Collect all users that precede coro.begin. + auto collectUsers = [&](Value *Def) { + for (User *U : Def->users()) { + auto Inst = cast(U); + if (Inst->getParent() != CoroBegin->getParent() || + Dom.dominates(CoroBegin, Inst)) + continue; + if (ToMove.insert(Inst)) + Worklist.push_back(Inst); + } + }; + std::for_each(Spills.begin(), Spills.end(), + [&](auto &I) { collectUsers(I.first); }); + std::for_each(Allocas.begin(), Allocas.end(), + [&](auto &I) { collectUsers(I.Alloca); }); + + // Recursively collect users before coro.begin. + while (!Worklist.empty()) { + auto *Def = Worklist.pop_back_val(); + for (User *U : Def->users()) { + auto Inst = cast(U); + if (Dom.dominates(CoroBegin, Inst)) + continue; + if (ToMove.insert(Inst)) + Worklist.push_back(Inst); + } + } + + // Sort by dominance. + SmallVector InsertionList(ToMove.begin(), ToMove.end()); + llvm::sort(InsertionList, [&Dom](Instruction *A, Instruction *B) -> bool { + // If a dominates b it should precede (<) b. + return Dom.dominates(A, B); + }); + + Instruction *InsertPt = CoroBegin->getNextNode(); + for (Instruction *Inst : InsertionList) + Inst->moveBefore(InsertPt); +} + +BasicBlock::iterator getSpillInsertionPt(const coro::Shape &Shape, Value *Def, + const DominatorTree &DT) { + BasicBlock::iterator InsertPt; + if (auto *Arg = dyn_cast(Def)) { + // For arguments, we will place the store instruction right after + // the coroutine frame pointer instruction, i.e. coro.begin. + InsertPt = Shape.getInsertPtAfterFramePtr(); + + // If we're spilling an Argument, make sure we clear 'nocapture' + // from the coroutine function. + Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture); + } else if (auto *CSI = dyn_cast(Def)) { + // Don't spill immediately after a suspend; splitting assumes + // that the suspend will be followed by a branch. + InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHIIt(); + } else { + auto *I = cast(Def); + if (!DT.dominates(Shape.CoroBegin, I)) { + // If it is not dominated by CoroBegin, then spill should be + // inserted immediately after CoroFrame is computed. + InsertPt = Shape.getInsertPtAfterFramePtr(); + } else if (auto *II = dyn_cast(I)) { + // If we are spilling the result of the invoke instruction, split + // the normal edge and insert the spill in the new block. + auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest()); + InsertPt = NewBB->getTerminator()->getIterator(); + } else if (isa(I)) { + // Skip the PHINodes and EH pads instructions. + BasicBlock *DefBlock = I->getParent(); + if (auto *CSI = dyn_cast(DefBlock->getTerminator())) + InsertPt = splitBeforeCatchSwitch(CSI)->getIterator(); + else + InsertPt = DefBlock->getFirstInsertionPt(); + } else { + assert(!I->isTerminator() && "unexpected terminator"); + // For all other values, the spill is placed immediately after + // the definition. + InsertPt = I->getNextNode()->getIterator(); + } + } + + return InsertPt; +} + +} // End namespace coro. + +} // End namespace llvm. 
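For orientation (commentary, not part of the patch): the entry points defined in SpillUtils.cpp above are driven from coro::buildCoroutineFrame, as shown in the CoroFrame.cpp hunks earlier in this diff. A minimal sketch of that call sequence, with surrounding details elided:

    coro::SpillInfo Spills;
    SmallVector<coro::AllocaInfo, 8> Allocas;
    SmallVector<Instruction *, 4> DeadInstructions;
    SmallVector<CoroAllocaAllocInst *, 4> LocalAllocas;

    // Arguments, ordinary instructions, and debug records that live across a
    // suspend point all contribute spills; allocas are classified separately.
    coro::collectSpillsFromArgs(Spills, F, Checker);
    coro::collectSpillsAndAllocasFromInsts(Spills, Allocas, DeadInstructions,
                                           LocalAllocas, F, Checker, DT, Shape);
    coro::collectSpillsFromDbgInfo(Spills, F, Checker);

    // Async and Retcon{Once} ABIs require spill uses to sit after coro.begin.
    if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
        Shape.ABI == coro::ABI::Async)
      sinkSpillUsesAfterCoroBegin(DT, Shape.CoroBegin, Spills, Allocas);

    // FrameDataInfo now holds references to these containers instead of
    // owning copies, so later frame building sees the same data.
    FrameDataInfo FrameData(Spills, Allocas);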
diff --git a/llvm/lib/Transforms/Coroutines/SpillUtils.h b/llvm/lib/Transforms/Coroutines/SpillUtils.h
new file mode 100644
index 0000000000000..de0ff0bcd3a4f
--- /dev/null
+++ b/llvm/lib/Transforms/Coroutines/SpillUtils.h
@@ -0,0 +1,60 @@
+//===- SpillUtils.h - Utilities for handling spills -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CoroInternal.h"
+#include "SuspendCrossingInfo.h"
+
+#ifndef LLVM_TRANSFORMS_COROUTINES_SPILLINGINFO_H
+#define LLVM_TRANSFORMS_COROUTINES_SPILLINGINFO_H
+
+namespace llvm {
+
+namespace coro {
+
+using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>;
+
+struct AllocaInfo {
+  AllocaInst *Alloca;
+  DenseMap<Instruction *, std::optional<APInt>> Aliases;
+  bool MayWriteBeforeCoroBegin;
+  AllocaInfo(AllocaInst *Alloca,
+             DenseMap<Instruction *, std::optional<APInt>> Aliases,
+             bool MayWriteBeforeCoroBegin)
+      : Alloca(Alloca), Aliases(std::move(Aliases)),
+        MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {}
+};
+
+bool isSuspendBlock(BasicBlock *BB);
+
+void collectSpillsFromArgs(SpillInfo &Spills, Function &F,
+                           const SuspendCrossingInfo &Checker);
+void collectSpillsAndAllocasFromInsts(
+    SpillInfo &Spills, SmallVector<AllocaInfo, 8> &Allocas,
+    SmallVector<Instruction *, 4> &DeadInstructions,
+    SmallVector<CoroAllocaAllocInst *, 4> &LocalAllocas, Function &F,
+    const SuspendCrossingInfo &Checker, const DominatorTree &DT,
+    const coro::Shape &Shape);
+void collectSpillsFromDbgInfo(SpillInfo &Spills, Function &F,
+                              const SuspendCrossingInfo &Checker);
+
+/// Async and Retcon{Once} conventions assume that all spill uses can be sunk
+/// after the coro.begin intrinsic.
+void sinkSpillUsesAfterCoroBegin(const DominatorTree &DT,
+                                 CoroBeginInst *CoroBegin,
+                                 coro::SpillInfo &Spills,
+                                 SmallVectorImpl<AllocaInfo> &Allocas);
+
+// Get the insertion point for a spill after a Def.
+BasicBlock::iterator getSpillInsertionPt(const coro::Shape &, Value *Def,
+                                         const DominatorTree &DT);
+
+} // namespace coro
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_COROUTINES_SPILLINGINFO_H
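As a usage note (a sketch under stated assumptions, not verbatim code from this patch): insertSpills in CoroFrame.cpp consumes coro::getSpillInsertionPt to decide where each spilled value is stored into the coroutine frame. The getFrameSlot helper below is hypothetical and stands in for the frame-GEP logic CoroFrame.cpp already contains; Shape, DT, and FrameData are as in the hunks above.

    for (auto &E : FrameData.Spills) {
      Value *Def = E.first;
      // Place the store where the value first becomes available relative to
      // coro.begin (after the frame pointer for arguments, after the single
      // successor for suspends, after the definition otherwise).
      BasicBlock::iterator InsertPt = coro::getSpillInsertionPt(Shape, Def, DT);
      IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
      Value *Slot = getFrameSlot(Builder, Def); // hypothetical frame-GEP helper
      Builder.CreateAlignedStore(Def, Slot, Align(FrameData.getAlign(Def)));
    }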