diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 7424fe31945dc..2def0cfb22a3a 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -122,6 +122,11 @@ static cl::opt cl::desc("Allow reordering across at most this many " "instructions when hoisting")); +static cl::opt BBSizeHoistLimit( + "simplifycfg-bb-size-hoist-limit", cl::Hidden, cl::init(40), + cl::desc("Allow hoisting instructions only if the smallest " + "basicblock has fewer instructions than this limit")); + static cl::opt SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); @@ -1582,6 +1587,16 @@ hoistLockstepIdenticalDPValues(Instruction *TI, Instruction *I1, } } +// Hash instructions based on following factors: +// 1- Instruction Opcode +// 2- Instruction type +// 3- Instruction operands +llvm::hash_code getHash(Instruction *Instr) { + return llvm::hash_combine( + Instr->getOpcode(), Instr->getType(), + hash_combine_range(Instr->value_op_begin(), Instr->value_op_end())); +} + /// Hoist any common code in the successor blocks up into the block. This /// function guarantees that BB dominates all successors. If EqTermsOnly is /// given, only perform hoisting in case both blocks only contain a terminator. @@ -1589,12 +1604,11 @@ hoistLockstepIdenticalDPValues(Instruction *TI, Instruction *I1, /// added. bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly) { - // This does very trivial matching, with limited scanning, to find identical - // instructions in the two blocks. In particular, we don't want to get into - // O(N1*N2*...) situations here where Ni are the sizes of these successors. As - // such, we currently just scan for obviously identical instructions in an - // identical order, possibly separated by the same number of non-identical - // instructions. + // We first sort successors based on the number of instructions each block + // holds. Then for each successor we make a hashmap from its instructions, + // except for the first successor. After that, we iterate over the + // instructions of the first successor. If we find identical instructions from + // every other successor, we hoist all of them into the predeccessor. unsigned int SuccSize = succ_size(BB); if (SuccSize < 2) return false; @@ -1608,16 +1622,42 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB, auto *TI = BB->getTerminator(); + SmallVector SuccessorBBs; + for (auto *Succ : successors(BB)) { + // If we find an unreachable instruction at the beginning of a basic block, + // we can still hoist instructions from the rest of the basic blocks. + if (isa(Succ->getFirstNonPHIOrDbg())) + continue; + SuccessorBBs.push_back(Succ); + } + + // Find the smallest BB because we always want to iterate over instructions + // of the smallest Successor. + auto *SmallestBBPos = + std::min_element(SuccessorBBs.begin(), SuccessorBBs.end(), + [](BasicBlock *BB1, BasicBlock *BB2) { + return BB1->size() < BB2->size(); + }); + + std::iter_swap(SuccessorBBs.begin(), SmallestBBPos); + BasicBlock *SmallestBB = *SmallestBBPos; + + if (SmallestBB->size() > BBSizeHoistLimit) + return false; + // The second of pair is a SkipFlags bitmask. using SuccIterPair = std::pair; SmallVector SuccIterPairs; - for (auto *Succ : successors(BB)) { + for (auto *Succ : SuccessorBBs) { BasicBlock::iterator SuccItr = Succ->begin(); if (isa(*SuccItr)) return false; SuccIterPairs.push_back(SuccIterPair(SuccItr, 0)); } + if (SuccIterPairs.size() < 2) + return false; + // Check if only hoisting terminators is allowed. This does not add new // instructions to the hoist location. if (EqTermsOnly) { @@ -1635,87 +1675,149 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB, // many instructions we skip, serving as a compilation time control as well as // preventing excessive increase of life ranges. unsigned NumSkipped = 0; - // If we find an unreachable instruction at the beginning of a basic block, we - // can still hoist instructions from the rest of the basic blocks. - if (SuccIterPairs.size() > 2) { - erase_if(SuccIterPairs, - [](const auto &Pair) { return isa(Pair.first); }); - if (SuccIterPairs.size() < 2) - return false; - } bool Changed = false; + auto *SuccIterPairBegin = SuccIterPairs.begin(); + ++SuccIterPairBegin; + auto BBItrPair = *SuccIterPairBegin++; + auto OtherSuccIterPairRange = + iterator_range(SuccIterPairBegin, SuccIterPairs.end()); + + DenseMap> OtherSuccessorsHash; + DenseMap InstToSkipFlag; + unsigned skipFlag = 0; + Instruction *I = nullptr; + do { + I = &*BBItrPair.first; + auto HashValue = getHash(I); + skipFlag |= skippedInstrFlags(I); + // For the first successor we created hashmap for, put all instructions + // in the hashmap execept for the ones that have the same hash as some + // previous instruction in that BB. + if (OtherSuccessorsHash.find(HashValue) == OtherSuccessorsHash.end()) { + OtherSuccessorsHash[HashValue] = {I}; + InstToSkipFlag[I] = skipFlag; + } + BBItrPair.first++; + } while (!I->isTerminator()); + + unsigned Index = 1; + for (auto BBItrPair : OtherSuccIterPairRange) { + // Fill the hashmap for every other successor + unsigned skipFlag = 0; + Instruction *I = nullptr; + do { + I = &*BBItrPair.first; + auto HashValue = getHash(I); + skipFlag |= skippedInstrFlags(I); + auto &InstVec = OtherSuccessorsHash[HashValue]; + // For other successors put the instrcution in the map only if there are + // instructions with the same hash from other successors and this is the + // first instruction with this hash value from current successor. + if (OtherSuccessorsHash.find(HashValue) != OtherSuccessorsHash.end() && + InstVec.size() == Index) { + InstVec.push_back(I); + InstToSkipFlag[I] = skipFlag; + } + BBItrPair.first++; + } while (!I->isTerminator()); + Index++; + } + + // Keep track of instructions skipped in the first successor + unsigned SkipFlagsBB1 = 0; + bool SameLevelHoist = true; for (;;) { auto *SuccIterPairBegin = SuccIterPairs.begin(); auto &BB1ItrPair = *SuccIterPairBegin++; auto OtherSuccIterPairRange = iterator_range(SuccIterPairBegin, SuccIterPairs.end()); auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange); - Instruction *I1 = &*BB1ItrPair.first; // Skip debug info if it is not identical. - bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) { + bool IdenticalDebugs = all_of(OtherSuccIterRange, [I1](auto &Iter) { Instruction *I2 = &*Iter; return I1->isIdenticalToWhenDefined(I2); }); - if (!AllDbgInstsAreIdentical) { + if (!IdenticalDebugs) { while (isa(I1)) I1 = &*++BB1ItrPair.first; - for (auto &SuccIter : OtherSuccIterRange) { - Instruction *I2 = &*SuccIter; - while (isa(I2)) - I2 = &*++SuccIter; - } } - - bool AllInstsAreIdentical = true; - bool HasTerminator = I1->isTerminator(); - for (auto &SuccIter : OtherSuccIterRange) { - Instruction *I2 = &*SuccIter; - HasTerminator |= I2->isTerminator(); - if (AllInstsAreIdentical && !I1->isIdenticalToWhenDefined(I2)) - AllInstsAreIdentical = false; + auto OtherInsts = OtherSuccessorsHash[getHash(I1)]; + + // Check if there are identical instructions in all other successors + // We might face with same hash values for different instructions. + // If that happens, ignore the instruction. + bool HasIdenticalInst = + OtherSuccessorsHash.find(getHash(I1)) != OtherSuccessorsHash.end() && + OtherInsts.size() == (SuccIterPairs.size() - 1) && + all_of(OtherInsts, [&](Instruction *I2) { + return I2->isIdenticalToWhenDefined(I1); + }); + + if (!HasIdenticalInst) { + if (NumSkipped >= HoistCommonSkipLimit) + return Changed; + SkipFlagsBB1 |= skippedInstrFlags(I1); + if (SameLevelHoist) { + for (auto &SuccIterPair : OtherSuccIterPairRange) { + Instruction *I = &*SuccIterPair.first++; + SuccIterPair.second |= skippedInstrFlags(I); + } + } + ++NumSkipped; + if (I1->isTerminator()) + return Changed; + ++BB1ItrPair.first; + continue; } - SmallVector OtherInsts; - for (auto &SuccIter : OtherSuccIterRange) - OtherInsts.push_back(&*SuccIter); - // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. - if (HasTerminator) { + if (I1->isTerminator()) { // Even if BB, which contains only one unreachable instruction, is ignored // at the beginning of the loop, we can hoist the terminator instruction. // If any instructions remain in the block, we cannot hoist terminators. - if (NumSkipped || !AllInstsAreIdentical) { + if (NumSkipped) { hoistLockstepIdenticalDPValues(TI, I1, OtherInsts); return Changed; } - - return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherInsts) || - Changed; + SmallVector Insts; + for (auto *I2 : OtherInsts) { + // BB holding I2 should only contain the branch instruction + auto itr = I2->getParent()->instructionsWithoutDebug(); + if (&*itr.begin() != I2) + return Changed; + Insts.push_back(I2); + } + return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, Insts) || Changed; } - if (AllInstsAreIdentical) { - unsigned SkipFlagsBB1 = BB1ItrPair.second; - AllInstsAreIdentical = - isSafeToHoistInstr(I1, SkipFlagsBB1) && - all_of(OtherSuccIterPairRange, [=](const auto &Pair) { - Instruction *I2 = &*Pair.first; - unsigned SkipFlagsBB2 = Pair.second; - // Even if the instructions are identical, it may not - // be safe to hoist them if we have skipped over - // instructions with side effects or their operands - // weren't hoisted. - return isSafeToHoistInstr(I2, SkipFlagsBB2) && - shouldHoistCommonInstructions(I1, I2, TTI); - }); + bool SafeToHoist = isSafeToHoistInstr(I1, SkipFlagsBB1); + for (auto [SuccIterPair, I2] : zip(OtherSuccIterPairRange, OtherInsts)) { + // If instructions of all successors are at the same level, use the + // skipFlag of its BB, i.e., SameLevelHoist. Otherwise, use the skipFlag + // that was calculated initially for this instruction in the hashmap + if (SameLevelHoist && I2 == (&*(SuccIterPair.first))) { + SafeToHoist = SafeToHoist && + isSafeToHoistInstr(I2, SuccIterPair.second) && + shouldHoistCommonInstructions(I1, I2, TTI); + } else { + unsigned skipFlag = InstToSkipFlag[I2]; + SafeToHoist = SafeToHoist && isSafeToHoistInstr(I2, skipFlag) && + shouldHoistCommonInstructions(I1, I2, TTI); + SameLevelHoist = false; + } } - if (AllInstsAreIdentical) { + if (SafeToHoist) { BB1ItrPair.first++; + if (SameLevelHoist) { + for (auto &SuccIterPair : OtherSuccIterPairRange) + SuccIterPair.first++; + } if (isa(I1)) { // The debug location is an integral part of a debug info intrinsic // and can't be separated from it or replaced. Instead of attempting @@ -1725,8 +1827,7 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB, // leave any that were not hoisted behind (by calling moveBefore // rather than moveBeforePreserving). I1->moveBefore(TI); - for (auto &SuccIter : OtherSuccIterRange) { - auto *I2 = &*SuccIter++; + for (auto *I2 : OtherInsts) { assert(isa(I2)); I2->moveBefore(TI); } @@ -1739,18 +1840,44 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB, // leave any that were not hoisted behind (by calling moveBefore // rather than moveBeforePreserving). I1->moveBefore(TI); - for (auto &SuccIter : OtherSuccIterRange) { - Instruction *I2 = &*SuccIter++; + for (auto *I2 : OtherInsts) { assert(I2 != I1); - if (!I2->use_empty()) + // Update hashcode of all instructions using I2 + if (!I2->use_empty()) { + SmallVector PrevUsers; + // Once the uses of I1 are replaced, the hash value computed for + // those users are not valid anymore so we gather users and then + // recompute the hash codes for them. We need to do this only for + // the instructions located in the same block as I2 because we + // initially only hashed those instructions. + for (auto *user : I2->users()) { + if (auto *I = dyn_cast(user)) { + if (I->getParent() != I2->getParent()) + continue; + PrevUsers.push_back(I); + } + } I2->replaceAllUsesWith(I1); + for (auto &PrevUser : PrevUsers) { + auto NewHash = getHash(PrevUser); + if (OtherSuccessorsHash.find(NewHash) != + OtherSuccessorsHash.end()) { + OtherSuccessorsHash[NewHash].push_back(PrevUser); + } else { + SmallVector InstVec{PrevUser}; + OtherSuccessorsHash[NewHash] = InstVec; + } + } + } I1->andIRFlags(I2); combineMetadataForCSE(I1, I2, true); // I1 and I2 are being combined into a single instruction. Its debug // location is the merged locations of the original instructions. I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); + OtherSuccessorsHash.erase(getHash(I1)); I2->eraseFromParent(); } + OtherSuccessorsHash.erase(getHash(I1)); } if (!Changed) NumHoistCommonCode += SuccIterPairs.size(); @@ -1764,9 +1891,13 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB, // We are about to skip over a pair of non-identical instructions. Record // if any have characteristics that would prevent reordering instructions // across them. - for (auto &SuccIterPair : SuccIterPairs) { - Instruction *I = &*SuccIterPair.first++; - SuccIterPair.second |= skippedInstrFlags(I); + BB1ItrPair.first++; + SkipFlagsBB1 |= skippedInstrFlags(I1); + if (SameLevelHoist) { + for (auto &SuccIterPair : OtherSuccIterPairRange) { // update flags + Instruction *I = &*SuccIterPair.first++; + SuccIterPair.second |= skippedInstrFlags(I); + } } ++NumSkipped; } @@ -1788,8 +1919,6 @@ bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf( auto *BB2 = I2->getParent(); if (BI) { assert(OtherSuccTIs.size() == 1); - assert(BI->getSuccessor(0) == I1->getParent()); - assert(BI->getSuccessor(1) == I2->getParent()); } // In the case of an if statement, we try to hoist an invoke. @@ -3647,7 +3776,6 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // Change the PHI node into a select instruction. Value *TrueVal = PN->getIncomingValueForBlock(IfTrue); Value *FalseVal = PN->getIncomingValueForBlock(IfFalse); - Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI); PN->replaceAllUsesWith(Sel); Sel->takeName(PN); diff --git a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll index 17d1ca65430af..ba41ce30b98e5 100644 --- a/llvm/test/CodeGen/ARM/aes-erratum-fix.ll +++ b/llvm/test/CodeGen/ARM/aes-erratum-fix.ll @@ -314,16 +314,15 @@ define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x ; CHECK-FIX-LABEL: aese_set8_cond_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 +; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB12_2 ; CHECK-FIX-NEXT: @ %bb.1: -; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: vld1.8 {d16[0]}, [r1] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: bne .LBB12_3 ; CHECK-FIX-NEXT: b .LBB12_4 ; CHECK-FIX-NEXT: .LBB12_2: -; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB12_4 ; CHECK-FIX-NEXT: .LBB12_3: @@ -3205,15 +3204,10 @@ define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, ptr %2) no define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { ; CHECK-FIX-NOSCHED-LABEL: aesd_set64_cond_via_ptr: ; CHECK-FIX-NOSCHED: @ %bb.0: -; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 -; CHECK-FIX-NOSCHED-NEXT: beq .LBB76_2 -; CHECK-FIX-NOSCHED-NEXT: @ %bb.1: -; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] -; CHECK-FIX-NOSCHED-NEXT: vldr d16, [r1] -; CHECK-FIX-NOSCHED-NEXT: b .LBB76_3 -; CHECK-FIX-NOSCHED-NEXT: .LBB76_2: ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] -; CHECK-FIX-NOSCHED-NEXT: .LBB76_3: +; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 +; CHECK-FIX-NOSCHED-NEXT: vldrne d16, [r1] +; CHECK-FIX-NOSCHED-NEXT: vorr q8, q8, q8 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 ; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1] ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 @@ -3221,7 +3215,7 @@ define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2] ; CHECK-FIX-NOSCHED-NEXT: bx lr -; + ; CHECK-CORTEX-FIX-LABEL: aesd_set64_cond_via_ptr: ; CHECK-CORTEX-FIX: @ %bb.0: ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 @@ -4096,19 +4090,15 @@ define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 ; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr: ; CHECK-FIX: @ %bb.0: ; CHECK-FIX-NEXT: vorr q0, q0, q0 +; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB88_2 ; CHECK-FIX-NEXT: @ %bb.1: -; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32] -; CHECK-FIX-NEXT: cmp r0, #0 -; CHECK-FIX-NEXT: bne .LBB88_3 -; CHECK-FIX-NEXT: b .LBB88_4 ; CHECK-FIX-NEXT: .LBB88_2: -; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] ; CHECK-FIX-NEXT: cmp r0, #0 ; CHECK-FIX-NEXT: beq .LBB88_4 -; CHECK-FIX-NEXT: .LBB88_3: +; CHECK-FIX-NEXT: @ %bb.3: ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32] ; CHECK-FIX-NEXT: .LBB88_4: ; CHECK-FIX-NEXT: aesd.8 q8, q0 diff --git a/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll index b5c9b903e1841..cafae03698257 100644 --- a/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll +++ b/llvm/test/CodeGen/Thumb2/mve-memtp-branch.ll @@ -11,8 +11,8 @@ define i32 @a(i8 zeroext %b, ptr nocapture readonly %c, ptr nocapture readonly % ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: cmp r0, #2 -; CHECK-NEXT: bls.w .LBB0_12 +; CHECK-NEXT: cmp r0, #3 +; CHECK-NEXT: blo.w .LBB0_12 ; CHECK-NEXT: @ %bb.1: @ %for.body.us.preheader ; CHECK-NEXT: movw r5, :lower16:arr_183 ; CHECK-NEXT: movs r3, #0 diff --git a/llvm/test/Transforms/PhaseOrdering/simplifyCFG-hoist.ll b/llvm/test/Transforms/PhaseOrdering/simplifyCFG-hoist.ll new file mode 100644 index 0000000000000..fc8ed95416a4e --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/simplifyCFG-hoist.ll @@ -0,0 +1,87 @@ +; opt -passes='default' -S --mtriple=aarch64-linux-gnu --mcpu=a64fx < %s | FileCheck %s + +; Hoist identical instructions from successor blocks even if +; they are not located at the same level. This could help generate +; more compact vectorized code. +; More info can be found at https://github.com/llvm/llvm-project/issues/68395. + + +define void @hoist_then_vectorize(ptr %a, ptr %b, ptr %c, ptr %d, i32 %N){ +; CHECK-LABEL: @hoist_then_vectorize( +; CHECK-NEXT: iter.check: +; CHECK-NEXT: [[VSCALE:%.*]] = tail call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[SHIFT:%.*]] = shl i64 [[VSCALE:%.*]], 1 +; CHECK-NEXT: [[MIN_ITR:%.*]] = icmp ugt i64 [[SHIFT:%.*]], 20 +; CHECK-NEXT: br i1 [[MIN_ITR:%.*]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_MAIN_LOOP_ITR_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[VSCALE2:%.*]] = tail call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[SHIFT2:%.*]] = shl i64 [[VSCALE2:%.*]], 2 +; CHECK-NEXT: [[MIN_ITR2:%.*]] = icmp ugt i64 [[SHIFT2:%.*]], 20 +; CHECK-NEXT: br i1 [[MIN_ITR2:%.*]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[VSCALE3:%.*]] = tail call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[SHIFT3:%.*]] = shl i64 [[VSCALE3:%.*]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 20, [[SHIFT3:%.*]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 20, [[N_MOD_VF:%.*]] +; CHECK-NEXT: [[VSCALE4:%.*]] = tail call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[SHIFT4:%.*]] = shl i64 [[VSCALE4:%.*]], 2 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ] +; CHECK-NEXT: [[GEP_D:%.*]] = getelementptr inbounds i32, ptr [[D:%.*]], i64 [[INDEX:%.*]] +; CHECK-NEXT: [[LOAD_D:%.*]] = load , ptr [[GEP_D:%.*]], align 4 +; CHECK-NEXT: [[MASK1:%.*]] = icmp slt [[LOAD_D:%.*]], zeroinitializer +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX:%.*]] +; CHECK-NEXT: [[LOAD_A:%.*]] = load , ptr [[GEP_A:%.*]], align 4 +; CHECK-NEXT: [[MASK2:%.*]] = icmp eq [[LOAD_A:%.*]], zeroinitializer +; CHECK-NEXT: [[SEL1:%.*]] = select [[MASK2:%.*]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer), shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[SEL2:%.*]] = select [[MASK1:%.*]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer), [[SEL1:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add [[LOAD_A:%.*]], [[SEL2:%.*]] +; CHECK-NEXT: store [[ADD:%.*]], ptr [[GEP_A:%.*]], align 4 +; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw i64 [[INDEX:%.*]], [[SHIFT4:%.*]] +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i64 [[INDEX_NEXT:%.*]], [[N_VEC:%.*]] +; CHECK-NEXT: br i1 [[LOOP_COND:%.*]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY:%.*]] + +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.inc + ret void + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr inbounds i32, ptr %d, i64 %indvars.iv + %ldr_d = load i32, ptr %arrayidx, align 4 + %cmp1 = icmp slt i32 %ldr_d, 0 + br i1 %cmp1, label %if.then, label %if.else + +if.then: ; preds = %for.body + %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %ldr_a = load i32, ptr %arrayidx3, align 4 + %add33 = add i32 %ldr_a, 1 + store i32 %add33, ptr %arrayidx3, align 4 + br label %for.inc + +if.else: ; preds = %for.body + %cmp7 = icmp eq i32 %ldr_d, 0 + br i1 %cmp7, label %if.then9, label %if.else15 + +if.then9: ; preds = %if.else + %arrayidx11 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %ldr_a2 = load i32, ptr %arrayidx11, align 4 + %add1334 = add i32 %ldr_a2, 2 + store i32 %add1334, ptr %arrayidx11, align 4 + br label %for.inc + +if.else15: ; preds = %if.else + %arrayidx112 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %ldr_a3 = load i32, ptr %arrayidx112, align 4 + %add1935 = add i32 %ldr_a3, 3 + store i32 %add1935, ptr %arrayidx112, align 4 + br label %for.inc + +for.inc: ; preds = %if.then, %if.else15, %if.then9 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 20 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} \ No newline at end of file diff --git a/llvm/test/Transforms/SimplifyCFG/dont-hoist-deoptimize.ll b/llvm/test/Transforms/SimplifyCFG/dont-hoist-deoptimize.ll index 6bfa967ff23e1..28a3cf50e347a 100644 --- a/llvm/test/Transforms/SimplifyCFG/dont-hoist-deoptimize.ll +++ b/llvm/test/Transforms/SimplifyCFG/dont-hoist-deoptimize.ll @@ -3,21 +3,25 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" target triple = "x86_64-unknown-linux-gnu" +; SimplifyCFG hoists %tmp and %tmp2 but after skipping %tmp3, we reach to the skipping threshold and +; bail out, not hoisting %tmp4. + declare void @llvm.experimental.deoptimize.isVoid(...) #0 define void @widget(i1 %arg) { ; CHECK-LABEL: @widget( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP:%.*]] = trunc i64 5 to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 0 to i32 ; CHECK-NEXT: br i1 [[ARG:%.*]], label [[BB1:%.*]], label [[BB4:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 0 to i32 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 0 to i32 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 2 to i32 ; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid(i32 13) #[[ATTR0:[0-9]+]] [ "deopt"() ] ; CHECK-NEXT: ret void ; CHECK: bb4: -; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 1 to i32 -; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 0 to i32 +; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 1 to i32 +; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 2 to i32 ; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid(i32 13) #[[ATTR0]] [ "deopt"() ] ; CHECK-NEXT: ret void ; @@ -28,6 +32,7 @@ bb1: ; preds = %bb %tmp = trunc i64 5 to i32 %tmp2 = trunc i64 0 to i32 %tmp3 = trunc i64 0 to i32 + %tmp4 = trunc i64 2 to i32 call void (...) @llvm.experimental.deoptimize.isVoid(i32 13) #0 [ "deopt"() ] ret void @@ -35,6 +40,7 @@ bb4: ; preds = %bb %tmp5 = trunc i64 5 to i32 %tmp6 = trunc i64 1 to i32 %tmp7 = trunc i64 0 to i32 + %tmp8 = trunc i64 2 to i32 call void (...) @llvm.experimental.deoptimize.isVoid(i32 13) #0 [ "deopt"() ] ret void } diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll index bfe31d8345d50..285062455e4f5 100644 --- a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll +++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll @@ -24,6 +24,58 @@ F: ; preds = %0 ret void } +define void @test_unordered(ptr noalias %b, ptr noalias %c, ptr noalias %Q, ptr noalias %R, i32 %i ) { +; CHECK-LABEL: @test_unordered( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ldR1:%.*]] = load i32, ptr [[R:%.*]], align 8 +; CHECK-NEXT: switch i32 %i, label %bb0 [ +; CHECK-NEXT: i32 2, label %bb1 +; CHECK-NEXT: i32 3, label %bb2 +; CHECK-NEXT: ] +; CHECK: common.ret: +; CHECK-NEXT: ret void +; CHECK: bb0: +; CHECK-NEXT: [[ldQ:%.*]] = load i32, ptr [[Q:%.*]], align 8 +; CHECK-NEXT: [[mul:%.*]] = mul i32 [[ldQ:%.*]], 2 +; CHECK-NEXT: [[add:%.*]] = add i32 [[ldR1:%.*]], [[mul:%.*]] +; CHECK-NEXT: store i32 [[add:%.*]], ptr [[c:%.*]], align 8 +; CHECK-NEXT: br label [[COMMON_RET:%.*]] +; CHECK: bb1: +; CHECK-NEXT: store i32 [[ldR1:%.*]], ptr [[c:%.*]], align 4 +; CHECK-NEXT: br label [[COMMON_RET:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[ldQ2:%.*]] = load i32, ptr [[Q:%.*]], align 8 +; CHECK-NEXT: [[sub:%.*]] = sub i32 [[ldR1:%.*]], [[ldQ2:%.*]] +; CHECK-NEXT: store i32 [[sub:%.*]], ptr [[c:%.*]], align 8 +; CHECK-NEXT: br label [[COMMON_RET:%.*]] + +entry: + switch i32 %i, label %bb0 [ + i32 2, label %bb1 + i32 3, label %bb2 + ] + +bb0: ; preds = %entry + %ldQ1 = load i32, ptr %Q, align 8 + %mul = mul i32 %ldQ1, 2 + %ldR1 = load i32, ptr %R, align 8 + %add = add i32 %ldR1, %mul + store i32 %add, ptr %c, align 8 + ret void + +bb1: ; preds = entry + %ldR2 = load i32, ptr %R, align 8 + store i32 %ldR2, ptr %c + ret void + +bb2: ; preds = entry + %ldQ2 = load i32, ptr %Q, align 8 + %ldR3 = load i32, ptr %R, align 8 + %sub = sub i32 %ldR3, %ldQ2 + store i32 %sub, ptr %c, align 8 + ret void +} + define void @test_switch(i64 %i, ptr %Q) { ; CHECK-LABEL: @test_switch( ; CHECK-NEXT: common.ret: