diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f1470fd1f7314..f887b34e76422 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2764,8 +2764,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
 void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
   // Fix widened non-induction PHIs by setting up the PHI operands.
-  if (EnableVPlanNativePath)
-    fixNonInductionPHIs(State);
+  fixNonInductionPHIs(State);
 
   // After vectorization, the exit blocks of the original loop will have
   // additional predecessors. Invalidate SCEVs for the exit phis in case SE
@@ -7324,7 +7323,6 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
          "Trying to execute plan with unsupported VF");
   assert(BestVPlan.hasUF(BestUF) &&
          "Trying to execute plan with unsupported UF");
-  VPlanTransforms::runPass(VPlanTransforms::materializeStepVectors, BestVPlan);
   // TODO: Move to VPlan transform stage once the transition to the VPlan-based
   // cost model is complete for better cost estimates.
   VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index cca3d32c0783e..4332332ef5cc3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1029,17 +1029,11 @@ void VPlan::execute(VPTransformState *State) {
     if (isa<VPWidenPHIRecipe>(&R))
       continue;
 
-    if (isa<VPWidenInductionRecipe>(&R)) {
-      PHINode *Phi = nullptr;
-      if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
-        Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
-      } else {
-        auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
-        assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
-               "recipe generating only scalars should have been replaced");
-        auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
-        Phi = cast<PHINode>(GEP->getPointerOperand());
-      }
+    if (auto *WidenPhi = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
+      assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
+             "recipe generating only scalars should have been replaced");
+      auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
+      PHINode *Phi = cast<PHINode>(GEP->getPointerOperand());
 
       Phi->setIncomingBlock(1, VectorLatchBB);
 
@@ -1047,10 +1041,6 @@ void VPlan::execute(VPTransformState *State) {
       // consistent placement of all induction updates.
       Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
       Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
-
-      // Use the steps for the last part as backedge value for the induction.
-      if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
-        Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
       continue;
     }
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 5a3c4a514a5dd..f3306ad7cb8ec 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1951,12 +1951,13 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
 };
 
 /// A recipe for handling phi nodes of integer and floating-point inductions,
-/// producing their vector values.
+/// producing their vector values. This is an abstract recipe and must be
+/// converted to concrete recipes before executing.
 class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
   TruncInst *Trunc;
 
   // If this recipe is unrolled it will have 2 additional operands.
-  bool isUnrolled() const { return getNumOperands() == 6; }
+  bool isUnrolled() const { return getNumOperands() == 5; }
 
 public:
   VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
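For orientation: the operand count checked by isUnrolled() drops from 6 to 5 because the step vector is no longer stored as an extra operand (see the removal of materializeStepVectors further down). The layout below is inferred from the accessors visible in this diff (getVFValue, getSplatVFValue, getLastUnrolledPartOperand) and is illustrative rather than an authoritative statement of the recipe's interface:

    operand 0: start value
    operand 1: step value
    operand 2: runtime VF
    operand 3: splat of VF * step  (only present once unrolled)
    operand 4: value of the last unrolled part  (only present once unrolled)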
@@ -1992,9 +1993,10 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
 
   VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
 
-  /// Generate the vectorized and scalarized versions of the phi node as
-  /// needed by their users.
-  void execute(VPTransformState &State) override;
+  void execute(VPTransformState &State) override {
+    llvm_unreachable("cannot execute this recipe, should be expanded via "
+                     "expandVPWidenIntOrFpInductionRecipe");
+  }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
@@ -2005,16 +2007,6 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
   VPValue *getVFValue() { return getOperand(2); }
   const VPValue *getVFValue() const { return getOperand(2); }
 
-  // TODO: Remove once VPWidenIntOrFpInduction is fully expanded in
-  // convertToConcreteRecipes.
-  VPInstructionWithType *getStepVector() {
-    auto *StepVector =
-        cast<VPInstructionWithType>(getOperand(3)->getDefiningRecipe());
-    assert(StepVector->getOpcode() == VPInstruction::StepVector &&
-           "step vector operand must be a VPInstruction::StepVector");
-    return StepVector;
-  }
-
   VPValue *getSplatVFValue() {
     // If the recipe has been unrolled return the VPValue for the induction
     // increment.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 048286d7a97bc..1ed0b97849a8d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -952,6 +952,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
   case VPInstruction::CanonicalIVIncrementForPart:
   case VPInstruction::BranchOnCount:
   case VPInstruction::BranchOnCond:
+  case VPInstruction::Broadcast:
   case VPInstruction::ReductionStartVector:
     return true;
   case VPInstruction::PtrAdd:
@@ -1077,15 +1078,14 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
 
 void VPInstructionWithType::execute(VPTransformState &State) {
   State.setDebugLocFrom(getDebugLoc());
-  switch (getOpcode()) {
-  case Instruction::ZExt:
-  case Instruction::Trunc: {
+  if (isScalarCast()) {
     Value *Op = State.get(getOperand(0), VPLane(0));
     Value *Cast = State.Builder.CreateCast(Instruction::CastOps(getOpcode()),
                                            Op, ResultTy);
     State.set(this, Cast, VPLane(0));
-    break;
+    return;
   }
+  switch (getOpcode()) {
   case VPInstruction::StepVector: {
     Value *StepVector =
         State.Builder.CreateStepVector(VectorType::get(ResultTy, State.VF));
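The new Broadcast case in onlyFirstLaneUsed reflects that VPInstruction::Broadcast reads only lane 0 of its operand. When executed on a scalar, a broadcast materializes as the usual insertelement/shufflevector splat idiom. A minimal sketch with illustrative names and a fixed VF of 4 (the BROADCAST_SPLAT patterns in the test updates below are the scalable-vector equivalent):

    %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
    %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer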
@@ -1965,44 +1965,6 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
   return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
 }
 
-/// This function adds
-///   (0 * Step, 1 * Step, 2 * Step, ...)
-/// to each vector element of Val.
-/// \p Opcode is relevant for FP induction variable.
-/// \p InitVec is an integer step vector from 0 with a step of 1.
-static Value *getStepVector(Value *Val, Value *Step, Value *InitVec,
-                            Instruction::BinaryOps BinOp, ElementCount VF,
-                            IRBuilderBase &Builder) {
-  assert(VF.isVector() && "only vector VFs are supported");
-
-  // Create and check the types.
-  auto *ValVTy = cast<VectorType>(Val->getType());
-  ElementCount VLen = ValVTy->getElementCount();
-
-  Type *STy = Val->getType()->getScalarType();
-  assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
-         "Induction Step must be an integer or FP");
-  assert(Step->getType() == STy && "Step has wrong type");
-
-  if (STy->isIntegerTy()) {
-    Step = Builder.CreateVectorSplat(VLen, Step);
-    assert(Step->getType() == Val->getType() && "Invalid step vec");
-    // FIXME: The newly created binary instructions should contain nsw/nuw
-    // flags, which can be found from the original scalar operations.
-    Step = Builder.CreateMul(InitVec, Step);
-    return Builder.CreateAdd(Val, Step, "induction");
-  }
-
-  // Floating point induction.
-  assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
-         "Binary Opcode should be specified for FP induction");
-  InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
-
-  Step = Builder.CreateVectorSplat(VLen, Step);
-  Value *MulOp = Builder.CreateFMul(InitVec, Step);
-  return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
-}
-
 /// A helper function that returns an integer or floating-point constant with
 /// value C.
 static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
@@ -2010,104 +1972,6 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
                           : ConstantFP::get(Ty, C);
 }
 
-void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
-  assert(!State.Lane && "Int or FP induction being replicated.");
-
-  Value *Start = getStartValue()->getLiveInIRValue();
-  const InductionDescriptor &ID = getInductionDescriptor();
-  TruncInst *Trunc = getTruncInst();
-  IRBuilderBase &Builder = State.Builder;
-  assert(getPHINode()->getType() == ID.getStartValue()->getType() &&
-         "Types must match");
-  assert(State.VF.isVector() && "must have vector VF");
-
-  // The value from the original loop to which we are mapping the new induction
-  // variable.
-  Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode();
-
-  // Fast-math-flags propagate from the original induction instruction.
-  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
-  if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
-    Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
-
-  // Now do the actual transformations, and start with fetching the step value.
-  Value *Step = State.get(getStepValue(), VPLane(0));
-
-  assert((isa<PHINode, TruncInst>(EntryVal)) &&
-         "Expected either an induction phi-node or a truncate of it!");
-
-  // Construct the initial value of the vector IV in the vector loop preheader
-  auto CurrIP = Builder.saveIP();
-  BasicBlock *VectorPH =
-      State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
-  Builder.SetInsertPoint(VectorPH->getTerminator());
-  if (isa<TruncInst>(EntryVal)) {
-    assert(Start->getType()->isIntegerTy() &&
-           "Truncation requires an integer type");
-    auto *TruncType = cast<IntegerType>(EntryVal->getType());
-    Step = Builder.CreateTrunc(Step, TruncType);
-    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
-  }
-
-  Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
-  Value *SteppedStart =
-      ::getStepVector(SplatStart, Step, State.get(getStepVector()),
-                      ID.getInductionOpcode(), State.VF, State.Builder);
-
-  // We create vector phi nodes for both integer and floating-point induction
-  // variables. Here, we determine the kind of arithmetic we will perform.
-  Instruction::BinaryOps AddOp;
-  Instruction::BinaryOps MulOp;
-  if (Step->getType()->isIntegerTy()) {
-    AddOp = Instruction::Add;
-    MulOp = Instruction::Mul;
-  } else {
-    AddOp = ID.getInductionOpcode();
-    MulOp = Instruction::FMul;
-  }
-
-  Value *SplatVF;
-  if (VPValue *SplatVFOperand = getSplatVFValue()) {
-    // The recipe has been unrolled. In that case, fetch the splat value for the
-    // induction increment.
-    SplatVF = State.get(SplatVFOperand);
-  } else {
-    // Multiply the vectorization factor by the step using integer or
-    // floating-point arithmetic as appropriate.
-    Type *StepType = Step->getType();
-    Value *RuntimeVF = State.get(getVFValue(), VPLane(0));
-    if (Step->getType()->isFloatingPointTy())
-      RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType);
-    else
-      RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType);
-    Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
-
-    // Create a vector splat to use in the induction update.
-    SplatVF = Builder.CreateVectorSplat(State.VF, Mul);
-  }
-
-  Builder.restoreIP(CurrIP);
-
-  // We may need to add the step a number of times, depending on the unroll
-  // factor. The last of those goes into the PHI.
-  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
-  VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
-  VecInd->setDebugLoc(getDebugLoc());
-  State.set(this, VecInd);
-
-  Instruction *LastInduction = cast<Instruction>(
-      Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next"));
-  LastInduction->setDebugLoc(getDebugLoc());
-
-  VecInd->addIncoming(SteppedStart, VectorPH);
-  // Add induction update using an incorrect block temporarily. The phi node
-  // will be fixed after VPlan execution. Note that at this point the latch
-  // block cannot be used, as it does not exist yet.
-  // TODO: Model increment value in VPlan, by turning the recipe into a
-  // multi-def and a subclass of VPHeaderPHIRecipe.
-  VecInd->addIncoming(LastInduction, VectorPH);
-}
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
@@ -3871,12 +3735,14 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 #endif
 
 void VPWidenPHIRecipe::execute(VPTransformState &State) {
-  assert(EnableVPlanNativePath &&
-         "Non-native vplans are not expected to have VPWidenPHIRecipes.");
-
   Value *Op0 = State.get(getOperand(0));
   Type *VecTy = Op0->getType();
-  Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
+  Instruction *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
+  // Manually move it with the other PHIs in case PHI recipes above this one
+  // also inserted non-phi instructions.
+  // TODO: Remove once VPWidenPointerInductionRecipe is also expanded in
+  // convertToConcreteRecipes.
+  VecPhi->moveBefore(State.Builder.GetInsertBlock()->getFirstNonPHIIt());
   State.set(this, VecPhi);
 }
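The moveBefore call guards against invalid IR: LLVM requires all PHI nodes of a basic block to be grouped at its top, but the expanded induction phi may now be created after earlier phi recipes have already emitted non-phi instructions into the header. A hypothetical shape of the breakage this prevents (illustrative values only):

    vector.body:
      %vec.ind = phi <4 x i64> [ %induction, %vector.ph ], [ %vec.ind.next, %vector.body ]
      %cast = uitofp <4 x i64> %vec.ind to <4 x float>   ; non-phi emitted while handling an earlier recipe
      %vec.phi = phi <4 x float> [ %init, %vector.ph ], [ %next, %vector.body ]  ; invalid: phi after a non-phi

Moving the freshly created phi to getFirstNonPHIIt() keeps the phis contiguous.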
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 05a0e15f9a199..11f0f2a930329 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1358,17 +1358,6 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
     WideIV->setStartValue(NewStart);
     auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
     WideIV->setStepValue(NewStep);
-    // TODO: Remove once VPWidenIntOrFpInductionRecipe is fully expanded.
-    VPInstructionWithType *OldStepVector = WideIV->getStepVector();
-    assert(OldStepVector->getNumUsers() == 1 &&
-           "step vector should only be used by single "
-           "VPWidenIntOrFpInductionRecipe");
-    auto *NewStepVector =
-        new VPInstructionWithType(VPInstruction::StepVector, {}, NewIVTy, {},
-                                  OldStepVector->getDebugLoc());
-    NewStepVector->insertAfter(OldStepVector->getDefiningRecipe());
-    OldStepVector->replaceAllUsesWith(NewStepVector);
-    OldStepVector->eraseFromParent();
 
     auto *NewBTC = new VPWidenCastRecipe(
         Instruction::Trunc, Plan.getOrCreateBackedgeTakenCount(), NewIVTy);
@@ -2518,6 +2507,127 @@ void VPlanTransforms::createInterleaveGroups(
   }
 }
 
+/// Expand a VPWidenIntOrFpInduction into executable recipes, for the initial
+/// value, phi and backedge value. In the following example:
+///
+/// vector.ph:
+/// Successor(s): vector loop
+///
+/// vector loop: {
+///   vector.body:
+///     WIDEN-INDUCTION %i = phi %start, %step, %vf
+///     ...
+///     EMIT branch-on-count ...
+///   No successors
+/// }
+///
+/// WIDEN-INDUCTION will get expanded to:
+///
+/// vector.ph:
+///   ...
+///   vp<%induction.start> = ...
+///   vp<%induction.increment> = ...
+///
+/// Successor(s): vector loop
+///
+/// vector loop: {
+///   vector.body:
+///     ir<%i> = WIDEN-PHI vp<%induction.start>, vp<%vec.ind.next>
+///     ...
+///     vp<%vec.ind.next> = add ir<%i>, vp<%induction.increment>
+///     EMIT branch-on-count ...
+///   No successors
+/// }
+static void
+expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
+                              VPTypeAnalysis &TypeInfo) {
+  VPlan *Plan = WidenIVR->getParent()->getPlan();
+  VPValue *Start = WidenIVR->getStartValue();
+  VPValue *Step = WidenIVR->getStepValue();
+  VPValue *VF = WidenIVR->getVFValue();
+  DebugLoc DL = WidenIVR->getDebugLoc();
+
+  // The value from the original loop to which we are mapping the new induction
+  // variable.
+  Type *Ty = TypeInfo.inferScalarType(WidenIVR);
+
+  const InductionDescriptor &ID = WidenIVR->getInductionDescriptor();
+  Instruction::BinaryOps AddOp;
+  Instruction::BinaryOps MulOp;
+  // FIXME: The newly created binary instructions should contain nsw/nuw
+  // flags, which can be found from the original scalar operations.
+  VPIRFlags Flags;
+  if (ID.getKind() == InductionDescriptor::IK_IntInduction) {
+    AddOp = Instruction::Add;
+    MulOp = Instruction::Mul;
+  } else {
+    AddOp = ID.getInductionOpcode();
+    MulOp = Instruction::FMul;
+    Flags = ID.getInductionBinOp()->getFastMathFlags();
+  }
+
+  // If the phi is truncated, truncate the start and step values.
+  VPBuilder Builder(Plan->getVectorPreheader());
+  Type *StepTy = TypeInfo.inferScalarType(Step);
+  if (Ty->getScalarSizeInBits() < StepTy->getScalarSizeInBits()) {
+    assert(StepTy->isIntegerTy() && "Truncation requires an integer type");
+    Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty, DL);
+    Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty, DL);
+    StepTy = Ty;
+  }
+
+  // Construct the initial value of the vector IV in the vector loop preheader.
+  Type *IVIntTy =
+      IntegerType::get(StepTy->getContext(), StepTy->getScalarSizeInBits());
+  VPValue *Init = Builder.createNaryOp(VPInstruction::StepVector, {}, IVIntTy);
+  if (StepTy->isFloatingPointTy())
+    Init = Builder.createWidenCast(Instruction::UIToFP, Init, StepTy);
+
+  VPValue *SplatStart = Builder.createNaryOp(VPInstruction::Broadcast, Start);
+  VPValue *SplatStep = Builder.createNaryOp(VPInstruction::Broadcast, Step);
+
+  Init = Builder.createNaryOp(MulOp, {Init, SplatStep}, Flags);
+  Init =
+      Builder.createNaryOp(AddOp, {SplatStart, Init}, Flags, {}, "induction");
+
+  // Create the widened phi of the vector IV.
+  auto *WidePHI = new VPWidenPHIRecipe(WidenIVR->getPHINode(), nullptr,
+                                       WidenIVR->getDebugLoc(), "vec.ind");
+  WidePHI->addOperand(Init);
+  WidePHI->insertBefore(WidenIVR);
+
+  // Create the backedge value for the vector IV.
+  VPValue *Inc;
+  VPValue *Prev;
+  // If unrolled, use the increment and prev value from the operands.
+  if (auto *SplatVF = WidenIVR->getSplatVFValue()) {
+    Inc = SplatVF;
+    Prev = WidenIVR->getLastUnrolledPartOperand();
+  } else {
+    // Multiply the vectorization factor by the step using integer or
+    // floating-point arithmetic as appropriate.
+    if (StepTy->isFloatingPointTy())
+      VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
+                                    DL);
+    else
+      VF =
+          Builder.createScalarCast(Instruction::CastOps::Trunc, VF, StepTy, DL);
+
+    Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
+    Inc = Builder.createNaryOp(VPInstruction::Broadcast, Inc);
+    Prev = WidePHI;
+  }
+
+  VPBasicBlock *ExitingBB = Plan->getVectorLoopRegion()->getExitingBasicBlock();
+  Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator());
+  auto *Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
+                                    WidenIVR->getDebugLoc(), "vec.ind.next");
+
+  WidePHI->addOperand(Next);
+
+  WidenIVR->replaceAllUsesWith(WidePHI);
+}
+
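To make the expansion concrete, this is roughly the IR it produces for an i64 induction starting at 0 with step 1 at a scalable VF. The names are illustrative, but the shape matches the updated INDUCTION/VEC_IND check lines in the tests below:

    vector.ph:
      %stepvec = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
      %mulstep = mul <vscale x 2 x i64> %stepvec, splat (i64 1)
      %induction = add <vscale x 2 x i64> zeroinitializer, %mulstep
      %inc = mul i64 1, %runtime.vf
      %inc.splatinsert = insertelement <vscale x 2 x i64> poison, i64 %inc, i64 0
      %inc.splat = shufflevector <vscale x 2 x i64> %inc.splatinsert, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
      br label %vector.body

    vector.body:
      %vec.ind = phi <vscale x 2 x i64> [ %induction, %vector.ph ], [ %vec.ind.next, %vector.body ]
      ...
      %vec.ind.next = add <vscale x 2 x i64> %vec.ind, %inc.splat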
 void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) {
   // Replace loop regions with explicit CFG.
   SmallVector<VPRegionBlock *> LoopRegions;
@@ -2625,6 +2735,12 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
       continue;
     }
 
+    if (auto *WidenIVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
+      expandVPWidenIntOrFpInduction(WidenIVR, TypeInfo);
+      ToRemove.push_back(WidenIVR);
+      continue;
+    }
+
     VPValue *VectorStep;
     VPValue *ScalarStep;
     if (!match(&R, m_VPInstruction<VPInstruction::WideIVStep>(
@@ -2935,27 +3051,6 @@ void VPlanTransforms::convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
   }
 }
 
-void VPlanTransforms::materializeStepVectors(VPlan &Plan) {
-  for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
-    auto *IVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
-    if (!IVR)
-      continue;
-
-    Type *Ty = IVR->getPHINode()->getType();
-    if (TruncInst *Trunc = IVR->getTruncInst())
-      Ty = Trunc->getType();
-    if (Ty->isFloatingPointTy())
-      Ty = IntegerType::get(Ty->getContext(), Ty->getScalarSizeInBits());
-
-    VPBuilder Builder(Plan.getVectorPreheader());
-    VPInstruction *StepVector = Builder.createNaryOp(
-        VPInstruction::StepVector, {}, Ty, {}, IVR->getDebugLoc());
-    assert(IVR->getNumOperands() == 3 &&
-           "can only add step vector before unrolling");
-    IVR->addOperand(StepVector);
-  }
-}
-
 void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
   if (Plan.hasScalarVFOnly())
     return;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 5a03bdb7c6882..7e51c05d1b5b5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -209,11 +209,6 @@ struct VPlanTransforms {
   optimizeInductionExitUsers(VPlan &Plan,
                              DenseMap<VPValue *, VPValue *> &EndValues);
 
-  /// Materialize VPInstruction::StepVectors for VPWidenIntOrFpInductionRecipes.
-  /// TODO: Remove once all of VPWidenIntOrFpInductionRecipe is expanded in
-  /// convertToConcreteRecipes.
-  static void materializeStepVectors(VPlan &Plan);
-
   /// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's
   /// entry block if they are used as vectors.
static void materializeBroadcasts(VPlan &Plan); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index ae7719757dc30..24c703ae42f0a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -16,9 +16,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8) -; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] @@ -36,8 +36,8 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP14]], ptr [[TMP17]], i32 1, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]] @@ -100,9 +100,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]]) -; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP8]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]] @@ -120,8 +120,8 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP14]], ptr [[TMP17]], i32 1, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]]) +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index f36161703dba5..976f95ff4f0ba 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -862,8 +862,8 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; DEFAULT-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE14]] ; DEFAULT: [[PRED_STORE_CONTINUE14]]: -; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8) ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8) ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[EXIT:.*]] @@ -964,8 +964,8 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1 ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE14]] ; PRED: [[PRED_STORE_CONTINUE14]]: -; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8) ; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8) ; PRED-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: ; PRED-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 4775a6ec3f917..d59607711b5bf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -123,9 +123,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]]) -; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[M]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]] @@ -246,9 +246,9 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]]) -; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[MUL_2_I]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP17:%.*]] = mul [[TMP15]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP17]] ; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, 
[[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll index 5508a65744c6b..895781de31f33 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll @@ -87,12 +87,12 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N16]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX18]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[IV1]] ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP1]], align 8 ; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 3 @@ -184,16 +184,16 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x i32> poison, i32 [[START]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT9]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT10]], zeroinitializer -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32 -; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <4 x i32> poison, i32 [[TMP12]], i64 0 +; CHECK-NEXT: [[DOTSPLATINSERT13:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i64 0 ; CHECK-NEXT: [[DOTSPLAT14:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT13]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT14]], +; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[BC_RESUME_VAL]] to i32 +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP12]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI12:%.*]] = phi <4 x i32> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP14:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI12:%.*]] = phi <4 x i32> [ [[DOTSPLAT14]], %[[VEC_EPILOG_PH]] ], [ [[TMP14:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14]] = select <4 x i1> [[TMP11]], <4 x i32> [[VEC_IND15]], <4 x i32> [[VEC_PHI12]] ; CHECK-NEXT: [[INDEX_NEXT17]] = add nuw i64 [[INDEX11]], 4 @@ -207,12 +207,12 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC8]] ; CHECK-NEXT: br i1 [[CMP_N20]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL21:%.*]] = phi i64 [ [[N_VEC8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX22:%.*]] = phi i32 [ [[RDX_SELECT19]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL23:%.*]] = phi i64 [ [[N_VEC8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX24:%.*]] = phi i32 [ [[RDX_SELECT19]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL21]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX22]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL23]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX24]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[START]], 0 ; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 ; CHECK-NEXT: [[RED_NEXT]] = select i1 [[C]], i32 [[IV_TRUNC]], i32 [[RED]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index 4d3afd71921d6..e4718dc216358 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -394,9 +394,9 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE36]] ; DEFAULT: [[PRED_STORE_CONTINUE36]]: +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16) ; DEFAULT-NEXT: [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16) -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] @@ -514,13 +514,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]] ; DEFAULT-NEXT: 
[[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; DEFAULT-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15) -; DEFAULT-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[A]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i8 [[B]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; DEFAULT-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() ; DEFAULT-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) ; DEFAULT-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] ; DEFAULT-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8 @@ -590,13 +590,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; OPTSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]] ; OPTSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15) -; OPTSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() ; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[A]], i64 0 ; OPTSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i8 [[B]], i64 0 ; OPTSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0 ; OPTSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; OPTSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() ; OPTSIZE-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) ; OPTSIZE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] ; OPTSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8 @@ -666,13 +666,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 ; MINSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]] ; MINSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 ; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15) -; MINSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() ; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[A]], i64 0 ; MINSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i8 [[B]], i64 0 ; MINSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i8 [[C]], i64 0 ; MINSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; MINSIZE-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv16i8() ; MINSIZE-NEXT: [[TMP11:%.*]] = mul [[TMP10]], splat (i8 1) ; MINSIZE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] ; MINSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8 diff --git 
a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll index 74b0c2c0e033a..d02d03b4b437d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll @@ -120,8 +120,8 @@ define i64 @same_exit_block_pre_inc_use4() { ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.split: ; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll index 49584bd47353d..f44744071ae58 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll @@ -20,11 +20,11 @@ define void @induction_i7(ptr %dst) #0 { ; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i7 -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8() -; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0 ; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc [[DOTSPLAT_]] to +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8() +; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to ; CHECK-NEXT: [[TMP9:%.*]] = mul [[TMP7]], splat (i7 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -85,11 +85,11 @@ define void @induction_i3_zext(ptr %dst) #0 { ; CHECK-NEXT: [[TMP40:%.*]] = mul i64 [[TMP4]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP40]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i3 -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8() -; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP40]], i64 0 ; CHECK-NEXT: [[DOTSPLAT_:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[DOTSPLAT:%.*]] = trunc [[DOTSPLAT_]] to +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i8() +; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to ; CHECK-NEXT: [[TMP9:%.*]] = mul [[TMP7]], splat (i3 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP9]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 77e713256d247..7e4edf739695a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -101,11 +101,11 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 
@llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 @@ -185,11 +185,11 @@ define void @test_array_load2_store2_i16(i32 noundef %C, i32 noundef %D) #1 { ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[C:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[D:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl [[TMP2]], splat (i64 1) ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP0]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 @@ -579,9 +579,9 @@ define void @load_gap_reverse(ptr noalias nocapture readonly %P1, ptr noalias no ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[INDUCTION:%.*]] = sub splat (i64 1023), [[TMP2]] ; CHECK-NEXT: [[DOTNEG:%.*]] = sub nsw i64 0, [[TMP1]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[DOTNEG]], i64 0 @@ -809,9 +809,9 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -958,9 +958,9 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = 
call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1113,13 +1113,13 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP9:%.*]] = shl [[TMP10]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP6]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 @@ -1191,13 +1191,13 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP11]], 3 -; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP21:%.*]] = shl [[TMP10]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[TMP21]], splat (i64 3) ; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3 @@ -1284,14 +1284,14 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = call @llvm.stepvector.nxv4i64() -; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2 -; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1 -; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[DOTPRE]], i32 [[TMP13]] ; CHECK-NEXT: [[TMP15:%.*]] = shl [[TMP14]], splat (i64 1) ; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP17]], i64 0 
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i32 [[TMP33]], 2 +; CHECK-NEXT: [[TMP34:%.*]] = add nsw i32 [[TMP16]], -1 +; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement poison, i16 [[DOTPRE]], i32 [[TMP34]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index 3567aff0ace4e..bd2bd5aa27952 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -37,11 +37,11 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -60,10 +60,10 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP14]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = sub zeroinitializer, [[TMP13]] ; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP13]], [[TMP16]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP15]], i32 1, [[INTERLEAVED_MASK1]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP15]], i32 1, [[INTERLEAVED_MASK3]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP0:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: @@ -83,11 +83,11 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -108,11 +108,11 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP14]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = sub zeroinitializer, [[TMP13]] ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv32i8( [[TMP13]], [[TMP16]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP15]], i32 1, [[INTERLEAVED_MASK1]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave2.nxv32i1( [[TMP7]], [[TMP7]]) +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv32i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP15]], i32 1, [[INTERLEAVED_MASK3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP17]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: @@ -182,11 +182,11 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 
0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -201,7 +201,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP12]] ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), [[TMP13]], i32 1, [[TMP10]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: @@ -221,11 +221,11 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -243,7 +243,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), [[TMP13]], i32 1, [[TMP10]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) -; PREDICATED_TAIL_FOLDING-NEXT: 
[[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP14]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP4:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: @@ -309,13 +309,13 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[CONV3]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -331,7 +331,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP13]] ; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), [[TMP14]], i32 1, [[TMP11]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: @@ -352,13 +352,13 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 
[[CONV3]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -378,7 +378,7 @@ define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr no ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), [[TMP15]], i32 1, [[TMP12]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT4]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP16]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP6:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: @@ -456,11 +456,11 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; SCALAR_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 -; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALAR_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_TAIL_FOLDING: vector.body: ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -483,10 +483,10 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; SCALAR_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = sext i32 [[TMP8]] to i64 ; SCALAR_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP19]] ; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave4.nxv64i8( [[TMP15]], [[TMP16]], [[TMP17]], [[TMP18]]) -; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call 
@llvm.vector.interleave4.nxv64i1( [[TMP7]], [[TMP7]], [[TMP7]], [[TMP7]]) -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP20]], i32 1, [[INTERLEAVED_MASK1]]) +; SCALAR_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave4.nxv64i1( [[TMP7]], [[TMP7]], [[TMP7]], [[TMP7]]) +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP20]], i32 1, [[INTERLEAVED_MASK3]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: @@ -506,11 +506,11 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call i32 @llvm.usub.sat.i32(i32 1024, i32 [[TMP3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 0, i32 1024) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP1]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -535,11 +535,11 @@ define dso_local void @masked_strided_factor4(ptr noalias nocapture readonly %p, ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = sext i32 [[TMP8]] to i64 ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[Q]], i64 [[TMP19]] ; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave4.nxv64i8( [[TMP15]], [[TMP16]], [[TMP17]], [[TMP18]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK1:%.*]] = call @llvm.vector.interleave4.nxv64i1( [[TMP7]], [[TMP7]], [[TMP7]], [[TMP7]]) -; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP20]], i32 1, [[INTERLEAVED_MASK1]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[INTERLEAVED_MASK3:%.*]] = call @llvm.vector.interleave4.nxv64i1( [[TMP7]], [[TMP7]], [[TMP7]], [[TMP7]]) +; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.store.nxv64i8.p0( [[INTERLEAVED_VEC]], ptr [[TMP20]], i32 1, [[INTERLEAVED_MASK3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP1]] ; PREDICATED_TAIL_FOLDING-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call 
@llvm.get.active.lane.mask.nxv16i1.i32(i32 [[INDEX]], i32 [[TMP4]]) -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP21]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP8:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll index 022789ad9de70..fea57fa8b6b68 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll @@ -107,8 +107,8 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) { ; VF4-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8 ; VF4-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; VF4: [[PRED_STORE_CONTINUE6]]: -; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF4: [[MIDDLE_BLOCK]]: ; VF4-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll index a7a7b1af5953b..1d898fbaaed36 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/optsize_minsize.ll @@ -390,9 +390,9 @@ define void @tail_predicate_without_optsize(ptr %p, i8 %a, i8 %b, i8 %c, i32 %n) ; DEFAULT-NEXT: store i8 [[TMP71]], ptr [[TMP70]], align 1 ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE36]] ; DEFAULT: [[PRED_STORE_CONTINUE36]]: +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 16) ; DEFAULT-NEXT: [[VEC_IND_NEXT2]] = add <16 x i8> [[VEC_IND1]], splat (i8 16) -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[FOR_COND_CLEANUP:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll index b7c9612e57aec..79425ae3a67ec 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-masked-access.ll @@ -21,11 +21,11 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ; SCALAR_EPILOGUE-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] ; SCALAR_EPILOGUE-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_EPILOGUE-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 -; SCALAR_EPILOGUE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() ; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; 
SCALAR_EPILOGUE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_EPILOGUE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALAR_EPILOGUE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() +; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALAR_EPILOGUE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_EPILOGUE: vector.body: ; SCALAR_EPILOGUE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -38,8 +38,8 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ; SCALAR_EPILOGUE-NEXT: [[TMP11:%.*]] = or disjoint [[TMP8]], splat (i32 1) ; SCALAR_EPILOGUE-NEXT: [[TMP12:%.*]] = zext nneg [[TMP11]] to ; SCALAR_EPILOGUE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[P]], [[TMP12]] -; SCALAR_EPILOGUE-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP13]], i32 1, [[TMP7]], poison) -; SCALAR_EPILOGUE-NEXT: [[TMP14:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER1]]) +; SCALAR_EPILOGUE-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP13]], i32 1, [[TMP7]], poison) +; SCALAR_EPILOGUE-NEXT: [[TMP14:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER3]]) ; SCALAR_EPILOGUE-NEXT: [[TMP15:%.*]] = zext nneg [[TMP8]] to ; SCALAR_EPILOGUE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP15]] ; SCALAR_EPILOGUE-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( [[TMP14]], [[TMP16]], i32 1, [[TMP7]]) @@ -48,7 +48,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ; SCALAR_EPILOGUE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP18]] ; SCALAR_EPILOGUE-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( [[TMP17]], [[TMP19]], i32 1, [[TMP7]]) ; SCALAR_EPILOGUE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_EPILOGUE-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_EPILOGUE-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALAR_EPILOGUE: middle.block: @@ -69,11 +69,11 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv16i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP3]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP3]], i64 0 
+; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -88,8 +88,8 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = or disjoint [[TMP7]], splat (i32 1) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = zext nneg [[TMP10]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[P]], [[TMP11]] -; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP12]], i32 1, [[TMP6]], poison) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER1]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP12]], i32 1, [[TMP6]], poison) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = zext nneg [[TMP7]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP14]] ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( [[TMP13]], [[TMP15]], i32 1, [[TMP6]]) @@ -98,7 +98,7 @@ define void @masked_strided_factor2(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP17]] ; PREDICATED_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( [[TMP16]], [[TMP18]], i32 1, [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]] -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: @@ -190,11 +190,11 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; SCALAR_EPILOGUE-NEXT: [[N_VEC:%.*]] = sub nuw nsw i32 1024, [[N_MOD_VF]] ; SCALAR_EPILOGUE-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() ; SCALAR_EPILOGUE-NEXT: [[TMP5:%.*]] = shl i32 [[TMP4]], 4 -; SCALAR_EPILOGUE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() ; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[CONV]], i64 0 ; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALAR_EPILOGUE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; SCALAR_EPILOGUE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALAR_EPILOGUE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() +; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 +; SCALAR_EPILOGUE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALAR_EPILOGUE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALAR_EPILOGUE: vector.body: ; SCALAR_EPILOGUE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, 
[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -209,16 +209,16 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; SCALAR_EPILOGUE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP13]], i32 1, [[TMP7]], poison) ; SCALAR_EPILOGUE-NEXT: [[TMP14:%.*]] = zext nneg [[TMP9]] to ; SCALAR_EPILOGUE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[P]], [[TMP14]] -; SCALAR_EPILOGUE-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP15]], i32 1, [[TMP7]], poison) +; SCALAR_EPILOGUE-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP15]], i32 1, [[TMP7]], poison) ; SCALAR_EPILOGUE-NEXT: [[TMP16:%.*]] = zext nneg [[TMP10]] to ; SCALAR_EPILOGUE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[P]], [[TMP16]] -; SCALAR_EPILOGUE-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP17]], i32 1, [[TMP7]], poison) +; SCALAR_EPILOGUE-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP17]], i32 1, [[TMP7]], poison) ; SCALAR_EPILOGUE-NEXT: [[TMP18:%.*]] = zext nneg [[TMP11]] to ; SCALAR_EPILOGUE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[P]], [[TMP18]] -; SCALAR_EPILOGUE-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP19]], i32 1, [[TMP7]], poison) -; SCALAR_EPILOGUE-NEXT: [[TMP20:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER1]]) +; SCALAR_EPILOGUE-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP19]], i32 1, [[TMP7]], poison) +; SCALAR_EPILOGUE-NEXT: [[TMP20:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER3]]) ; SCALAR_EPILOGUE-NEXT: [[TMP21:%.*]] = sub zeroinitializer, [[TMP20]] -; SCALAR_EPILOGUE-NEXT: [[TMP22:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_MASKED_GATHER2]], [[WIDE_MASKED_GATHER3]]) +; SCALAR_EPILOGUE-NEXT: [[TMP22:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_MASKED_GATHER4]], [[WIDE_MASKED_GATHER5]]) ; SCALAR_EPILOGUE-NEXT: [[TMP23:%.*]] = sub zeroinitializer, [[TMP22]] ; SCALAR_EPILOGUE-NEXT: [[TMP24:%.*]] = zext nneg [[TMP8]] to ; SCALAR_EPILOGUE-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP24]] @@ -233,7 +233,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; SCALAR_EPILOGUE-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP30]] ; SCALAR_EPILOGUE-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( [[TMP23]], [[TMP31]], i32 1, [[TMP7]]) ; SCALAR_EPILOGUE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]] -; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALAR_EPILOGUE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; SCALAR_EPILOGUE-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SCALAR_EPILOGUE-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALAR_EPILOGUE: middle.block: @@ -254,11 +254,11 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_TAIL_FOLDING-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 4 -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv16i32() ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 
[[CONV]], i64 0 ; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i32 [[TMP3]], i64 0 -; PREDICATED_TAIL_FOLDING-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = call @llvm.stepvector.nxv16i32() +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP3]], i64 0 +; PREDICATED_TAIL_FOLDING-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; PREDICATED_TAIL_FOLDING-NEXT: br label [[VECTOR_BODY:%.*]] ; PREDICATED_TAIL_FOLDING: vector.body: ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -275,16 +275,16 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP12]], i32 1, [[TMP6]], poison) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP13:%.*]] = zext nneg [[TMP8]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[P]], [[TMP13]] -; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP14]], i32 1, [[TMP6]], poison) +; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP14]], i32 1, [[TMP6]], poison) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP15:%.*]] = zext nneg [[TMP9]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[P]], [[TMP15]] -; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP16]], i32 1, [[TMP6]], poison) +; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP16]], i32 1, [[TMP6]], poison) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP17:%.*]] = zext nneg [[TMP10]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[P]], [[TMP17]] -; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP18]], i32 1, [[TMP6]], poison) -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER1]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call @llvm.masked.gather.nxv16i8.nxv16p0( [[TMP18]], i32 1, [[TMP6]], poison) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP19:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER3]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP20:%.*]] = sub zeroinitializer, [[TMP19]] -; PREDICATED_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_MASKED_GATHER2]], [[WIDE_MASKED_GATHER3]]) +; PREDICATED_TAIL_FOLDING-NEXT: [[TMP21:%.*]] = call @llvm.smax.nxv16i8( [[WIDE_MASKED_GATHER4]], [[WIDE_MASKED_GATHER5]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP22:%.*]] = sub zeroinitializer, [[TMP21]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP23:%.*]] = zext nneg [[TMP7]] to ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP23]] @@ -299,7 +299,7 @@ define void @masked_strided_factor4(ptr noalias nocapture readonly %p, ptr noali ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP29]] ; PREDICATED_TAIL_FOLDING-NEXT: call void 
@llvm.masked.scatter.nxv16i8.nxv16p0( [[TMP22]], [[TMP30]], i32 1, [[TMP6]]) ; PREDICATED_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]] -; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; PREDICATED_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; PREDICATED_TAIL_FOLDING-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; PREDICATED_TAIL_FOLDING-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; PREDICATED_TAIL_FOLDING: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index c64f6df075a04..3e4d337c0706c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -22,9 +22,9 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; VLENUNK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; VLENUNK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; VLENUNK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 ; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; VLENUNK-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] ; VLENUNK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP5]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll index 88d9ed2ce201e..2f9ff20bf0f98 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr87378-vpinstruction-or-drop-poison-generating-flags.ll @@ -20,13 +20,13 @@ define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1001, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[B]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i64 [[C]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv8i64() ; CHECK-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP5]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll index 3dc17e615048e..51a8b451dffd9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll @@ -137,8 +137,8 
@@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: store i8 [[TMP40]], ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue32: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16) ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT1:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index f89a863d1e5f5..79590f5060ad4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -553,9 +553,9 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; STRIDED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; STRIDED-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 -; STRIDED-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i64() ; STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[STRIDE]], i64 0 ; STRIDED-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; STRIDED-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv4i64() ; STRIDED-NEXT: [[TMP14:%.*]] = mul [[TMP12]], splat (i64 1) ; STRIDED-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP14]] ; STRIDED-NEXT: [[TMP17:%.*]] = mul i64 1, [[TMP11]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 70c04ded5cf57..827612cfe36d5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -325,9 +325,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 -; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv4i64() ; SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], splat (i64 1) ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] ; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] @@ -432,9 +432,9 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv4i64() ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv4i64() ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, 
[[TMP4]] @@ -996,11 +996,11 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[V]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] @@ -1127,11 +1127,11 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 -; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[V]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP8:%.*]] = mul [[TMP6]], splat (i64 1) ; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] ; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP5]] @@ -1233,11 +1233,11 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i64 [[V]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT1:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.stepvector.nxv2i64() ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP5]], splat (i64 1) ; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 1, [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll index 427123cfca6d4..cd246053bcb30 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll @@ -29,17 +29,17 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; IF-EVL-NEXT: 
[[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 -; IF-EVL-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; IF-EVL-NEXT: [[TMP12:%.*]] = mul [[TMP10]], splat (i64 1) ; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP12]] -; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 -; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP8]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; IF-EVL-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp ule [[STEP_ADD]], [[BROADCAST_SPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index 7d9ed7d6215c5..05a495d51c458 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -110,8 +110,8 @@ define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] @@ -214,8 +214,8 @@ define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) { ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll index 38b58fbfd1021..53fd2ed43972c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll @@ -39,8 +39,8 @@ define void @drop_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ptr %out ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr 
[[TMP5]], i32 0 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; @@ -91,8 +91,8 @@ define void @drop_scalar_gep_nusw(ptr noalias nocapture readonly %input, ptr %ou ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nusw float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr nusw float, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; @@ -143,8 +143,8 @@ define void @drop_scalar_gep_nuw(ptr noalias nocapture readonly %input, ptr %out ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nuw float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr nuw float, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; @@ -196,8 +196,8 @@ define void @drop_nonpred_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; @@ -248,8 +248,8 @@ define void @preserve_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP6]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; @@ -305,8 +305,8 @@ define void @drop_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr %out ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP9]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: 
[[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; @@ -356,8 +356,8 @@ define void @preserve_nuw_nsw_no_addr(ptr %output) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; @@ -409,8 +409,8 @@ define void @drop_scalar_exact(ptr noalias nocapture readonly %input, ptr %outpu ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP9]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; @@ -518,8 +518,8 @@ define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; @@ -570,8 +570,8 @@ define void @preserve_exact_no_addr(ptr %output) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; @@ -698,8 +698,8 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP15]], i32 0 ; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP16]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop 
[[LOOP25:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; @@ -778,8 +778,8 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP10]], i32 0 ; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP11]], align 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 7aeb32afe43be..0a85548f8750b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -411,9 +411,9 @@ define i16 @iv_and_step_trunc() { ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i16> [[VEC_IND1]], [[TMP1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll index 4fbee321b6a48..7f2544ddf149d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll @@ -519,8 +519,8 @@ define void @interleave_store_double_i64(ptr %dst) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> zeroinitializer, <2 x double> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] @@ -640,8 +640,8 @@ define void @interleave_store_i64_double_2(ptr %dst) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> zeroinitializer, <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <4 x i32> ; CHECK-NEXT: store <4 x double> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, 
label %[[EXIT:.*]], label %[[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll index 6480c0ab1099d..02d48cbda1aab 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/outer_loop_test1_no_explicit_vect_width.ll @@ -71,8 +71,8 @@ ; AVX: br i1 %[[InnerCond]], label %[[ForInc]], label %[[InnerLoop]] ; AVX: [[ForInc]]: -; AVX: %[[VecIndNext]] = add <8 x i64> %[[VecInd]], splat (i64 8) ; AVX: %[[IndNext]] = add nuw i64 %[[Ind]], 8 +; AVX: %[[VecIndNext]] = add <8 x i64> %[[VecInd]], splat (i64 8) ; AVX: br i1 true, label %middle.block, label %vector.body @arr2 = external global [8 x i32], align 16 diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index 4038ace617c17..99650592d2dea 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -131,7 +131,7 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[IND_END43:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label [[VECTOR_BODY29:%.*]] -; CHECK: vector.body28: +; CHECK: vector.body30: ; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, [[VECTOR_PH25]] ], [ [[INDEX_NEXT39:%.*]], [[VECTOR_BODY29]] ] ; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ , [[VECTOR_PH25]] ], [ [[VEC_IND_NEXT36:%.*]], [[VECTOR_BODY29]] ] ; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ , [[VECTOR_PH25]] ], [ [[VEC_IND_NEXT38:%.*]], [[VECTOR_BODY29]] ] @@ -153,18 +153,18 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT36]] = add <16 x i64> [[VEC_IND35]], splat (i64 32) ; CHECK-NEXT: [[VEC_IND_NEXT38]] = add <16 x i64> [[VEC_IND37]], splat (i64 32) ; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC32]] -; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK35:%.*]], label [[VECTOR_BODY29]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: middle.block35: +; CHECK-NEXT: br i1 [[TMP41]], label [[MIDDLE_BLOCK37:%.*]], label [[VECTOR_BODY29]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block37: ; CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC32]] ; CHECK-NEXT: br i1 [[CMP_N40]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK43:%.*]] -; CHECK: vec.epilog.iter.check42: +; CHECK: vec.epilog.iter.check44: ; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[IND_END55:%.*]] = add i64 8, [[TMP42]] ; CHECK-NEXT: [[IND_END58:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[N_VEC_REMAINING49:%.*]] = sub i64 [[TMP28]], [[N_VEC32]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK50:%.*]] = icmp ult i64 [[N_VEC_REMAINING49]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label [[VEC_EPILOG_SCALAR_PH40]], label [[VEC_EPILOG_PH42]] -; CHECK: vec.epilog.ph41: +; CHECK: vec.epilog.ph43: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL51:%.*]] = phi i64 [ [[N_VEC32]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK24]] ] ; CHECK-NEXT: [[BC_RESUME_VAL42:%.*]] = phi i64 [ [[IND_END41]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK24]] ] ; CHECK-NEXT: [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK24]] ] @@ -183,7 +183,7 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[DOTSPLAT68:%.*]] = 
shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY49:%.*]] -; CHECK: vec.epilog.vector.body49: +; CHECK: vec.epilog.vector.body57: ; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH42]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY49]] ] ; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY49]] ] ; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH42]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY49]] ] @@ -206,10 +206,10 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) ; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]] ; CHECK-NEXT: br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK40:%.*]], label [[VEC_EPILOG_VECTOR_BODY49]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: vec.epilog.middle.block62: +; CHECK: vec.epilog.middle.block64: ; CHECK-NEXT: [[CMP_N65:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]] ; CHECK-NEXT: br i1 [[CMP_N65]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH40]] -; CHECK: vec.epilog.scalar.ph40: +; CHECK: vec.epilog.scalar.ph42: ; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK40]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 8, [[ITER_CHECK22]] ] ; CHECK-NEXT: [[BC_RESUME_VAL67:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK40]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK43]] ], [ 0, [[ITER_CHECK22]] ] ; CHECK-NEXT: br label [[FOR_BODY_US:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index 1365e9f73d854..6e62ff842c6d1 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -245,8 +245,8 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[IND_END5:%.*]] = trunc i64 [[N_VEC3]] to i32 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0 -; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT6]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 9be26d4247a36..7684d274a75cf 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ 
b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -870,7 +870,7 @@ for.end: ; } ; ; -define i32 @PR27246(ptr %dst) { +define i32 @PR27246() { ; UNROLL-NO-IC-LABEL: @PR27246( ; UNROLL-NO-IC-NEXT: entry: ; UNROLL-NO-IC-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] @@ -882,8 +882,7 @@ define i32 @PR27246(ptr %dst) { ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 8 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]] -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = sub i32 [[I_016]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[I_016]] +; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0 ; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], @@ -892,13 +891,10 @@ define i32 @PR27246(ptr %dst) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; UNROLL-NO-IC-NEXT: store i32 [[TMP3]], ptr [[TMP1]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 -4) -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 2 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3 @@ -906,21 +902,19 @@ define i32 @PR27246(ptr %dst) { ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] -; UNROLL-NO-IC-NEXT: br label [[FOR_COND2:%.*]] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: br label [[FOR_COND1:%.*]] ; UNROLL-NO-IC: for.cond.cleanup: ; UNROLL-NO-IC-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[E_1_LCSSA_LCSSA]] ; UNROLL-NO-IC: for.cond1: -; UNROLL-NO-IC-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] -; UNROLL-NO-IC-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; UNROLL-NO-IC-NEXT: [[GEP_DST:%.*]] = getelementptr 
inbounds i32, ptr [[DST]], i32 [[I_016]] +; UNROLL-NO-IC-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; UNROLL-NO-IC-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; UNROLL-NO-IC-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1 ; UNROLL-NO-IC-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1 -; UNROLL-NO-IC-NEXT: store i32 [[E_1]], ptr [[GEP_DST]], align 4 -; UNROLL-NO-IC-NEXT: br i1 [[CMP2]], label [[FOR_COND2]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]] ; UNROLL-NO-IC: for.cond.cleanup3: -; UNROLL-NO-IC-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND2]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49 ; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]] @@ -936,38 +930,33 @@ define i32 @PR27246(ptr %dst) { ; UNROLL-NO-VF: vector.ph: ; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 2 ; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]] -; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = sub i32 [[I_016]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[I_016]] -; UNROLL-NO-VF-NEXT: br label [[FOR_COND1:%.*]] +; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: -; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND1]] ] +; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[I_016]], [[INDEX]] -; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0 -; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1 -; UNROLL-NO-VF-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], -1 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND1]], !llvm.loop [[LOOP8:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ] -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] -; UNROLL-NO-VF-NEXT: br label [[FOR_COND2:%.*]] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] +; 
UNROLL-NO-VF-NEXT: br label [[FOR_COND1:%.*]] ; UNROLL-NO-VF: for.cond.cleanup: ; UNROLL-NO-VF-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ] ; UNROLL-NO-VF-NEXT: ret i32 [[E_1_LCSSA_LCSSA]] ; UNROLL-NO-VF: for.cond1: -; UNROLL-NO-VF-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] -; UNROLL-NO-VF-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; UNROLL-NO-VF-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[I_016]] +; UNROLL-NO-VF-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; UNROLL-NO-VF-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; UNROLL-NO-VF-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1 ; UNROLL-NO-VF-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1 -; UNROLL-NO-VF-NEXT: store i32 [[E_1]], ptr [[GEP_DST]], align 4 -; UNROLL-NO-VF-NEXT: br i1 [[CMP2]], label [[FOR_COND2]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]] ; UNROLL-NO-VF: for.cond.cleanup3: -; UNROLL-NO-VF-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND2]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ], [ [[OFFSET_IDX]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49 ; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]] @@ -983,24 +972,18 @@ define i32 @PR27246(ptr %dst) { ; SINK-AFTER: vector.ph: ; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 4 ; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]] -; SINK-AFTER-NEXT: [[TMP0:%.*]] = sub i32 [[I_016]], [[N_VEC]] -; SINK-AFTER-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[I_016]] -; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3 +; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i64 0 ; SINK-AFTER-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], -; SINK-AFTER-NEXT: br label [[FOR_COND1:%.*]] +; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: -; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND1]] ] -; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[FOR_COND1]] ] -; SINK-AFTER-NEXT: [[VEC_IND]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[FOR_COND1]] ] -; SINK-AFTER-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; SINK-AFTER-NEXT: store i32 [[TMP3]], ptr [[TMP1]], align 4 +; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; 
SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4) -; SINK-AFTER-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND1]], !llvm.loop [[LOOP8:![0-9]+]] +; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 2 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3 @@ -1008,21 +991,19 @@ define i32 @PR27246(ptr %dst) { ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ] -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] -; SINK-AFTER-NEXT: br label [[FOR_COND2:%.*]] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] +; SINK-AFTER-NEXT: br label [[FOR_COND1:%.*]] ; SINK-AFTER: for.cond.cleanup: ; SINK-AFTER-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ] ; SINK-AFTER-NEXT: ret i32 [[E_1_LCSSA_LCSSA]] ; SINK-AFTER: for.cond1: -; SINK-AFTER-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] -; SINK-AFTER-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; SINK-AFTER-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[I_016]] +; SINK-AFTER-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] +; SINK-AFTER-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; SINK-AFTER-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1 ; SINK-AFTER-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1 -; SINK-AFTER-NEXT: store i32 [[E_1]], ptr [[GEP_DST]], align 4 -; SINK-AFTER-NEXT: br i1 [[CMP2]], label [[FOR_COND2]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]] ; SINK-AFTER: for.cond.cleanup3: -; SINK-AFTER-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND2]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49 ; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]] @@ -1042,10 +1023,8 @@ for.cond.cleanup: for.cond1: %e.1 = phi i32 [ %k.0, %for.cond1 ], [ %e.015, %for.cond1.preheader ] %k.0 = phi i32 [ %dec, %for.cond1 ], [ %i.016, %for.cond1.preheader ] - %gep.dst = getelementptr inbounds i32, ptr %dst, i32 %i.016 %cmp2 = icmp sgt i32 %k.0, 1 %dec = add nsw i32 %k.0, -1 - store i32 %e.1, ptr %gep.dst br i1 %cmp2, label %for.cond1, label %for.cond.cleanup3 for.cond.cleanup3: @@ -1072,22 +1051,22 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 
0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10 -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 8 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 10 +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 12 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 14 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP3]], 2 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 2 ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 2 -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[TMP7]], 2 -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP8]] +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP7]] +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]] -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP12]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP13]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP14]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP15]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP13]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP14]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1169,22 +1148,22 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 -; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 2 -; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 4 -; SINK-AFTER-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 6 +; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0 +; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2 +; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4 +; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6 +; SINK-AFTER-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP3]], 2 ; SINK-AFTER-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 2 ; SINK-AFTER-NEXT: [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2 ; SINK-AFTER-NEXT: [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 2 -; SINK-AFTER-NEXT: [[TMP11:%.*]] = add nuw nsw i64 [[TMP7]], 2 -; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP8]] +; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP7]] +; SINK-AFTER-NEXT: 
[[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP8]] ; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]] ; SINK-AFTER-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]] -; SINK-AFTER-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] +; SINK-AFTER-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 ; SINK-AFTER-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP12]], align 4 -; SINK-AFTER-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP13]], align 4 -; SINK-AFTER-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP14]], align 4 -; SINK-AFTER-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP15]], align 4 +; SINK-AFTER-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP13]], align 4 +; SINK-AFTER-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP14]], align 4 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1372,27 +1351,27 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[VECTOR_RECUR]], 1 -; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i32 [[VECTOR_RECUR]], [[X:%.*]] -; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[X]] +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i32 [[VECTOR_RECUR]], [[X:%.*]] +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], [[X]] ; UNROLL-NO-VF-NEXT: [[TMP3]] = add nuw i32 [[VECTOR_RECUR]], 2 -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 96 -; UNROLL-NO-VF-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP3]], 96 +; UNROLL-NO-VF-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT1:%.*]] = phi i32 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: +; UNROLL-NO-VF-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[VAL_PHI:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VAL_PHI1:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT1]], [[SCALAR_PH]] ], [ [[ADDX1:%.*]], [[FOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[ADDX]] = add i32 [[VAL_PHI]], 1 -; UNROLL-NO-VF-NEXT: [[BC:%.*]] = zext i32 [[VAL_PHI]] to i64 -; UNROLL-NO-VF-NEXT: [[ADDX1]] = add i32 [[VAL_PHI]], [[X]] -; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL_PHI]], 95 +; UNROLL-NO-VF-NEXT: [[INC]] = add i32 [[INC_PHI]], 1 +; UNROLL-NO-VF-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64 +; UNROLL-NO-VF-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]] +; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95 ; UNROLL-NO-VF-NEXT: br i1 
[[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; UNROLL-NO-VF: for.end: -; UNROLL-NO-VF-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[VAL_PHI1]], [[FOR_BODY]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[VAL_PHI]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: ret i32 [[VAL_PHI_LCSSA]] ; ; SINK-AFTER-LABEL: @extract_second_last_iteration( @@ -2473,7 +2452,178 @@ for.end12.loopexit: ; preds = %cond.end ret void } -define i32 @sink_into_replication_region(i32 %y, ptr %dst) { +; Dead instructions, like the exit condition are not part of the actual VPlan +; and do not need to be sunk. PR44634. +define void @sink_dead_inst(ptr %a) { +; UNROLL-NO-IC-LABEL: @sink_dead_inst( +; UNROLL-NO-IC-NEXT: entry: +; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-IC: vector.ph: +; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] +; UNROLL-NO-IC: vector.body: +; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) +; UNROLL-NO-IC-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 +; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]] +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1) +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add <4 x i16> [[TMP1]], splat (i16 5) +; UNROLL-NO-IC-NEXT: [[TMP5]] = add <4 x i16> [[TMP2]], splat (i16 5) +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP4]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], splat (i16 10) +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = sub <4 x i16> [[TMP7]], splat (i16 10) +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[OFFSET_IDX]] +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP10]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP10]], i32 4 +; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP11]], align 2 +; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP9]], ptr [[TMP12]], align 2 +; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 +; UNROLL-NO-IC-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; UNROLL-NO-IC: middle.block: +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 +; UNROLL-NO-IC-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; UNROLL-NO-IC: scalar.ph: +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, 
[[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: br label [[FOR_COND:%.*]] +; UNROLL-NO-IC: for.cond: +; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] +; UNROLL-NO-IC-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] +; UNROLL-NO-IC-NEXT: [[REC_2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ] +; UNROLL-NO-IC-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10 +; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[REC_2]], 15 +; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; UNROLL-NO-IC-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 +; UNROLL-NO-IC-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 +; UNROLL-NO-IC-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[A]], i16 [[IV]] +; UNROLL-NO-IC-NEXT: store i16 [[USE_REC_1]], ptr [[GEP]], align 2 +; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; UNROLL-NO-IC: for.end: +; UNROLL-NO-IC-NEXT: ret void +; +; UNROLL-NO-VF-LABEL: @sink_dead_inst( +; UNROLL-NO-VF-NEXT: entry: +; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-VF: vector.ph: +; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] +; UNROLL-NO-VF: vector.body: +; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 +; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]] +; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i16 [[TMP1]], 1 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = add i16 [[TMP2]], 5 +; UNROLL-NO-VF-NEXT: [[TMP6]] = add i16 [[TMP3]], 5 +; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sub i16 [[VECTOR_RECUR]], 10 +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sub i16 [[TMP5]], 10 +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[OFFSET_IDX]] +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[A]], i16 [[TMP1]] +; UNROLL-NO-VF-NEXT: store i16 [[TMP7]], ptr [[TMP9]], align 2 +; UNROLL-NO-VF-NEXT: store i16 [[TMP8]], ptr [[TMP10]], align 2 +; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 42 +; UNROLL-NO-VF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; UNROLL-NO-VF: middle.block: +; UNROLL-NO-VF-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; UNROLL-NO-VF: scalar.ph: +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 15, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT2:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: br label [[FOR_COND:%.*]] +; UNROLL-NO-VF: for.cond: +; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] +; UNROLL-NO-VF-NEXT: [[REC_1:%.*]] = phi i16 [ 
[[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] +; UNROLL-NO-VF-NEXT: [[REC_2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT2]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ] +; UNROLL-NO-VF-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10 +; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[REC_2]], 15 +; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; UNROLL-NO-VF-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 +; UNROLL-NO-VF-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 +; UNROLL-NO-VF-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[A]], i16 [[IV]] +; UNROLL-NO-VF-NEXT: store i16 [[USE_REC_1]], ptr [[GEP]], align 2 +; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; UNROLL-NO-VF: for.end: +; UNROLL-NO-VF-NEXT: ret void +; +; SINK-AFTER-LABEL: @sink_dead_inst( +; SINK-AFTER-NEXT: entry: +; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; SINK-AFTER: vector.ph: +; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] +; SINK-AFTER: vector.body: +; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 +; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]] +; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) +; SINK-AFTER-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +; SINK-AFTER-NEXT: [[TMP3]] = add <4 x i16> [[TMP1]], splat (i16 5) +; SINK-AFTER-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP3]], <4 x i32> +; SINK-AFTER-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[TMP4]], splat (i16 10) +; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[OFFSET_IDX]] +; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP6]], i32 0 +; SINK-AFTER-NEXT: store <4 x i16> [[TMP5]], ptr [[TMP7]], align 2 +; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) +; SINK-AFTER-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 +; SINK-AFTER-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; SINK-AFTER: middle.block: +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 +; SINK-AFTER-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; SINK-AFTER: scalar.ph: +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] +; SINK-AFTER-NEXT: br label [[FOR_COND:%.*]] +; SINK-AFTER: for.cond: +; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] +; SINK-AFTER-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ] +; SINK-AFTER-NEXT: [[REC_2:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ 
[[REC_2_PREV:%.*]], [[FOR_COND]] ] +; SINK-AFTER-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10 +; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[REC_2]], 15 +; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; SINK-AFTER-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 +; SINK-AFTER-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 +; SINK-AFTER-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[A]], i16 [[IV]] +; SINK-AFTER-NEXT: store i16 [[USE_REC_1]], ptr [[GEP]], align 2 +; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +; SINK-AFTER: for.end: +; SINK-AFTER-NEXT: ret void +; +entry: + br label %for.cond + +for.cond: + %iv = phi i16 [ -27, %entry ], [ %iv.next, %for.cond ] + %rec.1 = phi i16 [ 0, %entry ], [ %rec.1.prev, %for.cond ] + %rec.2 = phi i32 [ -27, %entry ], [ %rec.2.prev, %for.cond ] + %use.rec.1 = sub i16 %rec.1, 10 + %cmp = icmp eq i32 %rec.2, 15 + %iv.next = add i16 %iv, 1 + %rec.2.prev = zext i16 %iv.next to i32 + %rec.1.prev = add i16 %iv.next, 5 + %gep = getelementptr i16, ptr %a, i16 %iv + store i16 %use.rec.1, ptr %gep + br i1 %cmp, label %for.end, label %for.cond + +for.end: + ret void +} + +define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-IC-LABEL: @sink_into_replication_region( ; UNROLL-NO-IC-NEXT: bb: ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[Y:%.*]], 1 @@ -2564,74 +2714,18 @@ define i32 @sink_into_replication_region(i32 %y, ptr %dst) { ; UNROLL-NO-IC: pred.udiv.continue18: ; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_UDIV_CONTINUE16]] ], [ [[TMP37]], [[PRED_UDIV_IF17]] ] ; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; UNROLL-NO-IC-NEXT: br i1 [[TMP39]], label [[PRED_UDIV_IF19:%.*]], label [[PRED_UDIV_CONTINUE21:%.*]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP39]], label [[PRED_UDIV_IF19:%.*]], label [[PRED_UDIV_CONTINUE20]] ; UNROLL-NO-IC: pred.udiv.if19: ; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = add i32 [[OFFSET_IDX]], -7 ; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = udiv i32 219220132, [[TMP40]] ; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3 -; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE21]] +; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE20]] ; UNROLL-NO-IC: pred.udiv.continue20: ; UNROLL-NO-IC-NEXT: [[TMP43]] = phi <4 x i32> [ [[TMP38]], [[PRED_UDIV_CONTINUE18]] ], [ [[TMP42]], [[PRED_UDIV_IF19]] ] ; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP23]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> [[TMP43]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP46]] = add <4 x i32> [[VEC_PHI]], [[TMP44]] ; UNROLL-NO-IC-NEXT: [[TMP47]] = add <4 x i32> [[VEC_PHI1]], [[TMP45]] -; UNROLL-NO-IC-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 -; UNROLL-NO-IC-NEXT: br i1 [[TMP64]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; UNROLL-NO-IC: pred.store.if: -; UNROLL-NO-IC-NEXT: [[TMP65:%.*]] = extractelement <4 x i32> [[TMP44]], i32 0 -; UNROLL-NO-IC-NEXT: store i32 [[TMP65]], ptr [[DST:%.*]], align 4 -; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE]] -; UNROLL-NO-IC: pred.store.continue: -; UNROLL-NO-IC-NEXT: [[TMP66:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 -; UNROLL-NO-IC-NEXT: br i1 [[TMP66]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]] -; UNROLL-NO-IC: pred.store.if21: -; UNROLL-NO-IC-NEXT: [[TMP67:%.*]] = extractelement <4 x i32> [[TMP44]], i32 1 -; UNROLL-NO-IC-NEXT: 
store i32 [[TMP67]], ptr [[DST]], align 4 -; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE22]] -; UNROLL-NO-IC: pred.store.continue22: -; UNROLL-NO-IC-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 -; UNROLL-NO-IC-NEXT: br i1 [[TMP52]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]] -; UNROLL-NO-IC: pred.store.if23: -; UNROLL-NO-IC-NEXT: [[TMP53:%.*]] = extractelement <4 x i32> [[TMP44]], i32 2 -; UNROLL-NO-IC-NEXT: store i32 [[TMP53]], ptr [[DST]], align 4 -; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE24]] -; UNROLL-NO-IC: pred.store.continue24: -; UNROLL-NO-IC-NEXT: [[TMP54:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 -; UNROLL-NO-IC-NEXT: br i1 [[TMP54]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]] -; UNROLL-NO-IC: pred.store.if25: -; UNROLL-NO-IC-NEXT: [[TMP55:%.*]] = extractelement <4 x i32> [[TMP44]], i32 3 -; UNROLL-NO-IC-NEXT: store i32 [[TMP55]], ptr [[DST]], align 4 -; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE26]] -; UNROLL-NO-IC: pred.store.continue26: -; UNROLL-NO-IC-NEXT: [[TMP56:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0 -; UNROLL-NO-IC-NEXT: br i1 [[TMP56]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]] -; UNROLL-NO-IC: pred.store.if27: -; UNROLL-NO-IC-NEXT: [[TMP57:%.*]] = extractelement <4 x i32> [[TMP45]], i32 0 -; UNROLL-NO-IC-NEXT: store i32 [[TMP57]], ptr [[DST]], align 4 -; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE28]] -; UNROLL-NO-IC: pred.store.continue28: -; UNROLL-NO-IC-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1 -; UNROLL-NO-IC-NEXT: br i1 [[TMP58]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]] -; UNROLL-NO-IC: pred.store.if29: -; UNROLL-NO-IC-NEXT: [[TMP59:%.*]] = extractelement <4 x i32> [[TMP45]], i32 1 -; UNROLL-NO-IC-NEXT: store i32 [[TMP59]], ptr [[DST]], align 4 -; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE30]] -; UNROLL-NO-IC: pred.store.continue30: -; UNROLL-NO-IC-NEXT: [[TMP60:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2 -; UNROLL-NO-IC-NEXT: br i1 [[TMP60]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32:%.*]] -; UNROLL-NO-IC: pred.store.if31: -; UNROLL-NO-IC-NEXT: [[TMP61:%.*]] = extractelement <4 x i32> [[TMP45]], i32 2 -; UNROLL-NO-IC-NEXT: store i32 [[TMP61]], ptr [[DST]], align 4 -; UNROLL-NO-IC-NEXT: br label [[PRED_STORE_CONTINUE32]] -; UNROLL-NO-IC: pred.store.continue32: -; UNROLL-NO-IC-NEXT: [[TMP62:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3 -; UNROLL-NO-IC-NEXT: br i1 [[TMP62]], label [[PRED_STORE_IF33:%.*]], label [[PRED_UDIV_CONTINUE20]] -; UNROLL-NO-IC: pred.store.if33: -; UNROLL-NO-IC-NEXT: [[TMP63:%.*]] = extractelement <4 x i32> [[TMP45]], i32 3 -; UNROLL-NO-IC-NEXT: store i32 [[TMP63]], ptr [[DST]], align 4 -; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE20]] -; UNROLL-NO-IC: pred.store.continue34: ; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI1]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -2656,7 +2750,6 @@ define i32 @sink_into_replication_region(i32 %y, ptr %dst) { ; UNROLL-NO-IC-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] ; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 -; UNROLL-NO-IC-NEXT: store i32 [[VAR4]], ptr [[DST]], align 4 ; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 ; 
UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29:![0-9]+]], !llvm.loop [[LOOP30:![0-9]+]] ; @@ -2688,25 +2781,15 @@ define i32 @sink_into_replication_region(i32 %y, ptr %dst) { ; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE]] ; UNROLL-NO-VF: pred.udiv.continue: ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ] -; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4]] ; UNROLL-NO-VF: pred.udiv.if3: ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -1 ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = udiv i32 219220132, [[TMP7]] -; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE5]] +; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE4]] ; UNROLL-NO-VF: pred.udiv.continue4: ; UNROLL-NO-VF-NEXT: [[TMP9]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP8]], [[PRED_UDIV_IF3]] ] ; UNROLL-NO-VF-NEXT: [[TMP10]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]] ; UNROLL-NO-VF-NEXT: [[TMP11]] = add i32 [[VEC_PHI1]], [[TMP6]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; UNROLL-NO-VF: pred.store.if: -; UNROLL-NO-VF-NEXT: store i32 [[VECTOR_RECUR]], ptr [[DST:%.*]], align 4 -; UNROLL-NO-VF-NEXT: br label [[PRED_STORE_CONTINUE]] -; UNROLL-NO-VF: pred.store.continue: -; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF5:%.*]], label [[PRED_UDIV_CONTINUE4]] -; UNROLL-NO-VF: pred.store.if5: -; UNROLL-NO-VF-NEXT: store i32 [[TMP6]], ptr [[DST]], align 4 -; UNROLL-NO-VF-NEXT: br label [[PRED_UDIV_CONTINUE4]] -; UNROLL-NO-VF: pred.store.continue6: ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = select i1 [[TMP2]], i32 [[TMP10]], i32 [[VEC_PHI]] ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = select i1 [[TMP3]], i32 [[TMP11]], i32 [[VEC_PHI1]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -2730,7 +2813,6 @@ define i32 @sink_into_replication_region(i32 %y, ptr %dst) { ; UNROLL-NO-VF-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] ; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 -; UNROLL-NO-VF-NEXT: store i32 [[VAR4]], ptr [[DST]], align 4 ; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 ; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29:![0-9]+]], !llvm.loop [[LOOP30:![0-9]+]] ; @@ -2785,44 +2867,16 @@ define i32 @sink_into_replication_region(i32 %y, ptr %dst) { ; SINK-AFTER: pred.udiv.continue6: ; SINK-AFTER-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP16]], [[PRED_UDIV_IF5]] ] ; SINK-AFTER-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 -; SINK-AFTER-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]] +; SINK-AFTER-NEXT: br i1 [[TMP18]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8]] ; SINK-AFTER: pred.udiv.if7: ; SINK-AFTER-NEXT: [[TMP19:%.*]] = add i32 [[OFFSET_IDX]], -3 ; SINK-AFTER-NEXT: [[TMP20:%.*]] = udiv i32 219220132, [[TMP19]] ; SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i32 3 -; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE9]] +; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE8]] ; SINK-AFTER: pred.udiv.continue8: ; SINK-AFTER-NEXT: [[TMP22]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP21]], [[PRED_UDIV_IF7]] ] ; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector 
<4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]] -; SINK-AFTER-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 -; SINK-AFTER-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; SINK-AFTER: pred.store.if: -; SINK-AFTER-NEXT: [[TMP34:%.*]] = extractelement <4 x i32> [[TMP23]], i32 0 -; SINK-AFTER-NEXT: store i32 [[TMP34]], ptr [[DST:%.*]], align 4 -; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE]] -; SINK-AFTER: pred.store.continue: -; SINK-AFTER-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 -; SINK-AFTER-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] -; SINK-AFTER: pred.store.if9: -; SINK-AFTER-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP23]], i32 1 -; SINK-AFTER-NEXT: store i32 [[TMP28]], ptr [[DST]], align 4 -; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE10]] -; SINK-AFTER: pred.store.continue10: -; SINK-AFTER-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 -; SINK-AFTER-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] -; SINK-AFTER: pred.store.if11: -; SINK-AFTER-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP23]], i32 2 -; SINK-AFTER-NEXT: store i32 [[TMP30]], ptr [[DST]], align 4 -; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE12]] -; SINK-AFTER: pred.store.continue12: -; SINK-AFTER-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 -; SINK-AFTER-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF13:%.*]], label [[PRED_UDIV_CONTINUE8]] -; SINK-AFTER: pred.store.if13: -; SINK-AFTER-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[TMP23]], i32 3 -; SINK-AFTER-NEXT: store i32 [[TMP32]], ptr [[DST]], align 4 -; SINK-AFTER-NEXT: br label [[PRED_UDIV_CONTINUE8]] -; SINK-AFTER: pred.store.continue14: ; SINK-AFTER-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -2845,7 +2899,6 @@ define i32 @sink_into_replication_region(i32 %y, ptr %dst) { ; SINK-AFTER-NEXT: [[VAR6]] = add i32 [[VAR5]], [[VAR4]] ; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 -; SINK-AFTER-NEXT: store i32 [[VAR4]], ptr [[DST]], align 4 ; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 ; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF29:![0-9]+]], !llvm.loop [[LOOP30:![0-9]+]] ; @@ -2863,7 +2916,6 @@ bb: %var6 = add i32 %var5, %var4 %var7 = udiv i32 219220132, %var3 %var8 = add nsw i32 %var3, -1 - store i32 %var4, ptr %dst %var9 = icmp slt i32 %var3, 2 br i1 %var9, label %bb1, label %bb2, !prof !2 } @@ -3293,6 +3345,287 @@ bb: br i1 %var9, label %bb1, label %bb2, !prof !2 } +; %vec.dead will be marked as dead instruction in the vector loop and no recipe +; will be created for it. Make sure a valid sink target is used. 
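+; For illustration, the relevant slice of the scalar loop below (a sketch
+; quoting the source of @sink_after_dead_inst, not extra FileCheck output):
+;
+;   %for      = phi i32 [ 0, %entry ], [ %for.prev, %loop ]
+;   %cmp      = icmp eq i32 %for, 15        ; user of the recurrence
+;   %vec.dead = and i1 %C, 1                ; only feeds the exit branch
+;   %for.prev = zext i16 %B1 to i32         ; previous value of %for
+;   br i1 %vec.dead, label %for.end, label %loop
+;
+; Because %vec.dead gets no recipe, it must not be picked as the target when
+; the users of %for are sunk past %for.prev.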
+define i32 @sink_after_dead_inst(ptr %A.ptr) { +; UNROLL-NO-IC-LABEL: @sink_after_dead_inst( +; UNROLL-NO-IC-NEXT: entry: +; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-IC: vector.ph: +; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] +; UNROLL-NO-IC: vector.body: +; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4) +; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1) +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]] +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]] +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 4 +; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 +; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP6]], align 4 +; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 +; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 +; UNROLL-NO-IC-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] +; UNROLL-NO-IC: middle.block: +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 +; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; UNROLL-NO-IC: scalar.ph: +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] +; UNROLL-NO-IC: loop: +; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] +; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 +; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true +; UNROLL-NO-IC-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true +; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; UNROLL-NO-IC-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]] +; UNROLL-NO-IC-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]] +; UNROLL-NO-IC-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32 +; UNROLL-NO-IC-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 +; UNROLL-NO-IC-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]] +; UNROLL-NO-IC-NEXT: store i32 0, ptr [[A_GEP]], align 4 +; UNROLL-NO-IC-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP34:![0-9]+]] +; UNROLL-NO-IC: for.end: +; UNROLL-NO-IC-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: ret i32 [[FOR_LCSSA]] +; +; UNROLL-NO-VF-LABEL: @sink_after_dead_inst( +; UNROLL-NO-VF-NEXT: entry: +; UNROLL-NO-VF-NEXT: br i1 false, label 
[[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-VF: vector.ph: +; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] +; UNROLL-NO-VF: vector.body: +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[VECTOR_RECUR]] to i16 +; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i16 [[TMP1]], 1 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = or i16 [[TMP2]], [[TMP2]] +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = or i16 [[TMP3]], [[TMP3]] +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = zext i16 [[TMP5]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]] +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[TMP1]] +; UNROLL-NO-VF-NEXT: store i32 0, ptr [[TMP8]], align 4 +; UNROLL-NO-VF-NEXT: store i32 0, ptr [[TMP9]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP7]] = add nuw i32 [[VECTOR_RECUR]], 2 +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP7]], 16 +; UNROLL-NO-VF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] +; UNROLL-NO-VF: middle.block: +; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; UNROLL-NO-VF: scalar.ph: +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]] +; UNROLL-NO-VF: loop: +; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; UNROLL-NO-VF-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] +; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 +; UNROLL-NO-VF-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true +; UNROLL-NO-VF-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true +; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; UNROLL-NO-VF-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]] +; UNROLL-NO-VF-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]] +; UNROLL-NO-VF-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32 +; UNROLL-NO-VF-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 +; UNROLL-NO-VF-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]] +; UNROLL-NO-VF-NEXT: store i32 0, ptr [[A_GEP]], align 4 +; UNROLL-NO-VF-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP34:![0-9]+]] +; UNROLL-NO-VF: for.end: +; UNROLL-NO-VF-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: ret i32 [[FOR_LCSSA]] +; +; SINK-AFTER-LABEL: @sink_after_dead_inst( +; SINK-AFTER-NEXT: entry: +; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; SINK-AFTER: vector.ph: +; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] +; SINK-AFTER: vector.body: +; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 +; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1) +; SINK-AFTER-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]] +; 
SINK-AFTER-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> +; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]] +; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 +; SINK-AFTER-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 +; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) +; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 +; SINK-AFTER-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] +; SINK-AFTER: middle.block: +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 +; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; SINK-AFTER: scalar.ph: +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; SINK-AFTER-NEXT: br label [[LOOP:%.*]] +; SINK-AFTER: loop: +; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; SINK-AFTER-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ] +; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[FOR]], 15 +; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i1 [[CMP]], true +; SINK-AFTER-NEXT: [[VEC_DEAD:%.*]] = and i1 [[C]], true +; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; SINK-AFTER-NEXT: [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]] +; SINK-AFTER-NEXT: [[B3:%.*]] = and i1 [[CMP]], [[C]] +; SINK-AFTER-NEXT: [[FOR_PREV]] = zext i16 [[B1]] to i32 +; SINK-AFTER-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 +; SINK-AFTER-NEXT: [[A_GEP:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[IV]] +; SINK-AFTER-NEXT: store i32 0, ptr [[A_GEP]], align 4 +; SINK-AFTER-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP34:![0-9]+]] +; SINK-AFTER: for.end: +; SINK-AFTER-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: ret i32 [[FOR_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ] + %for = phi i32 [ 0, %entry ], [ %for.prev, %loop ] + %cmp = icmp eq i32 %for, 15 + %C = icmp eq i1 %cmp, true + %vec.dead = and i1 %C, 1 + %iv.next = add i16 %iv, 1 + %B1 = or i16 %iv.next, %iv.next + %B3 = and i1 %cmp, %C + %for.prev = zext i16 %B1 to i32 + + %ext = zext i1 %B3 to i32 + %A.gep = getelementptr i32, ptr %A.ptr, i16 %iv + store i32 0, ptr %A.gep + br i1 %vec.dead, label %for.end, label %loop + +for.end: + ret i32 %for +} + +; %rec.1 only has %use.rec.1 as use, which can be removed. This enables %rec.1 +; to be removed also. 
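+; To make the chain concrete (a sketch quoting the source below, not extra
+; FileCheck output):
+;
+;   %rec.1     = phi i16 [ 0, %entry ], [ %rec.1.prev, %for.cond ]
+;   %use.rec.1 = sub i16 %rec.1, 10    ; result has no users
+;
+; With %use.rec.1 gone, %rec.1 has no remaining users inside the loop, so the
+; vector bodies below carry no recurrence phi; only the extract feeding the
+; resume value in the scalar preheader is kept.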
+define void @unused_recurrence(ptr %a) {
+; UNROLL-NO-IC-LABEL: @unused_recurrence(
+; UNROLL-NO-IC-NEXT: entry:
+; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; UNROLL-NO-IC: vector.ph:
+; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
+; UNROLL-NO-IC: vector.body:
+; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
+; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1)
+; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add <4 x i16> [[TMP0]], splat (i16 5)
+; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
+; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
+; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
+; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
+; UNROLL-NO-IC: middle.block:
+; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+; UNROLL-NO-IC-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; UNROLL-NO-IC: scalar.ph:
+; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 997, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ]
+; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; UNROLL-NO-IC-NEXT: br label [[FOR_COND:%.*]]
+; UNROLL-NO-IC: for.cond:
+; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
+; UNROLL-NO-IC-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
+; UNROLL-NO-IC-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10
+; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
+; UNROLL-NO-IC-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
+; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000
+; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
+; UNROLL-NO-IC: for.end:
+; UNROLL-NO-IC-NEXT: ret void
+;
+; UNROLL-NO-VF-LABEL: @unused_recurrence(
+; UNROLL-NO-VF-NEXT: entry:
+; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; UNROLL-NO-VF: vector.ph:
+; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
+; UNROLL-NO-VF: vector.body:
+; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]]
+; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 1
+; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 1
+; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i16 [[TMP1]], 5
+; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1028
+; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
+; UNROLL-NO-VF: middle.block:
+; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; UNROLL-NO-VF: scalar.ph:
+; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 1001, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ]
+; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; UNROLL-NO-VF-NEXT: br label [[FOR_COND:%.*]]
+; UNROLL-NO-VF: for.cond:
+; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
+; UNROLL-NO-VF-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
+; UNROLL-NO-VF-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10
+; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
+; UNROLL-NO-VF-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
+; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000
+; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
+; UNROLL-NO-VF: for.end:
+; UNROLL-NO-VF-NEXT: ret void
+;
+; SINK-AFTER-LABEL: @unused_recurrence(
+; SINK-AFTER-NEXT: entry:
+; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SINK-AFTER: vector.ph:
+; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
+; SINK-AFTER: vector.body:
+; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SINK-AFTER-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SINK-AFTER-NEXT: [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1)
+; SINK-AFTER-NEXT: [[TMP1:%.*]] = add <4 x i16> [[TMP0]], splat (i16 5)
+; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
+; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1028
+; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
+; SINK-AFTER: middle.block:
+; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3
+; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; SINK-AFTER: scalar.ph:
+; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 1001, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ]
+; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; SINK-AFTER-NEXT: br label [[FOR_COND:%.*]]
+; SINK-AFTER: for.cond:
+; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
+; SINK-AFTER-NEXT: [[REC_1:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
+; SINK-AFTER-NEXT: [[USE_REC_1:%.*]] = sub i16 [[REC_1]], 10
+; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
+; SINK-AFTER-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
+; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i16 [[IV]], 1000
+; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]]
+; SINK-AFTER: for.end:
+; SINK-AFTER-NEXT: ret void
+;
+entry:
+ br label %for.cond
+
+for.cond:
+ %iv = phi i16 [ -27, %entry ], [ %iv.next, %for.cond ]
+ %rec.1 = phi i16 [ 0, %entry ], [ %rec.1.prev, %for.cond ]
+ %use.rec.1 = sub i16 %rec.1, 10
+ %iv.next = add i16 %iv, 1
+ %rec.1.prev = add i16 %iv.next, 5
+ %cmp = icmp eq i16 %iv, 1000
+ br i1 %cmp, label %for.end, label %for.cond
+
+for.end:
+ ret void
+}
+
 ; Test case for https://github.com/llvm/llvm-project/issues/95520.
define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) { ; UNROLL-NO-IC-LABEL: @recurence_uniform_load( diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll index a622193290c8f..e27734755dfb2 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll @@ -59,8 +59,8 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) { ; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]] ; CHECK: [[PRED_STORE_CONTINUE4]]: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] @@ -132,8 +132,8 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) { ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] @@ -203,8 +203,8 @@ define void @ptr_doesnt_depend_on_poison_or_ub(ptr noalias %dst, i16 noundef %of ; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] @@ -276,8 +276,8 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) { ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]] @@ -350,8 +350,8 @@ define void @ptr_depends_on_noundef_load(ptr noalias %dst) { ; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; CHECK-NEXT: br i1 
true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: br i1 true, [[EXIT:label %.*]], label %[[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index d973e451d887d..a4f2b077cb066 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -282,3 +282,141 @@ for.cond: ; preds = %for.body, %entry
 for.end: ; preds = %for.cond
 ret void
 }
+
+; Test that when WidenPointerInductionRecipes are ordered before other
+; WidenIntOrFpInductionRecipes, their PHIs are emitted in the right place.
+define void @outside_lattice(ptr noalias %p, ptr noalias %q, i32 %n) {
+; DEFAULT-LABEL: @outside_lattice(
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64
+; DEFAULT-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
+; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; DEFAULT: vector.scevcheck:
+; DEFAULT-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
+; DEFAULT-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1
+; DEFAULT-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
+; DEFAULT-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; DEFAULT: vector.ph:
+; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4
+; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
+; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
+; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]]
+; DEFAULT-NEXT: [[IND_END2:%.*]] = trunc i64 [[N_VEC]] to i32
+; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
+; DEFAULT: vector.body:
+; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; DEFAULT-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; DEFAULT-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
+; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
+; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[OFFSET_IDX]]
+; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
+; DEFAULT-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 8
+; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[OFFSET_IDX]]
+; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
+; DEFAULT-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4
+; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; DEFAULT-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
+; DEFAULT-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; DEFAULT: middle.block:
+; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; DEFAULT: scalar.ph:
+; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ], [ [[P]], [[VECTOR_SCEVCHECK]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; DEFAULT-NEXT: br label [[FOR_BODY:%.*]]
+; DEFAULT: for.body:
+; DEFAULT-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ]
+; DEFAULT-NEXT: [[IV_INT:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[IV_INT_NEXT:%.*]], [[FOR_BODY]] ]
+; DEFAULT-NEXT: [[P_GEP:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[IV_INT]]
+; DEFAULT-NEXT: store ptr [[IV_PTR]], ptr [[P_GEP]], align 8
+; DEFAULT-NEXT: [[Q_GEP:%.*]] = getelementptr inbounds i32, ptr [[Q]], i32 [[IV_INT]]
+; DEFAULT-NEXT: store i32 [[IV_INT]], ptr [[Q_GEP]], align 4
+; DEFAULT-NEXT: [[IV_INT_NEXT]] = add i32 [[IV_INT]], 1
+; DEFAULT-NEXT: [[IV_PTR_NEXT]] = getelementptr inbounds i32, ptr [[IV_PTR]], i32 1
+; DEFAULT-NEXT: [[DONE:%.*]] = icmp ult i32 [[IV_INT_NEXT]], [[N]]
+; DEFAULT-NEXT: br i1 [[DONE]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP7:![0-9]+]]
+; DEFAULT: for.end:
+; DEFAULT-NEXT: ret void
+;
+; STRIDED-LABEL: @outside_lattice(
+; STRIDED-NEXT: entry:
+; STRIDED-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64
+; STRIDED-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1)
+; STRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 4
+; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; STRIDED: vector.scevcheck:
+; STRIDED-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
+; STRIDED-NEXT: [[TMP1:%.*]] = add i32 [[UMAX]], -1
+; STRIDED-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0
+; STRIDED-NEXT: br i1 [[TMP2]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; STRIDED: vector.ph:
+; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 4
+; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
+; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
+; STRIDED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]]
+; STRIDED-NEXT: [[IND_END2:%.*]] = trunc i64 [[N_VEC]] to i32
+; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
+; STRIDED: vector.body:
+; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
+; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
+; STRIDED-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[OFFSET_IDX]]
+; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
+; STRIDED-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP6]], align 8
+; STRIDED-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i32 [[OFFSET_IDX]]
+; STRIDED-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
+; STRIDED-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP8]], align 4
+; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
+; STRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; STRIDED-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; STRIDED: middle.block:
+;
STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] +; STRIDED-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; STRIDED: scalar.ph: +; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY:%.*]] ], [ [[P]], [[VECTOR_SCEVCHECK]] ] +; STRIDED-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; STRIDED-NEXT: br label [[FOR_BODY:%.*]] +; STRIDED: for.body: +; STRIDED-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], [[FOR_BODY]] ] +; STRIDED-NEXT: [[IV_INT:%.*]] = phi i32 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[IV_INT_NEXT:%.*]], [[FOR_BODY]] ] +; STRIDED-NEXT: [[P_GEP:%.*]] = getelementptr inbounds ptr, ptr [[P]], i32 [[IV_INT]] +; STRIDED-NEXT: store ptr [[IV_PTR]], ptr [[P_GEP]], align 8 +; STRIDED-NEXT: [[Q_GEP:%.*]] = getelementptr inbounds i32, ptr [[Q]], i32 [[IV_INT]] +; STRIDED-NEXT: store i32 [[IV_INT]], ptr [[Q_GEP]], align 4 +; STRIDED-NEXT: [[IV_INT_NEXT]] = add i32 [[IV_INT]], 1 +; STRIDED-NEXT: [[IV_PTR_NEXT]] = getelementptr inbounds i32, ptr [[IV_PTR]], i32 1 +; STRIDED-NEXT: [[DONE:%.*]] = icmp ult i32 [[IV_INT_NEXT]], [[N]] +; STRIDED-NEXT: br i1 [[DONE]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP9:![0-9]+]] +; STRIDED: for.end: +; STRIDED-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %iv.ptr = phi ptr [ %p, %entry ], [ %iv.ptr.next, %for.body ] + %iv.int = phi i32 [ 0, %entry ], [ %iv.int.next, %for.body ] + + %p.gep = getelementptr inbounds ptr, ptr %p, i32 %iv.int + store ptr %iv.ptr, ptr %p.gep + + %q.gep = getelementptr inbounds i32, ptr %q, i32 %iv.int + store i32 %iv.int, ptr %q.gep + + %iv.int.next = add i32 %iv.int, 1 + %iv.ptr.next = getelementptr inbounds i32, ptr %iv.ptr, i32 1 + + %done = icmp ult i32 %iv.int.next, %n + br i1 %done, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index d497f0c22dbbc..fbe3a7a470e86 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -116,7 +116,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] -; CHECK: pred.load.if3: +; CHECK: pred.load.if2: ; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 @@ -125,12 +125,12 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.continue4: +; CHECK: pred.load.continue3: ; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: 
[[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2 ; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] -; CHECK: pred.load.if5: +; CHECK: pred.load.if4: ; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 @@ -139,12 +139,12 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 ; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] -; CHECK: pred.load.continue6: +; CHECK: pred.load.continue5: ; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3 ; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.if7: +; CHECK: pred.load.if6: ; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 @@ -153,7 +153,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 ; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: +; CHECK: pred.load.continue7: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ] ; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> zeroinitializer @@ -321,7 +321,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] -; CHECK: pred.load.if3: +; CHECK: pred.load.if2: ; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 @@ -330,12 +330,12 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.continue4: +; CHECK: pred.load.continue3: ; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2 ; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] -; CHECK: pred.load.if5: +; CHECK: pred.load.if4: ; CHECK-NEXT: 
[[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 @@ -344,12 +344,12 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 ; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] -; CHECK: pred.load.continue6: +; CHECK: pred.load.continue5: ; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3 ; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.if7: +; CHECK: pred.load.if6: ; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 @@ -358,7 +358,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 ; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: +; CHECK: pred.load.continue7: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ] ; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> splat (i32 1) @@ -436,7 +436,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP13]], [[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] -; CHECK: pred.load.if3: +; CHECK: pred.load.if2: ; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 @@ -445,12 +445,12 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP18]], align 4 ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP22]], i64 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.continue4: +; CHECK: pred.load.continue3: ; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP23]], [[PRED_LOAD_IF3]] ] ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2 ; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] -; CHECK: pred.load.if5: +; CHECK: pred.load.if4: ; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 @@ -459,12 +459,12 @@ 
define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP28]], align 4
 ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x i32> [[TMP24]], i32 [[TMP32]], i64 2
 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
-; CHECK: pred.load.continue6:
+; CHECK: pred.load.continue5:
 ; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT: [[TMP34:%.*]] = phi <4 x i32> [ [[TMP24]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP33]], [[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
 ; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
-; CHECK: pred.load.if7:
+; CHECK: pred.load.if6:
 ; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
 ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
 ; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
@@ -473,7 +473,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP38]], align 4
 ; CHECK-NEXT: [[TMP49:%.*]] = insertelement <4 x i32> [[TMP34]], i32 [[TMP48]], i64 3
 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
-; CHECK: pred.load.continue8:
+; CHECK: pred.load.continue7:
 ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
 ; CHECK-NEXT: [[TMP50:%.*]] = phi <4 x i32> [ [[TMP34]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP49]], [[PRED_LOAD_IF7]] ]
 ; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
index a70d8f72c8a33..bb84dbf8ed236 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
@@ -658,9 +658,9 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
 ; CHECK-VF4UF1-NEXT: [[N_VEC:%.*]] = sub i32 96, [[N_MOD_VF]]
 ; CHECK-VF4UF1-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-VF4UF1-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 4
-; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
 ; CHECK-VF4UF1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
 ; CHECK-VF4UF1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
 ; CHECK-VF4UF1-NEXT: [[TMP7:%.*]] = mul <vscale x 4 x i32> [[TMP6]], splat (i32 1)
 ; CHECK-VF4UF1-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP7]]
 ; CHECK-VF4UF1-NEXT: [[TMP8:%.*]] = mul i32 1, [[TMP5]]
@@ -707,17 +707,17 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
 ; CHECK-VF4UF2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 4
 ; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2
-; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
 ; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
 ; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
 ; CHECK-VF4UF2-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i32> [[TMP7]], splat (i32 1)
 ; CHECK-VF4UF2-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]]
-; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP5]], i64 0
-; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-VF4UF2-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK-VF4UF2: [[VECTOR_BODY]]:
 ; CHECK-VF4UF2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4UF2-NEXT: [[STEP_ADD:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT1:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP5]], i64 0
+; CHECK-VF4UF2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-VF4UF2-NEXT: [[VEC_IND_NEXT:%.*]] = add <vscale x 4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT2]]
 ; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[VEC_IND_NEXT]], [[BROADCAST_SPLAT]]
 ; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
index f136b0e2e0b31..10f96284c0184 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
@@ -23,12 +23,12 @@ define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture
 ; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP4]], 1
 ; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 2
 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP8]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i64> zeroinitializer
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
@@ -103,12 +103,12 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no
 ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 1
 ; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP4]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i64> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP4]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i64> zeroinitializer
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
index 2a48e0a5e5310..15db687ba64ff 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll
@@ -19,17 +19,17 @@ define i32 @iv_live_out_wide(ptr %dst) {
 ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 2
 ; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[STEP_2]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
 ; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i32> [[TMP7]], splat (i32 1)
 ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP8]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP5]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP5]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[INDEX]]
 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index 5ff43dcf42bcf..ec1e8fa1e1b33 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -918,8 +918,8 @@ define i64 @same_exit_block_pre_inc_use4() {
 ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4
 ; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; CHECK: middle.split:
 ; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
index 130db548ca8cb..fe533672f2ca2 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
@@ -291,8 +291,8 @@ define void @redundant_branch_and_blends_without_mask(ptr %A) {
 ; CHECK-NEXT: store i32 [[TMP34]], ptr [[TMP8]], align 4
 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE12]]
 ; CHECK: [[PRED_STORE_CONTINUE12]]:
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: br label %[[EXIT:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
index 59277186195fc..7654bc9a141e0 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll
@@ -65,3 +65,68 @@ loop.latch:
 exit:
 ret void
 }
+
+; Check that VPWidenIntOrFpInductionRecipe is expanded into smaller recipes in
+; the final VPlan.
+define void @iv_expand(ptr %p, i64 %n) {
+; CHECK-LABEL: LV: Checking a loop in 'iv_expand'
+; CHECK: VPlan 'Initial VPlan for VF={8},UF>=1' {
+; CHECK: vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<%0>
+; CHECK-NEXT: vp<%4> = SCALAR-STEPS vp<%3>, ir<1>
+; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<%4>
+; CHECK-NEXT: vp<%5> = vector-pointer ir<%q>
+; CHECK-NEXT: WIDEN ir<%x> = load vp<%5>
+; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%iv>
+; CHECK-NEXT: vp<%6> = vector-pointer ir<%q>
+; CHECK-NEXT: WIDEN store vp<%6>, ir<%y>
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%3>, vp<%1>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+; CHECK: VPlan 'Final VPlan for VF={8},UF={1}'
+; CHECK: ir-bb<vector.ph>:
+; CHECK-NEXT: IR %n.mod.vf = urem i64 %n, 8
+; CHECK-NEXT: IR %n.vec = sub i64 %n, %n.mod.vf
+; CHECK-NEXT: EMIT vp<[[STEP_VECTOR:%.+]]> = step-vector
+; CHECK-NEXT: EMIT vp<[[BROADCAST_0:%.+]]> = broadcast ir<0>
+; CHECK-NEXT: EMIT vp<[[BROADCAST_1:%.+]]> = broadcast ir<1>
+; CHECK-NEXT: EMIT vp<[[MUL:%.+]]> = mul vp<[[STEP_VECTOR]]>, vp<[[BROADCAST_1]]>
+; CHECK-NEXT: EMIT vp<[[INDUCTION:%.+]]> = add vp<[[BROADCAST_0]]>, vp<[[MUL]]>
+; CHECK-NEXT: EMIT vp<[[TRUNC:%.+]]> = trunc ir<8> to i64
+; CHECK-NEXT: EMIT vp<[[INC:%.+]]> = mul ir<1>, vp<[[TRUNC]]>
+; CHECK-NEXT: EMIT vp<[[BROADCAST_INC:%.+]]> = broadcast vp<[[INC]]>
+; CHECK-NEXT: Successor(s): vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT-SCALAR vp<[[SCALAR_PHI:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT: WIDEN-PHI ir<%iv> = phi [ vp<[[INDUCTION]]>, ir-bb<vector.ph> ], [ vp<%vec.ind.next>, vector.body ]
+; CHECK-NEXT: CLONE ir<%q> = getelementptr ir<%p>, vp<[[SCALAR_PHI]]>
+; CHECK-NEXT: vp<[[VEC_PTR_1:%.+]]> = vector-pointer ir<%q>
+; CHECK-NEXT: WIDEN ir<%x> = load vp<[[VEC_PTR_1]]>
+; CHECK-NEXT: WIDEN ir<%y> = add ir<%x>, ir<%iv>
+; CHECK-NEXT: vp<[[VEC_PTR_2:%.+]]> = vector-pointer ir<%q>
+; CHECK-NEXT: WIDEN store vp<[[VEC_PTR_2]]>, ir<%y>
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<[[SCALAR_PHI]]>, ir<8>
+; CHECK-NEXT: EMIT vp<%vec.ind.next> = add ir<%iv>, vp<[[BROADCAST_INC]]>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, ir<%n.vec>
+; CHECK-NEXT: Successor(s): middle.block, vector.body
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [0, %entry], [%iv.next, %loop]
+ %q = getelementptr i64, ptr %p, i64 %iv
+ %x = load i64, ptr %q
+ %y = add i64 %x, %iv
+ store i64 %y, ptr %q
+ %iv.next = add i64 %iv, 1
+ %done = icmp eq i64 %iv.next, %n
+ br i1 %done, label %exit, label %loop
+
+exit:
+ ret void
+}
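
The iv_expand checks above capture the heart of this change: instead of VPWidenIntOrFpInductionRecipe::execute emitting IR directly, the final VPlan now builds the widened IV out of primitive recipes, broadcast(start) + step-vector * broadcast(step) in the preheader and vec.ind + broadcast(step * VF) on the back edge. The standalone C++ sketch below models only that per-lane arithmetic; VF, Start, and Step mirror the test's values, and the helper names are illustrative, not VPlan APIs.

    // Toy model of the lane values computed by the expanded widened-IV
    // recipes (step-vector, broadcast, mul, add) in the final VPlan above.
    #include <array>
    #include <cstdint>
    #include <cstdio>

    constexpr unsigned VF = 8; // matches VF={8} in the test; illustrative
    using Vec = std::array<int64_t, VF>;

    static Vec broadcast(int64_t X) { // "broadcast" recipe: splat a scalar
      Vec V;
      V.fill(X);
      return V;
    }

    static Vec stepVector() { // "step-vector" recipe: lanes <0, 1, ..., VF-1>
      Vec V;
      for (unsigned I = 0; I < VF; ++I)
        V[I] = static_cast<int64_t>(I);
      return V;
    }

    static Vec add(const Vec &A, const Vec &B) { // element-wise "add"
      Vec R;
      for (unsigned I = 0; I < VF; ++I)
        R[I] = A[I] + B[I];
      return R;
    }

    static Vec mul(const Vec &A, const Vec &B) { // element-wise "mul"
      Vec R;
      for (unsigned I = 0; I < VF; ++I)
        R[I] = A[I] * B[I];
      return R;
    }

    int main() {
      const int64_t Start = 0, Step = 1; // iv_expand starts at 0, step 1
      // Preheader: INDUCTION = broadcast(Start) + step-vector * broadcast(Step).
      Vec VecInd = add(broadcast(Start), mul(stepVector(), broadcast(Step)));
      // Back edge: vec.ind.next = vec.ind + broadcast(Step * VF).
      const Vec Inc = broadcast(Step * static_cast<int64_t>(VF));
      for (int Iter = 0; Iter < 3; ++Iter) {
        std::printf("iter %d: lanes %lld..%lld\n", Iter,
                    static_cast<long long>(VecInd.front()),
                    static_cast<long long>(VecInd.back()));
        VecInd = add(VecInd, Inc); // what the EMIT vp<%vec.ind.next> add does
      }
      return 0;
    }

Lane L at vector iteration K then holds Start + (K * VF + L) * Step, exactly the value the scalar IV would have on that iteration, which is why the expanded recipes can stand in for the abstract WIDEN-INDUCTION without changing semantics.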