diff --git a/llvm/include/llvm/IR/VectorBuilder.h b/llvm/include/llvm/IR/VectorBuilder.h
index b0277c2b52595..830163984e37b 100644
--- a/llvm/include/llvm/IR/VectorBuilder.h
+++ b/llvm/include/llvm/IR/VectorBuilder.h
@@ -99,11 +99,11 @@ class VectorBuilder {
                                   const Twine &Name = Twine());
 
   /// Emit a VP reduction intrinsic call for recurrence kind.
-  /// \param RdxID       The intrinsic ID of llvm.vector.reduce.*
+  /// \param ID          The intrinsic ID for which to emit the VP intrinsic.
   /// \param ValTy       The type of operand which the reduction operation is
   ///                    performed.
   /// \param VecOpArray  The operand list.
-  Value *createSimpleReduction(Intrinsic::ID RdxID, Type *ValTy,
+  Value *createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
                                ArrayRef<Value *> VecOpArray,
                                const Twine &Name = Twine());
 };
diff --git a/llvm/lib/IR/VectorBuilder.cpp b/llvm/lib/IR/VectorBuilder.cpp
index 737f49b1334d7..d629a2fb6af7b 100644
--- a/llvm/lib/IR/VectorBuilder.cpp
+++ b/llvm/lib/IR/VectorBuilder.cpp
@@ -60,13 +60,12 @@ Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy,
   return createVectorInstructionImpl(VPID, ReturnTy, InstOpArray, Name);
 }
 
-Value *VectorBuilder::createSimpleReduction(Intrinsic::ID RdxID,
-                                            Type *ValTy,
+Value *VectorBuilder::createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
                                             ArrayRef<Value *> InstOpArray,
                                             const Twine &Name) {
-  auto VPID = VPIntrinsic::getForIntrinsic(RdxID);
-  assert(VPReductionIntrinsic::isVPReduction(VPID) &&
-         "No VPIntrinsic for this reduction");
+  auto VPID = VPIntrinsic::getForIntrinsic(ID);
+  assert(VPIntrinsic::isVPIntrinsic(VPID) &&
+         "No VPIntrinsic for this intrinsic");
   return createVectorInstructionImpl(VPID, ValTy, InstOpArray, Name);
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index b84c94f718b28..d30a72bcf187f 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1118,6 +1118,8 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
   case Intrinsic::vp_udiv:
   case Intrinsic::vp_urem:
   case Intrinsic::vp_xor:
+  // TODO: Address this in a follow-up patch.
+  case Intrinsic::vp_fneg:
   // vp float arithmetic ops.
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fsub:
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 70047273c3b9a..2dac2d43f7f3a 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1300,7 +1300,7 @@ Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
   Type *SrcEltTy = SrcTy->getElementType();
   Value *Iden = getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
   Value *Ops[] = {Iden, Src};
-  return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
+  return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
 }
 
 Value *llvm::createReduction(IRBuilderBase &B,
@@ -1343,7 +1343,7 @@ Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,
   Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);
   auto *SrcTy = cast<VectorType>(Src->getType());
   Value *Ops[] = {Start, Src};
-  return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
+  return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
 }
 
 void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 421bb9a40da47..811b1425fdb38 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11720,7 +11720,7 @@ InstructionCost BoUpSLP::getSpillCost() const {
         if (auto *FPMO = dyn_cast<FPMathOperator>(II))
           FMF = FPMO->getFastMathFlags();
         IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), Tys,
-                                    FMF);
+                                    FMF, II);
         InstructionCost IntrCost =
             TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);
         InstructionCost CallCost = TTI->getCallInstrCost(
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 4e5878cae2ddc..6a0b283dacbe1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -868,7 +868,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     case VPRecipeBase::VPWidenGEPSC:
     case VPRecipeBase::VPWidenIntrinsicSC:
     case VPRecipeBase::VPWidenSC:
-    case VPRecipeBase::VPWidenEVLSC:
     case VPRecipeBase::VPWidenSelectSC:
     case VPRecipeBase::VPBlendSC:
     case VPRecipeBase::VPPredInstPHISC:
@@ -1063,7 +1062,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
   static inline bool classof(const VPRecipeBase *R) {
     return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
            R->getVPDefID() == VPRecipeBase::VPWidenSC ||
-           R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
           R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
            R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
            R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
@@ -1436,16 +1434,11 @@ class VPIRInstruction : public VPRecipeBase {
 class VPWidenRecipe : public VPRecipeWithIRFlags {
   unsigned Opcode;
 
-protected:
-  template <typename IterT>
-  VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
-                iterator_range<IterT> Operands)
-      : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
-
 public:
   template <typename IterT>
   VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
-      : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
+      : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
+        Opcode(I.getOpcode()) {}
 
   ~VPWidenRecipe() override = default;
 
@@ -1455,15 +1448,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
     return R;
   }
 
-  static inline bool classof(const VPRecipeBase *R) {
-    return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
-           R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
-  }
-
-  static inline bool classof(const VPUser *U) {
-    auto *R = dyn_cast<VPRecipeBase>(U);
-    return R && classof(R);
-  }
+  VP_CLASSOF_IMPL(VPDef::VPWidenSC)
 
   /// Produce a widened instruction using the opcode and operands of the recipe,
   /// processing State.VF elements.
@@ -1482,54 +1467,6 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
 #endif
 };
 
-/// A recipe for widening operations with vector-predication intrinsics with
-/// explicit vector length (EVL).
-class VPWidenEVLRecipe : public VPWidenRecipe {
-  using VPRecipeWithIRFlags::transferFlags;
-
-public:
-  template <typename IterT>
-  VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
-      : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
-    addOperand(&EVL);
-  }
-  VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
-      : VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
-    transferFlags(W);
-  }
-
-  ~VPWidenEVLRecipe() override = default;
-
-  VPWidenRecipe *clone() override final {
-    llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
-    return nullptr;
-  }
-
-  VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
-
-  VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
-  const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
-
-  /// Produce a vp-intrinsic using the opcode and operands of the recipe,
-  /// processing EVL elements.
-  void execute(VPTransformState &State) override final;
-
-  /// Returns true if the recipe only uses the first lane of operand \p Op.
-  bool onlyFirstLaneUsed(const VPValue *Op) const override {
-    assert(is_contained(operands(), Op) &&
-           "Op must be an operand of the recipe");
-    // EVL in that recipe is always the last operand, thus any use before means
-    // the VPValue should be vectorized.
-    return getEVL() == Op;
-  }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  /// Print the recipe.
-  void print(raw_ostream &O, const Twine &Indent,
-             VPSlotTracker &SlotTracker) const override final;
-#endif
-};
-
 /// VPWidenCastRecipe is a recipe to create vector cast instructions.
 class VPWidenCastRecipe : public VPRecipeWithIRFlags {
   /// Cast instruction opcode.
@@ -1648,6 +1585,16 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
         MayWriteToMemory(CI.mayWriteToMemory()),
         MayHaveSideEffects(CI.mayHaveSideEffects()) {}
 
+  template <typename IterT>
+  VPWidenIntrinsicRecipe(Instruction &I, Intrinsic::ID VectorIntrinsicID,
+                         iterator_range<IterT> Operands, Type *Ty,
+                         DebugLoc DL = {})
+      : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, Operands, I),
+        VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
+        MayReadFromMemory(I.mayReadFromMemory()),
+        MayWriteToMemory(I.mayWriteToMemory()),
+        MayHaveSideEffects(I.mayHaveSideEffects()) {}
+
   VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
                          ArrayRef<VPValue *> CallArguments, Type *Ty,
                          DebugLoc DL = {})
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 8b8ab6be99b0d..f50a1286316a0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -267,9 +267,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
               [this](const VPRecipeBase *R) {
                 return inferScalarType(R->getOperand(0));
               })
-          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
-                VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
-                VPWidenSelectRecipe>(
+          .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
+                VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
               [this](const auto *R) { return inferScalarTypeForRecipe(R); })
           .Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
             return R->getResultType();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 2ecd546633825..42928aa47cfb6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -99,7 +99,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
   case VPWidenLoadSC:
   case VPWidenPHISC:
   case VPWidenSC:
-  case VPWidenEVLSC:
   case VPWidenSelectSC: {
     const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -143,7 +142,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
   case VPWidenIntOrFpInductionSC:
   case VPWidenPHISC:
   case VPWidenSC:
-  case VPWidenEVLSC:
   case VPWidenSelectSC: {
     const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -184,7 +182,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
   case VPWidenPHISC:
   case VPWidenPointerInductionSC:
   case VPWidenSC:
-  case VPWidenEVLSC:
   case VPWidenSelectSC: {
     const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -970,24 +967,52 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
     Args.push_back(Arg);
   }
 
-  // Use vector version of the intrinsic.
-  Module *M = State.Builder.GetInsertBlock()->getModule();
-  Function *VectorF =
-      Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
-  assert(VectorF && "Can't retrieve vector intrinsic.");
+  if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
+      VectorIntrinsicID != Intrinsic::vp_select) {
+    VectorBuilder VBuilder(State.Builder);
+    Value *Mask =
+        State.Builder.CreateVectorSplat(State.VF, State.Builder.getTrue());
+    VBuilder.setMask(Mask).setEVL(Args.back());
+    // Remove EVL from Args.
+    Args.pop_back();
+
+    if (VPCmpIntrinsic::isVPCmp(VectorIntrinsicID)) {
+      auto &Ctx = State.Builder.getContext();
+      Value *Pred = MetadataAsValue::get(
+          Ctx, MDString::get(Ctx, CmpInst::getPredicateName(getPredicate())));
+      Args.push_back(Pred);
+    }
 
-  auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
-  SmallVector<OperandBundleDef, 1> OpBundles;
-  if (CI)
-    CI->getOperandBundlesAsDefs(OpBundles);
+    Value *VPInst = VBuilder.createSimpleIntrinsic(
+        VectorIntrinsicID, TysForDecl[0], Args, "vp.call");
 
-  CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
+    if (isa<FPMathOperator>(VPInst))
+      setFlags(cast<Instruction>(VPInst));
 
-  setFlags(V);
+    if (!VPInst->getType()->isVoidTy())
+      State.set(this, VPInst);
+    State.addMetadata(VPInst,
+                      dyn_cast_or_null<Instruction>(getUnderlyingValue()));
+  } else {
+    // Use vector version of the intrinsic.
+    Module *M = State.Builder.GetInsertBlock()->getModule();
+    Function *VectorF =
+        Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
+    assert(VectorF && "Can't retrieve vector intrinsic.");
 
-  if (!V->getType()->isVoidTy())
-    State.set(this, V);
-  State.addMetadata(V, CI);
+    auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
+    SmallVector<OperandBundleDef, 1> OpBundles;
+    if (CI)
+      CI->getOperandBundlesAsDefs(OpBundles);
+
+    CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
+
+    setFlags(V);
+
+    if (!V->getType()->isVoidTy())
+      State.set(this, V);
+    State.addMetadata(V, CI);
+  }
 }
 
 InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
@@ -1019,6 +1044,18 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
     ParamTys.push_back(
         ToVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));
 
+  if (std::optional<unsigned> FOp =
+          VPIntrinsic::getFunctionalOpcodeForVP(VectorIntrinsicID)) {
+    if (VPCmpIntrinsic::isVPCmp(VectorIntrinsicID)) {
+      Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
+      Type *VectorTy = ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
+      return Ctx.TTI.getCmpSelInstrCost(FOp.value(), VectorTy, nullptr,
+                                        getPredicate(), CostKind,
+                                        {TTI::OK_AnyValue, TTI::OP_None},
+                                        {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
+    }
+  }
+
   // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
   FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
   IntrinsicCostAttributes CostAttrs(
@@ -1430,42 +1467,6 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
   }
 }
 
-void VPWidenEVLRecipe::execute(VPTransformState &State) {
-  unsigned Opcode = getOpcode();
-  // TODO: Support other opcodes
-  if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
-    llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
-
-  State.setDebugLocFrom(getDebugLoc());
-
-  assert(State.get(getOperand(0))->getType()->isVectorTy() &&
-         "VPWidenEVLRecipe should not be used for scalars");
-
-  VPValue *EVL = getEVL();
-  Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
-  IRBuilderBase &BuilderIR = State.Builder;
-  VectorBuilder Builder(BuilderIR);
-  Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
-
-  SmallVector<Value *> Ops;
-  for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
-    VPValue *VPOp = getOperand(I);
-    Ops.push_back(State.get(VPOp));
-  }
-
-  Builder.setMask(Mask).setEVL(EVLArg);
-  Value *VPInst =
-      Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
-  // Currently vp-intrinsics only accept FMF flags.
-  // TODO: Enable other flags when support is added.
-  if (isa<FPMathOperator>(VPInst))
-    setFlags(cast<Instruction>(VPInst));
-
-  State.set(this, VPInst);
-  State.addMetadata(VPInst,
-                    dyn_cast_or_null<Instruction>(getUnderlyingValue()));
-}
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                           VPSlotTracker &SlotTracker) const {
@@ -1475,15 +1476,6 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
   printFlags(O);
   printOperands(O, SlotTracker);
 }
-
-void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
-                             VPSlotTracker &SlotTracker) const {
-  O << Indent << "WIDEN ";
-  printAsOperand(O, SlotTracker);
-  O << " = vp." << Instruction::getOpcodeName(getOpcode());
-  printFlags(O);
-  printOperands(O, SlotTracker);
-}
 #endif
 
 void VPWidenCastRecipe::execute(VPTransformState &State) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 622b2592f3e09..f947da6076730 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1465,10 +1465,15 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
           })
           .Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
             unsigned Opcode = W->getOpcode();
-            if (!Instruction::isBinaryOp(Opcode) &&
-                !Instruction::isUnaryOp(Opcode))
+            if (Opcode == Instruction::Freeze)
               return nullptr;
-            return new VPWidenEVLRecipe(*W, EVL);
+            auto *I = cast<Instruction>(W->getUnderlyingInstr());
+            SmallVector<VPValue *> Ops(W->operands());
+            Ops.push_back(&EVL);
+            Intrinsic::ID VPID = VPIntrinsic::getForOpcode(W->getOpcode());
+            return new VPWidenIntrinsicRecipe(
+                *I, VPID, make_range(Ops.begin(), Ops.end()), I->getType(),
+                I->getDebugLoc());
           })
           .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
             VPValue *NewMask = GetNewMask(Red->getCondOp());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 691b0d40823cf..e35bd66cdaab8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -344,7 +344,6 @@ class VPDef {
     VPWidenStoreEVLSC,
     VPWidenStoreSC,
     VPWidenSC,
-    VPWidenEVLSC,
     VPWidenSelectSC,
     VPBlendSC,
     VPHistogramSC,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 3b7ba61454899..f147302964a3d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -148,10 +148,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
           .Case<VPWidenLoadEVLRecipe>([&](const VPWidenLoadEVLRecipe *L) {
             return VerifyEVLUse(*L, 1);
           })
-          .Case<VPWidenEVLRecipe>([&](const VPWidenEVLRecipe *W) {
-            return VerifyEVLUse(
-                *W, Instruction::isUnaryOp(W->getOpcode()) ?
1 : 2); - }) .Case([&](const VPReductionEVLRecipe *R) { return VerifyEVLUse(*R, 2); }) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll index fc12dd54f88df..1166d4cdc76b3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll @@ -69,7 +69,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]] ; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) -; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = call @llvm.vp.icmp.nxv4i32( %vp.op.load, shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), metadata !"sgt", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 %9) ; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = call @llvm.vp.select.nxv4i32( [[TMP19]], [[VP_OP_LOAD]], zeroinitializer, i32 [[TMP12]]) ; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) ; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]] @@ -282,7 +282,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]] ; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) -; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = call @llvm.vp.icmp.nxv4i32( [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer), metadata !"sgt", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) ; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select [[TMP16]], [[TMP19]], zeroinitializer ; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, [[VP_OP_LOAD]], [[TMP20]], i32 [[TMP12]]) ; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll index 99da5058fbf92..c915ffd9a80d9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll @@ -45,18 +45,18 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr 
inbounds i32, ptr [[TMP15]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP17:%.*]] = icmp ne [[VP_OP_LOAD]], zeroinitializer -; IF-EVL-NEXT: [[TMP18:%.*]] = select [[TMP14]], [[TMP17]], zeroinitializer -; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP11]] -; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], [[TMP18]], i32 [[TMP10]]) -; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[VP_OP_LOAD]], [[VP_OP_LOAD3]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP]], ptr align 4 [[TMP20]], [[TMP18]], i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP10]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[VP_OP:%.*]] = call @llvm.vp.icmp.nxv4i32( [[VP_OP_LOAD]], zeroinitializer, metadata !"ne", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP17:%.*]] = select [[TMP14]], [[VP_OP]], zeroinitializer +; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP11]] +; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP18]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP19]], [[TMP17]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[VP_OP4:%.*]] = call @llvm.vp.add.nxv4i32( [[VP_OP_LOAD]], [[VP_OP_LOAD3]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP10]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP4]], ptr align 4 [[TMP19]], [[TMP17]], i32 [[TMP10]]) +; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP10]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] -; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: @@ -65,13 +65,13 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; IF-EVL: for.body: ; IF-EVL-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_011]] -; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; IF-EVL-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP23]], 0 +; IF-EVL-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP22]], 0 ; IF-EVL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; IF-EVL: if.then: ; IF-EVL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_011]] -; IF-EVL-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 -; IF-EVL-NEXT: [[ADD:%.*]] = add i32 [[TMP23]], [[TMP24]] +; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 +; IF-EVL-NEXT: 
[[ADD:%.*]] = add i32 [[TMP22]], [[TMP23]] ; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX3]], align 4 ; IF-EVL-NEXT: br label [[FOR_INC]] ; IF-EVL: for.inc: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index 9a001f36da7d4..c291128b1e50c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -143,7 +143,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP7]] ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) -; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[TMP14:%.*]] = call @llvm.vp.icmp.nxv4i32( %vp.op.load, shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer), metadata !"slt", shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 %5) ; IF-EVL-NEXT: [[TMP15:%.*]] = select [[TMP10]], [[TMP14]], zeroinitializer ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]] ; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP4]] @@ -166,6 +166,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; IF-EVL-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] + ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cmp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cmp-intrinsics.ll new file mode 100644 index 0000000000000..dc96d7921356a --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cmp-intrinsics.ll @@ -0,0 +1,116 @@ +; REQUIRES: asserts + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s + +define void @vp_icmp(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' { +; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF +; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count +; IF-EVL-NEXT: Live-in ir<%N> = original trip-count + +; IF-EVL: vector.ph: +; IF-EVL-NEXT: Successor(s): vector loop + +; IF-EVL: vector loop: { +; IF-EVL-NEXT: vector.body: +; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> +; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> +; 
IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> +; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> +; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> +; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> +; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[ICMP:%.+]]> = call sgt llvm.vp.icmp(ir<[[LD1]]>, ir<[[LD2]]>, vp<[[EVL]]>) +; IF-EVL-NEXT: WIDEN-CAST ir<[[ZEXT:%.+]]> = zext ir<[[ICMP]]> to i32 +; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ZEXT]]>, vp<[[EVL]]> +; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> +; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> +; IF-EVL-NEXT: No successors +; IF-EVL-NEXT: } + +entry: + br label %loop + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %gep = getelementptr inbounds i32, ptr %c, i64 %iv + %1 = load i32, ptr %gep, align 4 + %cmp4 = icmp sgt i32 %0, %1 + %conv5 = zext i1 %cmp4 to i32 + %arrayidx7 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %conv5, ptr %arrayidx7, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +define void @vp_fcmp(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' { +; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF +; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count +; IF-EVL-NEXT: Live-in ir<%N> = original trip-count + +; IF-EVL: vector.ph: +; IF-EVL-NEXT: Successor(s): vector loop + +; IF-EVL: vector loop: { +; IF-EVL-NEXT: vector.body: +; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]> +; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> +; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> +; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> +; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> +; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> +; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[FCMP:%.+]]> = call ogt llvm.vp.fcmp(ir<[[LD1]]>, ir<[[LD2]]>, vp<[[EVL]]>) +; IF-EVL-NEXT: WIDEN-CAST ir<[[UITOFP:%.+]]> = uitofp ir<[[FCMP]]> to float +; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UITOFP]]>, 
vp<[[EVL]]> +; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]> +; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> +; IF-EVL-NEXT: No successors +; IF-EVL-NEXT: } + +entry: + br label %loop + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, ptr %b, i64 %iv + %0 = load float, ptr %arrayidx, align 4 + %gep = getelementptr inbounds float, ptr %c, i64 %iv + %1 = load float, ptr %gep, align 4 + %cmp4 = fcmp ogt float %0, %1 + %conv6 = uitofp i1 %cmp4 to float + %arrayidx8 = getelementptr inbounds float, ptr %a, i64 %iv + store float %conv6, ptr %arrayidx8, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll index 706b6f8882984..f1f7dbca0b10b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll @@ -32,7 +32,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]> -; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add nsw ir<[[LD2]]>, ir<[[LD1]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[ADD:%.+]]> = call nsw llvm.vp.add(ir<[[LD2]]>, ir<[[LD1]]>, vp<[[EVL]]>) ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll index 6d6cfb5e9d18e..92a9cb10f63e2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll @@ -27,10 +27,10 @@ ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> ; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]> - ; IF-EVL-NEXT: WIDEN ir<[[CMP:%.+]]> = icmp sgt ir<[[LD1]]>, ir<[[LD2]]> - ; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = vp.sub ir<0>, ir<[[LD2]]>, vp<[[EVL]]> + ; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[CMP:%.+]]> = call sgt llvm.vp.icmp(ir<[[LD1]]>, ir<[[LD2]]>, vp<[[EVL]]>) + ; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SUB:%.+]]> = call llvm.vp.sub(ir<0>, ir<[[LD2]]>, vp<[[EVL]]>) ; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SELECT:%.+]]> = call llvm.vp.select(ir<[[CMP]]>, ir<%1>, ir<%2>, vp<[[EVL]]>) - ; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add vp<[[SELECT]]>, ir<[[LD1]]>, vp<[[EVL]]> + ; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[ADD:%.+]]> = call llvm.vp.add(vp<[[SELECT]]>, ir<[[LD1]]>, vp<[[EVL]]>) ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> ; IF-EVL-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]> ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>