@@ -3063,17 +3063,42 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
30633063 case Instruction::Call: {
30643064 auto *CalledFn =
30653065 cast<Function>(getOperand (getNumOperands () - 1 )->getLiveInIRValue ());
3066- if (CalledFn->isIntrinsic ())
3067- break ;
30683066
3067+ SmallVector<const VPValue *> ArgOps (drop_end (operands ()));
30693068 SmallVector<Type *, 4 > Tys;
3070- for (VPValue *ArgOp : drop_end ( operands ()) )
3069+ for (const VPValue *ArgOp : ArgOps )
30713070 Tys.push_back (Ctx.Types .inferScalarType (ArgOp));
3071+
3072+ if (CalledFn->isIntrinsic ())
3073+ // Various pseudo-intrinsics with costs of 0 are scalarized instead of
3074+ // vectorized via VPWidenIntrinsicRecipe. Return 0 for them early.
3075+ switch (CalledFn->getIntrinsicID ()) {
3076+ case Intrinsic::assume:
3077+ case Intrinsic::lifetime_end:
3078+ case Intrinsic::lifetime_start:
3079+ case Intrinsic::sideeffect:
3080+ case Intrinsic::pseudoprobe:
3081+ case Intrinsic::experimental_noalias_scope_decl: {
3082+ assert (getCostForIntrinsics (CalledFn->getIntrinsicID (), ArgOps, *this ,
3083+ ElementCount::getFixed (1 ), Ctx) == 0 &&
3084+ " scalarizing intrinsic should be free" );
3085+ return InstructionCost (0 );
3086+ }
3087+ default :
3088+ break ;
3089+ }
3090+
30723091 Type *ResultTy = Ctx.Types .inferScalarType (this );
30733092 InstructionCost ScalarCallCost =
30743093 Ctx.TTI .getCallInstrCost (CalledFn, ResultTy, Tys, Ctx.CostKind );
3075- if (isSingleScalar ())
3094+ if (isSingleScalar ()) {
3095+ if (CalledFn->isIntrinsic ())
3096+ ScalarCallCost = std::min (
3097+ ScalarCallCost,
3098+ getCostForIntrinsics (CalledFn->getIntrinsicID (), ArgOps, *this ,
3099+ ElementCount::getFixed (1 ), Ctx));
30763100 return ScalarCallCost;
3101+ }
30773102
30783103 if (VF.isScalable ())
30793104 return InstructionCost::getInvalid ();
@@ -3094,7 +3119,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
30943119 // incur any overhead.
30953120 SmallPtrSet<const VPValue *, 4 > UniqueOperands;
30963121 Tys.clear ();
3097- for (auto *Op : drop_end ( operands ()) ) {
3122+ for (auto *Op : ArgOps ) {
30983123 if (Op->isLiveIn () || isa<VPReplicateRecipe, VPPredInstPHIRecipe>(Op) ||
30993124 !UniqueOperands.insert (Op).second )
31003125 continue ;
@@ -3104,8 +3129,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
31043129 Ctx.TTI .getOperandsScalarizationOverhead (Tys, Ctx.CostKind );
31053130 }
31063131
3107- return ScalarCallCost * (isSingleScalar () ? 1 : VF.getFixedValue ()) +
3108- ScalarizationCost;
3132+ return ScalarCallCost * VF.getFixedValue () + ScalarizationCost;
31093133 }
31103134 case Instruction::Add:
31113135 case Instruction::Sub:
0 commit comments