@@ -1290,8 +1290,8 @@ class LoopVectorizationCostModel {
12901290 if (VF.isScalar () || Uniforms.contains (VF))
12911291 return ;
12921292 setCostBasedWideningDecision (VF);
1293- setVectorizedCallDecision (VF);
12941293 collectLoopUniforms (VF);
1294+ setVectorizedCallDecision (VF);
12951295 collectLoopScalars (VF);
12961296 }
12971297
@@ -6194,6 +6194,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
61946194 assert (!VF.isScalar () &&
61956195 " Trying to set a vectorization decision for a scalar VF" );
61966196
6197+ auto ForcedScalar = ForcedScalars.find (VF);
61976198 for (BasicBlock *BB : TheLoop->blocks ()) {
61986199 // For each instruction in the old loop.
61996200 for (Instruction &I : *BB) {
@@ -6206,14 +6207,37 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
62066207 InstructionCost VectorCost = InstructionCost::getInvalid ();
62076208 InstructionCost IntrinsicCost = InstructionCost::getInvalid ();
62086209 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
6209-
62106210 Function *ScalarFunc = CI->getCalledFunction ();
62116211 Type *ScalarRetTy = CI->getType ();
62126212 SmallVector<Type *, 4 > Tys, ScalarTys;
6213- bool MaskRequired = Legal->isMaskRequired (CI);
62146213 for (auto &ArgOp : CI->args ())
62156214 ScalarTys.push_back (ArgOp->getType ());
62166215
6216+ // Estimate cost of scalarized vector call. The source operands are
6217+ // assumed to be vectors, so we need to extract individual elements from
6218+ // there, execute VF scalar calls, and then gather the result into the
6219+ // vector return value.
6220+ InstructionCost ScalarCallCost =
6221+ TTI.getCallInstrCost (ScalarFunc, ScalarRetTy, ScalarTys, CostKind);
6222+
6223+ // Compute costs of unpacking argument values for the scalar calls and
6224+ // packing the return values to a vector.
6225+ InstructionCost ScalarizationCost =
6226+ getScalarizationOverhead (CI, VF, CostKind);
6227+
6228+ ScalarCost = ScalarCallCost * VF.getKnownMinValue () + ScalarizationCost;
6229+ // Honor ForcedScalars decision.
6230+ // TODO: For calls, it might still be more profitable to widen. Use
6231+ // VPlan-based cost model to compare different options.
6232+ if (VF.isVector () && ForcedScalar != ForcedScalars.end () &&
6233+ ForcedScalar->second .contains (CI)) {
6234+ setCallWideningDecision (CI, VF, CM_Scalarize, nullptr ,
6235+ Intrinsic::not_intrinsic, std::nullopt ,
6236+ ScalarCost);
6237+ continue ;
6238+ }
6239+
6240+ bool MaskRequired = Legal->isMaskRequired (CI);
62176241 // Compute corresponding vector type for return value and arguments.
62186242 Type *RetTy = ToVectorTy (ScalarRetTy, VF);
62196243 for (Type *ScalarTy : ScalarTys)
@@ -6229,20 +6253,6 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
62296253 continue ;
62306254 }
62316255
6232- // Estimate cost of scalarized vector call. The source operands are
6233- // assumed to be vectors, so we need to extract individual elements from
6234- // there, execute VF scalar calls, and then gather the result into the
6235- // vector return value.
6236- InstructionCost ScalarCallCost =
6237- TTI.getCallInstrCost (ScalarFunc, ScalarRetTy, ScalarTys, CostKind);
6238-
6239- // Compute costs of unpacking argument values for the scalar calls and
6240- // packing the return values to a vector.
6241- InstructionCost ScalarizationCost =
6242- getScalarizationOverhead (CI, VF, CostKind);
6243-
6244- ScalarCost = ScalarCallCost * VF.getKnownMinValue () + ScalarizationCost;
6245-
62466256 // Find the cost of vectorizing the call, if we can find a suitable
62476257 // vector variant of the function.
62486258 bool UsesMask = false ;
0 commit comments