@@ -10206,111 +10206,93 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1020610206
1020710207 bool DisableRuntimeUnroll = false ;
1020810208 MDNode *OrigLoopID = L->getLoopID ();
10209- {
10210- using namespace ore ;
10211- if (!VectorizeLoop) {
10212- assert (IC > 1 && " interleave count should not be 1 or 0" );
10213- // If we decided that it is not legal to vectorize the loop, then
10214- // interleave it.
10215- VPlan &BestPlan = LVP.getPlanFor (VF.Width );
10216- InnerLoopVectorizer Unroller (L, PSE, LI, DT, TTI, AC,
10217- ElementCount::getFixed (1 ), IC, &CM, BFI, PSI,
10218- Checks, BestPlan);
10219-
10220- // TODO: Move to general VPlan pipeline once epilogue loops are also
10221- // supported.
10222- VPlanTransforms::runPass (
10223- VPlanTransforms::materializeConstantVectorTripCount, BestPlan,
10224- VF.Width , IC, PSE);
10225- LVP.addMinimumIterationCheck (BestPlan, VF.Width , IC,
10226- VF.MinProfitableTripCount );
10227- LVP.executePlan (VF.Width , IC, BestPlan, Unroller, DT, false );
10209+ // If we decided that it is *legal* to interleave or vectorize the loop, then
10210+ // do it.
1022810211
10229- ORE->emit ([&]() {
10230- return OptimizationRemark (LV_NAME, " Interleaved" , L->getStartLoc (),
10231- L->getHeader ())
10232- << " interleaved loop (interleaved count: "
10233- << NV (" InterleaveCount" , IC) << " )" ;
10234- });
10235- } else {
10236- // If we decided that it is *legal* to vectorize the loop, then do it.
10237-
10238- VPlan &BestPlan = LVP.getPlanFor (VF.Width );
10239- // Consider vectorizing the epilogue too if it's profitable.
10240- VectorizationFactor EpilogueVF =
10241- LVP.selectEpilogueVectorizationFactor (VF.Width , IC);
10242- if (EpilogueVF.Width .isVector ()) {
10243- std::unique_ptr<VPlan> BestMainPlan (BestPlan.duplicate ());
10244-
10245- // The first pass vectorizes the main loop and creates a scalar epilogue
10246- // to be vectorized by executing the plan (potentially with a different
10247- // factor) again shortly afterwards.
10248- VPlan &BestEpiPlan = LVP.getPlanFor (EpilogueVF.Width );
10249- BestEpiPlan.getMiddleBlock ()->setName (" vec.epilog.middle.block" );
10250- preparePlanForMainVectorLoop (*BestMainPlan, BestEpiPlan);
10251- EpilogueLoopVectorizationInfo EPI (VF.Width , IC, EpilogueVF.Width , 1 ,
10252- BestEpiPlan);
10253- EpilogueVectorizerMainLoop MainILV (L, PSE, LI, DT, TTI, AC, EPI, &CM,
10254- BFI, PSI, Checks, *BestMainPlan);
10255- auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
10256- *BestMainPlan, MainILV, DT, false );
10257- ++LoopsVectorized;
10258-
10259- // Second pass vectorizes the epilogue and adjusts the control flow
10260- // edges from the first pass.
10261- EpilogueVectorizerEpilogueLoop EpilogILV (
10262- L, PSE, LI, DT, TTI, AC, EPI, &CM, BFI, PSI, Checks, BestEpiPlan);
10263- EpilogILV.setTripCount (MainILV.getTripCount ());
10264- preparePlanForEpilogueVectorLoop (BestEpiPlan, L, ExpandedSCEVs, EPI);
10265-
10266- LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV,
10267- DT, true );
10268-
10269- // Fix induction resume values from the additional bypass block.
10270- BasicBlock *BypassBlock = EpilogILV.getAdditionalBypassBlock ();
10271- IRBuilder<> BypassBuilder (BypassBlock,
10272- BypassBlock->getFirstInsertionPt ());
10273- BasicBlock *PH = L->getLoopPreheader ();
10274- for (const auto &[IVPhi, II] : LVL.getInductionVars ()) {
10275- auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
10276- Value *V = createInductionAdditionalBypassValues (
10277- IVPhi, II, BypassBuilder, ExpandedSCEVs, EPI.VectorTripCount ,
10278- LVL.getPrimaryInduction ());
10279- // TODO: Directly add as extra operand to the VPResumePHI recipe.
10280- Inc->setIncomingValueForBlock (BypassBlock, V);
10281- }
10282- ++LoopsEpilogueVectorized;
10212+ VPlan &BestPlan = LVP.getPlanFor (VF.Width );
10213+ // Consider vectorizing the epilogue too if it's profitable.
10214+ VectorizationFactor EpilogueVF =
10215+ LVP.selectEpilogueVectorizationFactor (VF.Width , IC);
10216+ if (EpilogueVF.Width .isVector ()) {
10217+ std::unique_ptr<VPlan> BestMainPlan (BestPlan.duplicate ());
10218+
10219+ // The first pass vectorizes the main loop and creates a scalar epilogue
10220+ // to be vectorized by executing the plan (potentially with a different
10221+ // factor) again shortly afterwards.
10222+ VPlan &BestEpiPlan = LVP.getPlanFor (EpilogueVF.Width );
10223+ BestEpiPlan.getMiddleBlock ()->setName (" vec.epilog.middle.block" );
10224+ preparePlanForMainVectorLoop (*BestMainPlan, BestEpiPlan);
10225+ EpilogueLoopVectorizationInfo EPI (VF.Width , IC, EpilogueVF.Width , 1 ,
10226+ BestEpiPlan);
10227+ EpilogueVectorizerMainLoop MainILV (L, PSE, LI, DT, TTI, AC, EPI, &CM, BFI,
10228+ PSI, Checks, *BestMainPlan);
10229+ auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
10230+ *BestMainPlan, MainILV, DT, false );
10231+ ++LoopsVectorized;
10232+
10233+ // Second pass vectorizes the epilogue and adjusts the control flow
10234+ // edges from the first pass.
10235+ EpilogueVectorizerEpilogueLoop EpilogILV (L, PSE, LI, DT, TTI, AC, EPI, &CM,
10236+ BFI, PSI, Checks, BestEpiPlan);
10237+ EpilogILV.setTripCount (MainILV.getTripCount ());
10238+ preparePlanForEpilogueVectorLoop (BestEpiPlan, L, ExpandedSCEVs, EPI);
10239+
10240+ LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV, DT,
10241+ true );
10242+
10243+ // Fix induction resume values from the additional bypass block.
10244+ BasicBlock *BypassBlock = EpilogILV.getAdditionalBypassBlock ();
10245+ IRBuilder<> BypassBuilder (BypassBlock, BypassBlock->getFirstInsertionPt ());
10246+ BasicBlock *PH = L->getLoopPreheader ();
10247+ for (const auto &[IVPhi, II] : LVL.getInductionVars ()) {
10248+ auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock (PH));
10249+ Value *V = createInductionAdditionalBypassValues (
10250+ IVPhi, II, BypassBuilder, ExpandedSCEVs, EPI.VectorTripCount ,
10251+ LVL.getPrimaryInduction ());
10252+ // TODO: Directly add as extra operand to the VPResumePHI recipe.
10253+ Inc->setIncomingValueForBlock (BypassBlock, V);
10254+ }
10255+ ++LoopsEpilogueVectorized;
10256+
10257+ if (!Checks.hasChecks ())
10258+ DisableRuntimeUnroll = true ;
10259+ } else {
10260+ InnerLoopVectorizer LB (L, PSE, LI, DT, TTI, AC, VF.Width , IC, &CM, BFI, PSI,
10261+ Checks, BestPlan);
10262+ // TODO: Move to general VPlan pipeline once epilogue loops are also
10263+ // supported.
10264+ VPlanTransforms::runPass (
10265+ VPlanTransforms::materializeConstantVectorTripCount, BestPlan, VF.Width ,
10266+ IC, PSE);
10267+ LVP.addMinimumIterationCheck (BestPlan, VF.Width , IC,
10268+ VF.MinProfitableTripCount );
1028310269
10284- if (!Checks.hasChecks ())
10285- DisableRuntimeUnroll = true ;
10286- } else {
10287- InnerLoopVectorizer LB (L, PSE, LI, DT, TTI, AC, VF.Width , IC, &CM, BFI,
10288- PSI, Checks, BestPlan);
10289- // TODO: Move to general VPlan pipeline once epilogue loops are also
10290- // supported.
10291- VPlanTransforms::runPass (
10292- VPlanTransforms::materializeConstantVectorTripCount, BestPlan,
10293- VF.Width , IC, PSE);
10294- LVP.addMinimumIterationCheck (BestPlan, VF.Width , IC,
10295- VF.MinProfitableTripCount );
10296-
10297- LVP.executePlan (VF.Width , IC, BestPlan, LB, DT, false );
10298- ++LoopsVectorized;
10299-
10300- // Add metadata to disable runtime unrolling a scalar loop when there
10301- // are no runtime checks about strides and memory. A scalar loop that is
10302- // rarely used is not worth unrolling.
10303- if (!Checks.hasChecks ())
10304- DisableRuntimeUnroll = true ;
10305- }
10306- // Report the vectorization decision.
10307- reportVectorization (ORE, L, VF, IC);
10308- }
10270+ LVP.executePlan (VF.Width , IC, BestPlan, LB, DT, false );
10271+ ++LoopsVectorized;
1030910272
10310- if (ORE->allowExtraAnalysis (LV_NAME))
10311- checkMixedPrecision (L, ORE);
10273+ // Add metadata to disable runtime unrolling a scalar loop when there
10274+ // are no runtime checks about strides and memory. A scalar loop that is
10275+ // rarely used is not worth unrolling.
10276+ if (!Checks.hasChecks () && !VF.Width .isScalar ())
10277+ DisableRuntimeUnroll = true ;
10278+ }
10279+ if (VF.Width .isScalar ()) {
10280+ using namespace ore ;
10281+ assert (IC > 1 );
10282+ ORE->emit ([&]() {
10283+ return OptimizationRemark (LV_NAME, " Interleaved" , L->getStartLoc (),
10284+ L->getHeader ())
10285+ << " interleaved loop (interleaved count: "
10286+ << NV (" InterleaveCount" , IC) << " )" ;
10287+ });
10288+ } else {
10289+ // Report the vectorization decision.
10290+ reportVectorization (ORE, L, VF, IC);
1031210291 }
1031310292
10293+ if (ORE->allowExtraAnalysis (LV_NAME))
10294+ checkMixedPrecision (L, ORE);
10295+
1031410296 assert (DT->verify (DominatorTree::VerificationLevel::Fast) &&
1031510297 " DT not preserved correctly" );
1031610298
0 commit comments