5858#include " LoopVectorizationPlanner.h"
5959#include " VPRecipeBuilder.h"
6060#include " VPlanHCFGBuilder.h"
61+ #include " VPlanHCFGTransforms.h"
6162#include " llvm/ADT/APInt.h"
6263#include " llvm/ADT/ArrayRef.h"
6364#include " llvm/ADT/DenseMap.h"
@@ -234,7 +235,7 @@ static cl::opt<unsigned> MaxNestedScalarReductionIC(
234235 cl::desc(" The maximum interleave count to use when interleaving a scalar "
235236 " reduction in a nested loop." ));
236237
237- static cl::opt<bool > EnableVPlanNativePath (
238+ cl::opt<bool > EnableVPlanNativePath (
238239 " enable-vplan-native-path" , cl::init(false ), cl::Hidden,
239240 cl::desc(" Enable VPlan-native vectorization path with "
240241 " support for outer loop vectorization." ));
@@ -419,6 +420,9 @@ class InnerLoopVectorizer {
419420 // / the instruction.
420421 void setDebugLocFromInst (IRBuilder<> &B, const Value *Ptr);
421422
423+ // / Fix the non-induction PHIs in the OrigPHIsToFix vector.
424+ void fixNonInductionPHIs (void );
425+
422426protected:
423427 friend class LoopVectorizationPlanner ;
424428
@@ -686,6 +690,10 @@ class InnerLoopVectorizer {
686690 // Holds the end values for each induction variable. We save the end values
687691 // so we can later fix-up the external users of the induction variables.
688692 DenseMap<PHINode *, Value *> IVEndValues;
693+
694+ // Vector of original scalar PHIs whose corresponding widened PHIs need to be
695+ // fixed up at the end of vector code generation.
696+ SmallVector<PHINode *, 8 > OrigPHIsToFix;
689697};
690698
691699class InnerLoopUnroller : public InnerLoopVectorizer {
@@ -888,6 +896,12 @@ class LoopVectorizationCostModel {
888896 // / vectorization factor \p VF.
889897 bool isProfitableToScalarize (Instruction *I, unsigned VF) const {
890898 assert (VF > 1 && " Profitable to scalarize relevant only for VF > 1." );
899+
900+ // Cost model is not run in the VPlan-native path - return conservative
901+ // result until this changes.
902+ if (EnableVPlanNativePath)
903+ return false ;
904+
891905 auto Scalars = InstsToScalarize.find (VF);
892906 assert (Scalars != InstsToScalarize.end () &&
893907 " VF not yet analyzed for scalarization profitability" );
@@ -898,6 +912,12 @@ class LoopVectorizationCostModel {
898912 bool isUniformAfterVectorization (Instruction *I, unsigned VF) const {
899913 if (VF == 1 )
900914 return true ;
915+
916+ // Cost model is not run in the VPlan-native path - return conservative
917+ // result until this changes.
918+ if (EnableVPlanNativePath)
919+ return false ;
920+
901921 auto UniformsPerVF = Uniforms.find (VF);
902922 assert (UniformsPerVF != Uniforms.end () &&
903923 " VF not yet analyzed for uniformity" );
@@ -908,6 +928,12 @@ class LoopVectorizationCostModel {
908928 bool isScalarAfterVectorization (Instruction *I, unsigned VF) const {
909929 if (VF == 1 )
910930 return true ;
931+
932+ // Cost model is not run in the VPlan-native path - return conservative
933+ // result until this changes.
934+ if (EnableVPlanNativePath)
935+ return false ;
936+
911937 auto ScalarsPerVF = Scalars.find (VF);
912938 assert (ScalarsPerVF != Scalars.end () &&
913939 " Scalar values are not calculated for VF" );
@@ -962,6 +988,12 @@ class LoopVectorizationCostModel {
962988 // / through the cost modeling.
963989 InstWidening getWideningDecision (Instruction *I, unsigned VF) {
964990 assert (VF >= 2 && " Expected VF >=2" );
991+
992+ // Cost model is not run in the VPlan-native path - return conservative
993+ // result until this changes.
994+ if (EnableVPlanNativePath)
995+ return CM_GatherScatter;
996+
965997 std::pair<Instruction *, unsigned > InstOnVF = std::make_pair (I, VF);
966998 auto Itr = WideningDecisions.find (InstOnVF);
967999 if (Itr == WideningDecisions.end ())
@@ -1397,8 +1429,16 @@ struct LoopVectorize : public FunctionPass {
13971429 AU.addRequired <LoopAccessLegacyAnalysis>();
13981430 AU.addRequired <DemandedBitsWrapperPass>();
13991431 AU.addRequired <OptimizationRemarkEmitterWrapperPass>();
1400- AU.addPreserved <LoopInfoWrapperPass>();
1401- AU.addPreserved <DominatorTreeWrapperPass>();
1432+
1433+ // We currently do not preserve loopinfo/dominator analyses with outer loop
1434+ // vectorization. Until this is addressed, mark these analyses as preserved
1435+ // only for non-VPlan-native path.
1436+ // TODO: Preserve Loop and Dominator analyses for VPlan-native path.
1437+ if (!EnableVPlanNativePath) {
1438+ AU.addPreserved <LoopInfoWrapperPass>();
1439+ AU.addPreserved <DominatorTreeWrapperPass>();
1440+ }
1441+
14021442 AU.addPreserved <BasicAAWrapperPass>();
14031443 AU.addPreserved <GlobalsAAWrapperPass>();
14041444 }
@@ -1749,8 +1789,9 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
17491789 assert (!V->getType ()->isVectorTy () && " Can't widen a vector" );
17501790 assert (!V->getType ()->isVoidTy () && " Type does not produce a value" );
17511791
1752- // If we have a stride that is replaced by one, do it here.
1753- if (Legal->hasStride (V))
1792+ // If we have a stride that is replaced by one, do it here. Defer this for
1793+ // the VPlan-native path until we start running Legal checks in that path.
1794+ if (!EnableVPlanNativePath && Legal->hasStride (V))
17541795 V = ConstantInt::get (V->getType (), 1 );
17551796
17561797 // If we have a vector mapped to this value, return it.
@@ -2416,6 +2457,10 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
24162457}
24172458
24182459void InnerLoopVectorizer::emitMemRuntimeChecks (Loop *L, BasicBlock *Bypass) {
2460+ // VPlan-native path does not do any analysis for runtime checks currently.
2461+ if (EnableVPlanNativePath)
2462+ return ;
2463+
24192464 BasicBlock *BB = L->getLoopPreheader ();
24202465
24212466 // Generate the code that checks in runtime if arrays overlap. We put the
@@ -3060,6 +3105,13 @@ void InnerLoopVectorizer::fixVectorizedLoop() {
30603105 if (VF > 1 )
30613106 truncateToMinimalBitwidths ();
30623107
3108+ // Fix widened non-induction PHIs by setting up the PHI operands.
3109+ if (OrigPHIsToFix.size ()) {
3110+ assert (EnableVPlanNativePath &&
3111+ " Unexpected non-induction PHIs for fixup in non VPlan-native path" );
3112+ fixNonInductionPHIs ();
3113+ }
3114+
30633115 // At this point every instruction in the original loop is widened to a
30643116 // vector form. Now we need to fix the recurrences in the loop. These PHI
30653117 // nodes are currently empty because we did not want to introduce cycles.
@@ -3532,12 +3584,62 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
35323584 } while (Changed);
35333585}
35343586
3587+ void InnerLoopVectorizer::fixNonInductionPHIs () {
3588+ for (PHINode *OrigPhi : OrigPHIsToFix) {
3589+ PHINode *NewPhi =
3590+ cast<PHINode>(VectorLoopValueMap.getVectorValue (OrigPhi, 0 ));
3591+ unsigned NumIncomingValues = OrigPhi->getNumIncomingValues ();
3592+
3593+ SmallVector<BasicBlock *, 2 > ScalarBBPredecessors (
3594+ predecessors (OrigPhi->getParent ()));
3595+ SmallVector<BasicBlock *, 2 > VectorBBPredecessors (
3596+ predecessors (NewPhi->getParent ()));
3597+ assert (ScalarBBPredecessors.size () == VectorBBPredecessors.size () &&
3598+ " Scalar and Vector BB should have the same number of predecessors" );
3599+
3600+ // The insertion point in Builder may be invalidated by the time we get
3601+ // here. Force the Builder insertion point to something valid so that we do
3602+ // not run into issues during insertion point restore in
3603+ // getOrCreateVectorValue calls below.
3604+ Builder.SetInsertPoint (NewPhi);
3605+
3606+ // The predecessor order is preserved and we can rely on mapping between
3607+ // scalar and vector block predecessors.
3608+ for (unsigned i = 0 ; i < NumIncomingValues; ++i) {
3609+ BasicBlock *NewPredBB = VectorBBPredecessors[i];
3610+
3611+ // When looking up the new scalar/vector values to fix up, use incoming
3612+ // values from original phi.
3613+ Value *ScIncV =
3614+ OrigPhi->getIncomingValueForBlock (ScalarBBPredecessors[i]);
3615+
3616+ // Scalar incoming value may need a broadcast
3617+ Value *NewIncV = getOrCreateVectorValue (ScIncV, 0 );
3618+ NewPhi->addIncoming (NewIncV, NewPredBB);
3619+ }
3620+ }
3621+ }
3622+
35353623void InnerLoopVectorizer::widenPHIInstruction (Instruction *PN, unsigned UF,
35363624 unsigned VF) {
3625+ PHINode *P = cast<PHINode>(PN);
3626+ if (EnableVPlanNativePath) {
3627+ // Currently we enter here in the VPlan-native path for non-induction
3628+ // PHIs where all control flow is uniform. We simply widen these PHIs.
3629+ // Create a vector phi with no operands - the vector phi operands will be
3630+ // set at the end of vector code generation.
3631+ Type *VecTy =
3632+ (VF == 1 ) ? PN->getType () : VectorType::get (PN->getType (), VF);
3633+ Value *VecPhi = Builder.CreatePHI (VecTy, PN->getNumOperands (), " vec.phi" );
3634+ VectorLoopValueMap.setVectorValue (P, 0 , VecPhi);
3635+ OrigPHIsToFix.push_back (P);
3636+
3637+ return ;
3638+ }
3639+
35373640 assert (PN->getParent () == OrigLoop->getHeader () &&
35383641 " Non-header phis should have been handled elsewhere" );
35393642
3540- PHINode *P = cast<PHINode>(PN);
35413643 // In order to support recurrences we need to be able to vectorize Phi nodes.
35423644 // Phi nodes have cycles, so we need to vectorize them in two stages. This is
35433645 // stage #1: We create a new vector PHI node with no incoming edges. We'll use
@@ -3893,6 +3995,10 @@ void InnerLoopVectorizer::updateAnalysis() {
38933995 // Forget the original basic block.
38943996 PSE.getSE ()->forgetLoop (OrigLoop);
38953997
3998+ // DT is not kept up-to-date for outer loop vectorization
3999+ if (EnableVPlanNativePath)
4000+ return ;
4001+
38964002 // Update the dominator tree information.
38974003 assert (DT->properlyDominates (LoopBypassBlocks.front (), LoopExitBlock) &&
38984004 " Entry does not dominate exit." );
@@ -6527,6 +6633,13 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
65276633 VPlanHCFGBuilder HCFGBuilder (OrigLoop, LI, *Plan);
65286634 HCFGBuilder.buildHierarchicalCFG ();
65296635
6636+ SmallPtrSet<Instruction *, 1 > DeadInstructions;
6637+ VPlanHCFGTransforms::VPInstructionsToVPRecipes (
6638+ Plan, Legal->getInductionVars (), DeadInstructions);
6639+
6640+ for (unsigned VF = Range.Start ; VF < Range.End ; VF *= 2 )
6641+ Plan->addVF (VF);
6642+
65306643 return Plan;
65316644}
65326645
@@ -6728,11 +6841,26 @@ static bool processLoopInVPlanNativePath(
67286841 Hints.getForce () != LoopVectorizeHints::FK_Enabled && F->optForSize ();
67296842
67306843 // Plan how to best vectorize, return the best VF and its cost.
6731- LVP.planInVPlanNativePath (OptForSize, UserVF);
6844+ VectorizationFactor VF = LVP.planInVPlanNativePath (OptForSize, UserVF);
67326845
6733- // Returning false. We are currently not generating vector code in the VPlan
6734- // native path.
6735- return false ;
6846+ // If we are stress testing VPlan builds, do not attempt to generate vector
6847+ // code.
6848+ if (VPlanBuildStressTest)
6849+ return false ;
6850+
6851+ LVP.setBestPlan (VF.Width , 1 );
6852+
6853+ InnerLoopVectorizer LB (L, PSE, LI, DT, TLI, TTI, AC, ORE, UserVF, 1 , LVL,
6854+ &CM);
6855+ LLVM_DEBUG (dbgs () << " Vectorizing outer loop in \" "
6856+ << L->getHeader ()->getParent ()->getName () << " \"\n " );
6857+ LVP.executePlan (LB, DT);
6858+
6859+ // Mark the loop as already vectorized to avoid vectorizing again.
6860+ Hints.setAlreadyVectorized ();
6861+
6862+ LLVM_DEBUG (verifyFunction (*L->getHeader ()->getParent ()));
6863+ return true ;
67366864}
67376865
67386866bool LoopVectorizePass::processLoop (Loop *L) {
@@ -7123,8 +7251,15 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
71237251 if (!Changed)
71247252 return PreservedAnalyses::all ();
71257253 PreservedAnalyses PA;
7126- PA.preserve <LoopAnalysis>();
7127- PA.preserve <DominatorTreeAnalysis>();
7254+
7255+ // We currently do not preserve loopinfo/dominator analyses with outer loop
7256+ // vectorization. Until this is addressed, mark these analyses as preserved
7257+ // only for non-VPlan-native path.
7258+ // TODO: Preserve Loop and Dominator analyses for VPlan-native path.
7259+ if (!EnableVPlanNativePath) {
7260+ PA.preserve <LoopAnalysis>();
7261+ PA.preserve <DominatorTreeAnalysis>();
7262+ }
71287263 PA.preserve <BasicAA>();
71297264 PA.preserve <GlobalsAA>();
71307265 return PA;
0 commit comments