Skip to content

Commit f07dc6f

Browse files
committed
[LV] Remove special handling for interleaving only. (NFC)
Remove the special code for handling interleaving only, as it will be handled naturally by the generic code handling arbitrary IC & VF.
1 parent cdbef27 commit f07dc6f

File tree

1 file changed

+82
-100
lines changed

1 file changed

+82
-100
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 82 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -10206,111 +10206,93 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1020610206

1020710207
bool DisableRuntimeUnroll = false;
1020810208
MDNode *OrigLoopID = L->getLoopID();
10209-
{
10210-
using namespace ore;
10211-
if (!VectorizeLoop) {
10212-
assert(IC > 1 && "interleave count should not be 1 or 0");
10213-
// If we decided that it is not legal to vectorize the loop, then
10214-
// interleave it.
10215-
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
10216-
InnerLoopVectorizer Unroller(L, PSE, LI, DT, TTI, AC,
10217-
ElementCount::getFixed(1), IC, &CM, BFI, PSI,
10218-
Checks, BestPlan);
10219-
10220-
// TODO: Move to general VPlan pipeline once epilogue loops are also
10221-
// supported.
10222-
VPlanTransforms::runPass(
10223-
VPlanTransforms::materializeConstantVectorTripCount, BestPlan,
10224-
VF.Width, IC, PSE);
10225-
LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC,
10226-
VF.MinProfitableTripCount);
10227-
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
10209+
// If we decided that it is *legal* to interleave or vectorize the loop, then
10210+
// do it.
1022810211

10229-
ORE->emit([&]() {
10230-
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
10231-
L->getHeader())
10232-
<< "interleaved loop (interleaved count: "
10233-
<< NV("InterleaveCount", IC) << ")";
10234-
});
10235-
} else {
10236-
// If we decided that it is *legal* to vectorize the loop, then do it.
10237-
10238-
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
10239-
// Consider vectorizing the epilogue too if it's profitable.
10240-
VectorizationFactor EpilogueVF =
10241-
LVP.selectEpilogueVectorizationFactor(VF.Width, IC);
10242-
if (EpilogueVF.Width.isVector()) {
10243-
std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
10244-
10245-
// The first pass vectorizes the main loop and creates a scalar epilogue
10246-
// to be vectorized by executing the plan (potentially with a different
10247-
// factor) again shortly afterwards.
10248-
VPlan &BestEpiPlan = LVP.getPlanFor(EpilogueVF.Width);
10249-
BestEpiPlan.getMiddleBlock()->setName("vec.epilog.middle.block");
10250-
preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan);
10251-
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1,
10252-
BestEpiPlan);
10253-
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TTI, AC, EPI, &CM,
10254-
BFI, PSI, Checks, *BestMainPlan);
10255-
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
10256-
*BestMainPlan, MainILV, DT, false);
10257-
++LoopsVectorized;
10258-
10259-
// Second pass vectorizes the epilogue and adjusts the control flow
10260-
// edges from the first pass.
10261-
EpilogueVectorizerEpilogueLoop EpilogILV(
10262-
L, PSE, LI, DT, TTI, AC, EPI, &CM, BFI, PSI, Checks, BestEpiPlan);
10263-
EpilogILV.setTripCount(MainILV.getTripCount());
10264-
preparePlanForEpilogueVectorLoop(BestEpiPlan, L, ExpandedSCEVs, EPI);
10265-
10266-
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
10267-
DT, true);
10268-
10269-
// Fix induction resume values from the additional bypass block.
10270-
BasicBlock *BypassBlock = EpilogILV.getAdditionalBypassBlock();
10271-
IRBuilder<> BypassBuilder(BypassBlock,
10272-
BypassBlock->getFirstInsertionPt());
10273-
BasicBlock *PH = L->getLoopPreheader();
10274-
for (const auto &[IVPhi, II] : LVL.getInductionVars()) {
10275-
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH));
10276-
Value *V = createInductionAdditionalBypassValues(
10277-
IVPhi, II, BypassBuilder, ExpandedSCEVs, EPI.VectorTripCount,
10278-
LVL.getPrimaryInduction());
10279-
// TODO: Directly add as extra operand to the VPResumePHI recipe.
10280-
Inc->setIncomingValueForBlock(BypassBlock, V);
10281-
}
10282-
++LoopsEpilogueVectorized;
10212+
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
10213+
// Consider vectorizing the epilogue too if it's profitable.
10214+
VectorizationFactor EpilogueVF =
10215+
LVP.selectEpilogueVectorizationFactor(VF.Width, IC);
10216+
if (EpilogueVF.Width.isVector()) {
10217+
std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
10218+
10219+
// The first pass vectorizes the main loop and creates a scalar epilogue
10220+
// to be vectorized by executing the plan (potentially with a different
10221+
// factor) again shortly afterwards.
10222+
VPlan &BestEpiPlan = LVP.getPlanFor(EpilogueVF.Width);
10223+
BestEpiPlan.getMiddleBlock()->setName("vec.epilog.middle.block");
10224+
preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan);
10225+
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1,
10226+
BestEpiPlan);
10227+
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TTI, AC, EPI, &CM, BFI,
10228+
PSI, Checks, *BestMainPlan);
10229+
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
10230+
*BestMainPlan, MainILV, DT, false);
10231+
++LoopsVectorized;
10232+
10233+
// Second pass vectorizes the epilogue and adjusts the control flow
10234+
// edges from the first pass.
10235+
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TTI, AC, EPI, &CM,
10236+
BFI, PSI, Checks, BestEpiPlan);
10237+
EpilogILV.setTripCount(MainILV.getTripCount());
10238+
preparePlanForEpilogueVectorLoop(BestEpiPlan, L, ExpandedSCEVs, EPI);
10239+
10240+
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT,
10241+
true);
10242+
10243+
// Fix induction resume values from the additional bypass block.
10244+
BasicBlock *BypassBlock = EpilogILV.getAdditionalBypassBlock();
10245+
IRBuilder<> BypassBuilder(BypassBlock, BypassBlock->getFirstInsertionPt());
10246+
BasicBlock *PH = L->getLoopPreheader();
10247+
for (const auto &[IVPhi, II] : LVL.getInductionVars()) {
10248+
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH));
10249+
Value *V = createInductionAdditionalBypassValues(
10250+
IVPhi, II, BypassBuilder, ExpandedSCEVs, EPI.VectorTripCount,
10251+
LVL.getPrimaryInduction());
10252+
// TODO: Directly add as extra operand to the VPResumePHI recipe.
10253+
Inc->setIncomingValueForBlock(BypassBlock, V);
10254+
}
10255+
++LoopsEpilogueVectorized;
10256+
10257+
if (!Checks.hasChecks())
10258+
DisableRuntimeUnroll = true;
10259+
} else {
10260+
InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, IC, &CM, BFI, PSI,
10261+
Checks, BestPlan);
10262+
// TODO: Move to general VPlan pipeline once epilogue loops are also
10263+
// supported.
10264+
VPlanTransforms::runPass(
10265+
VPlanTransforms::materializeConstantVectorTripCount, BestPlan, VF.Width,
10266+
IC, PSE);
10267+
LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC,
10268+
VF.MinProfitableTripCount);
1028310269

10284-
if (!Checks.hasChecks())
10285-
DisableRuntimeUnroll = true;
10286-
} else {
10287-
InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, IC, &CM, BFI,
10288-
PSI, Checks, BestPlan);
10289-
// TODO: Move to general VPlan pipeline once epilogue loops are also
10290-
// supported.
10291-
VPlanTransforms::runPass(
10292-
VPlanTransforms::materializeConstantVectorTripCount, BestPlan,
10293-
VF.Width, IC, PSE);
10294-
LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC,
10295-
VF.MinProfitableTripCount);
10296-
10297-
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
10298-
++LoopsVectorized;
10299-
10300-
// Add metadata to disable runtime unrolling a scalar loop when there
10301-
// are no runtime checks about strides and memory. A scalar loop that is
10302-
// rarely used is not worth unrolling.
10303-
if (!Checks.hasChecks())
10304-
DisableRuntimeUnroll = true;
10305-
}
10306-
// Report the vectorization decision.
10307-
reportVectorization(ORE, L, VF, IC);
10308-
}
10270+
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
10271+
++LoopsVectorized;
1030910272

10310-
if (ORE->allowExtraAnalysis(LV_NAME))
10311-
checkMixedPrecision(L, ORE);
10273+
// Add metadata to disable runtime unrolling a scalar loop when there
10274+
// are no runtime checks about strides and memory. A scalar loop that is
10275+
// rarely used is not worth unrolling.
10276+
if (!Checks.hasChecks() && !VF.Width.isScalar())
10277+
DisableRuntimeUnroll = true;
10278+
}
10279+
if (VF.Width.isScalar()) {
10280+
using namespace ore;
10281+
assert(IC > 1);
10282+
ORE->emit([&]() {
10283+
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
10284+
L->getHeader())
10285+
<< "interleaved loop (interleaved count: "
10286+
<< NV("InterleaveCount", IC) << ")";
10287+
});
10288+
} else {
10289+
// Report the vectorization decision.
10290+
reportVectorization(ORE, L, VF, IC);
1031210291
}
1031310292

10293+
if (ORE->allowExtraAnalysis(LV_NAME))
10294+
checkMixedPrecision(L, ORE);
10295+
1031410296
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
1031510297
"DT not preserved correctly");
1031610298

0 commit comments

Comments
 (0)