Skip to content

[VPlan] Explicitly handle scalar pointer inductions. #83068

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
f4dabdf
[VPlan] Update VPInst::onlyFirstLaneUsed to check users.
fhahn Jan 31, 2024
b08e892
[VPlan] Consistently use (Part, 0) for first lane scalar values
fhahn Jan 31, 2024
f56e217
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 3, 2024
172dbf6
!fixup fix merge
fhahn Feb 3, 2024
916a7d2
[VPlan] Explicitly handle scalar pointer inductions.
fhahn Jan 29, 2024
d2c51ec
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 6, 2024
82d74df
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 7, 2024
c6797e6
!fixup address latest comments, thanks!
fhahn Feb 7, 2024
53f2937
!fixup fix formatting
fhahn Feb 7, 2024
b0a78f6
Merge branch 'users/fhahn/vplan-uniform-scalar-lanes' into vplan-vect…
fhahn Feb 7, 2024
e6d2db8
!fixup Address latest comments, thanks!
fhahn Feb 7, 2024
5065331
!Fixup split generateInstruction into per-part and per lane.
fhahn Feb 7, 2024
f38d682
!fixup address comments in VPlanTransforms.cpp, thanks!
fhahn Feb 7, 2024
a166da5
Merge branch 'main' into users/fhahn/vplan-uniform-scalar-lanes
fhahn Feb 8, 2024
df9cad0
Merge branch 'users/fhahn/vplan-uniform-scalar-lanes' into vplan-vect…
fhahn Feb 8, 2024
ab14184
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Feb 26, 2024
133776f
!fixup fix things after update to main.
fhahn Feb 26, 2024
d8173fb
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 1, 2024
0bb9f5c
!fixup fix formatting.
fhahn Mar 1, 2024
3a698c0
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 7, 2024
1e41111
!fixup address latest comments, thanks!
fhahn Mar 7, 2024
8d05e99
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 11, 2024
6f4516f
fixup address comments.
fhahn Mar 11, 2024
5f4e4aa
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 11, 2024
c936a4e
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 18, 2024
a9df1d9
!fixup address latest comments, thanks!
fhahn Mar 18, 2024
9f68460
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 25, 2024
74cb095
!fixup address comments, thansk!
fhahn Mar 25, 2024
4211565
Merge remote-tracking branch 'origin/main' into vplan-vector-ptr-iv-t…
fhahn Mar 26, 2024
643969c
!fixup address latest comments, thanks!
fhahn Mar 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 2 additions & 33 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9137,42 +9137,11 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
"Not a pointer induction according to InductionDescriptor!");
assert(cast<PHINode>(getUnderlyingInstr())->getType()->isPointerTy() &&
"Unexpected type.");
assert(!onlyScalarsGenerated(State.VF.isScalable()) &&
"Recipe should have been replaced");

auto *IVR = getParent()->getPlan()->getCanonicalIV();
PHINode *CanonicalIV = cast<PHINode>(State.get(IVR, 0, /*IsScalar*/ true));

if (onlyScalarsGenerated(State.VF.isScalable())) {
// This is the normalized GEP that starts counting at zero.
Value *PtrInd = State.Builder.CreateSExtOrTrunc(
CanonicalIV, IndDesc.getStep()->getType());
// Determine the number of scalars we need to generate for each unroll
// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.
bool IsUniform = vputils::onlyFirstLaneUsed(this);
assert((IsUniform || !State.VF.isScalable()) &&
"Cannot scalarize a scalable VF");
unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();

for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *PartStart =
createStepForVF(State.Builder, PtrInd->getType(), State.VF, Part);

for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
Value *Idx = State.Builder.CreateAdd(
PartStart, ConstantInt::get(PtrInd->getType(), Lane));
Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx);

Value *Step = State.get(getOperand(1), VPIteration(Part, Lane));
Value *SclrGep = emitTransformedIndex(
State.Builder, GlobalIdx, IndDesc.getStartValue(), Step,
IndDesc.getKind(), IndDesc.getInductionBinOp());
SclrGep->setName("next.gep");
State.set(this, SclrGep, VPIteration(Part, Lane));
}
}
return;
}

Type *PhiType = IndDesc.getStep()->getType();

// Build a pointer phi
Expand Down
7 changes: 2 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -860,11 +860,8 @@ void VPlan::execute(VPTransformState *State) {
Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
} else {
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
// TODO: Split off the case that all users of a pointer phi are scalar
// from the VPWidenPointerInductionRecipe.
if (WidenPhi->onlyScalarsGenerated(State->VF.isScalable()))
continue;

assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
"recipe generating only scalars should have been replaced");
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
Phi = cast<PHINode>(GEP->getPointerOperand());
}
Expand Down
32 changes: 22 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,10 @@ class VPInstruction : public VPRecipeWithIRFlags {
BranchOnCount,
BranchOnCond,
ComputeReductionResult,
// Add an offset in bytes (second operand) to a base pointer (first
// operand). Only generates scalar valuse (either for the first lane only or
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

values ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed, thanks!

// for all lanes, depending on its uses).
PtrAdd,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth documenting somewhere what this VPInstruction/Opcode represents, including being scalar.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added, thanks!

};

private:
Expand All @@ -1164,11 +1168,19 @@ class VPInstruction : public VPRecipeWithIRFlags {
/// An optional name that can be used for the generated IR instruction.
const std::string Name;

/// Utility method serving execute(): generates a single instance of the
/// modeled instruction. \returns the generated value for \p Part.
/// In some cases an existing value is returned rather than a generated
/// Returns true if this VPInstruction generates scalar values only.
bool doesGenerateScalars() const;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: add Only at the end, or at the beginning - onlyScalarsGenerated(), as in VPWidenPointerInductionRecipe above?

It's somewhat confusing, given VPInstructions such as ComputeReductionResult that also generate scalar(s) are excluded. See more below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to doesGeneratePerAllLanes as suggested below.


/// Utility methods serving execute(): generates a single instance of the
/// modeled instruction for a given part. \returns the generated value for \p
/// Part. In some cases an existing value is returned rather than a generated
/// one.
Value *generateInstruction(VPTransformState &State, unsigned Part);
Value *generatePerPart(VPTransformState &State, unsigned Part);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth updating documentation?
generateInstructionPerPart(), generateInstructionPerLane()??

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks!


/// Utility methods serving execute(): generates a scalar single instance of
/// the modeled instruction for a given lane. \returns the scalar generated
/// value for lane \p Lane.
Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);

#if !defined(NDEBUG)
/// Return true if the VPInstruction is a floating point math operation, i.e.
Expand Down Expand Up @@ -2488,12 +2500,6 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
/// for floating point inductions.
const FPMathOperator *FPBinOp;

VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
const FPMathOperator *FPBinOp, VPValue *Start,
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
Kind(Kind), FPBinOp(FPBinOp) {}

public:
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
Expand All @@ -2502,6 +2508,12 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
Start, CanonicalIV, Step) {}

VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind,
const FPMathOperator *FPBinOp, VPValue *Start,
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}),
Kind(Kind), FPBinOp(FPBinOp) {}

~VPDerivedIVRecipe() override = default;

VPRecipeBase *clone() override {
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
CachedTypes[OtherV] = ResTy;
return ResTy;
}
case VPInstruction::PtrAdd:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: worth asserting and caching the type of the other operand, i.e., join the above cases of ICmp and FOR Splice?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PtrAdd's operands have different types, with the first one being a pointer and the second one being an integer offset.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, right, of course. Perhaps worth a comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added, thanks!

return inferScalarType(R->getOperand(0));
default:
break;
}
Expand Down
54 changes: 48 additions & 6 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPInstruction::Not:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::PtrAdd:
return false;
default:
return true;
Expand Down Expand Up @@ -273,8 +274,27 @@ VPInstruction::VPInstruction(unsigned Opcode,
assert(isFPMathOp() && "this op can't take fast-math flags");
}

Value *VPInstruction::generateInstruction(VPTransformState &State,
unsigned Part) {
bool VPInstruction::doesGenerateScalars() const {
return Opcode == VPInstruction::PtrAdd;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit confusing: PtrAdd is (currently) the only VPInstruction that may generate multiple scalars - per lane, or a single scalar - per lane zero only, but there are other VPInstructions that generate a single scalar - per part.

Perhaps doesGeneratePerLane() would be more accurate, or doesGeneratePerAllLanes()? See below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to use doesGeneratePerAllLanes.

}

/// Emits the IR for a single scalar instance of this VPInstruction, namely the
/// one at the (part, lane) position given by \p Lane. Both operands are looked
/// up in \p State for that same position. Only VPInstruction::PtrAdd is
/// handled here; every other opcode reaches llvm_unreachable.
/// \returns the value created for \p Lane.
Value *VPInstruction::generatePerLane(VPTransformState &State,
const VPIteration &Lane) {
IRBuilderBase &Builder = State.Builder;
// Instructions created below carry this recipe's debug location.
Builder.SetCurrentDebugLocation(getDebugLoc());

switch (getOpcode()) {
case VPInstruction::PtrAdd: {
// Operand 0 is the base pointer and operand 1 the offset in bytes added to
// it. Name is the optional name given to the generated IR instruction.
auto *P = Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
State.get(getOperand(1), Lane), Name);
return P;
}
default:
// No other opcode has a per-lane lowering in this function.
llvm_unreachable("Unsupported opcode for instruction");
}
}

Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
IRBuilderBase &Builder = State.Builder;
Builder.SetCurrentDebugLocation(getDebugLoc());

Expand Down Expand Up @@ -352,7 +372,8 @@ Value *VPInstruction::generateInstruction(VPTransformState &State,
Value *Step =
createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
Value *Sub = Builder.CreateSub(ScalarTC, Step);
Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
Value *Cmp =
Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: unrelated clang-format fix?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

undone, thanks!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

still visible, missing a commit?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be undone now; I need to be careful to keep clang-format from undoing the change.

Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
return Builder.CreateSelect(Cmp, Sub, Zero);
}
Expand Down Expand Up @@ -515,15 +536,32 @@ void VPInstruction::execute(VPTransformState &State) {
if (hasFastMathFlags())
State.Builder.setFastMathFlags(getFastMathFlags());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *GeneratedValue = generateInstruction(State, Part);
bool OnlyFirstLaneDefined =
vputils::onlyFirstLaneUsed(this) ||
getOpcode() == VPInstruction::ComputeReductionResult;
if (doesGenerateScalars()) {
if (OnlyFirstLaneDefined) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps something like the following would be clearer:

    // First deal with generating multiple values - per all lanes.
    if (doesGeneratePerAllLanes()) {
      for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
           Lane != NumLanes; ++Lane) {
        Value *P = generatePerLane(State, VPIteration(Part, Lane));
        State.set(this, P, VPIteration(Part, Lane));
      }
      continue;
    }

    // Now deal with generating a single value - per first lane or per part.
    Value *GeneratedValue = DoesGeneratePerFirstLaneOnly ?
      generatePerLane(State, VPIteration(Part, 0)) :
      generatePerPart(State, Part);
    ...

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated roughly as suggested. Also updated to check whether only a scalar for the first lane is needed and a scalar can be generated (set to OnlyGenerateFirstLane).

This allows delegating to generatePerPart from generatePerLane, for cases not handled there (all except PtrAdd). This is a consequence of having opcodes that may generate a vector or scalar per part, sharing codegen for both.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This delegation may raise confusion, worth an explanation, or a clearer alternative?

Value *P = generatePerLane(State, VPIteration(Part, 0));
State.set(this, P, Part, /*IsScalar*/ true);
continue;
}

for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
Lane != NumLanes; ++Lane) {
Value *P = generatePerLane(State, VPIteration(Part, Lane));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can assert P here, consistent with asserts of GeneratedValue below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added, thanks!

State.set(this, P, VPIteration(Part, Lane));
}
continue;
}

Value *GeneratedValue = generatePerPart(State, Part);
if (!hasResult())
continue;
assert(GeneratedValue && "generateInstruction must produce a value");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

generatePerPart rather than generateInstruction.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed, thanks!


bool IsVector = GeneratedValue->getType()->isVectorTy();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Confusion above cont'd: if (doesGenerateScalars()) above took care of scalars, IsVector here is expected to be true. But for ComputeReductionResult, e.g..

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to use OnlyGenerateFirstLane; in those cases a scalar must be returned and set.

State.set(this, GeneratedValue, Part, !IsVector);
assert((IsVector || getOpcode() == VPInstruction::ComputeReductionResult ||
State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) &&
assert((IsVector || OnlyFirstLaneDefined || State.VF.isScalar()) &&
"scalar value but not only first lane used");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"scalar value but not only first lane used");
"scalar value but not only first lane defined");

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated, thanks!

}
}
Expand All @@ -537,6 +575,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
default:
return false;
case Instruction::ICmp:
case VPInstruction::PtrAdd:
// TODO: Cover additional opcodes.
return vputils::onlyFirstLaneUsed(this);
case VPInstruction::ActiveLaneMask:
Expand Down Expand Up @@ -594,6 +633,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ComputeReductionResult:
O << "compute-reduction-result";
break;
case VPInstruction::PtrAdd:
O << "ptradd";
break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
Expand Down
51 changes: 44 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,15 +500,18 @@ static void removeDeadRecipes(VPlan &Plan) {
}
}

static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
static VPValue *createScalarIVSteps(VPlan &Plan,
InductionDescriptor::InductionKind Kind,
Instruction::BinaryOps InductionOpcode,
FPMathOperator *FPBinOp,
ScalarEvolution &SE, Instruction *TruncI,
VPValue *StartV, VPValue *Step,
VPBasicBlock::iterator IP) {
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
VPSingleDefRecipe *BaseIV = CanonicalIV;
if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) {
BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step);
if (!CanonicalIV->isCanonical(Kind, StartV, Step)) {
BaseIV = new VPDerivedIVRecipe(Kind, FPBinOp, StartV, CanonicalIV, Step);
HeaderVPBB->insert(BaseIV, IP);
}

Expand Down Expand Up @@ -538,7 +541,9 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
VecPreheader->appendRecipe(Step->getDefiningRecipe());
}

VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step);
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
BaseIV, Step, InductionOpcode,
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags());
HeaderVPBB->insert(Steps, IP);
return Steps;
}
Expand All @@ -547,12 +552,42 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID,
/// provide them by building scalar steps off of the canonical scalar IV and
/// update the original IV's users. This is an optional optimization to reduce
/// the needs of vector extracts.
/// If all users of VPWidenPointerInductionRecipe only use its scalar values,
/// replace it with a PtrAdd (IndStart, ScalarIVSteps (0, Step)).
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This sounds more like a mandatory, functional legalization of induction recipes, rather than an optional, performance optimization - referring to the optimizeInductions() name.

Furthermore, this conceptually introduces two types or stages of the recipes - before and after legalization - which could be represented as two distinct recipes/opcodes, or by recording an indicator whether the recipe was legalized or not. Although these seem unneeded atm.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes this is now required (similar to adjusting reductions). Should we move it to a separate transform or possibly clarify the name of the function?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clarifying the name of the function sounds fine to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated name and comments

static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
SmallVector<VPRecipeBase *> ToRemove;
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi();
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
// Replace wide pointer inductions which have only their scalars used by
// PtrAdd(IndStart, ScalarIVSteps (0, Step)).
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
continue;

const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
VPValue *StartV = Plan.getVPValueOrAddLiveIn(
ConstantInt::get(ID.getStep()->getType(), 0));
VPValue *StepV = PtrIV->getOperand(1);
VPRecipeBase *Steps =
createScalarIVSteps(Plan, InductionDescriptor::IK_IntInduction,
Instruction::Add, nullptr, SE, nullptr, StartV,
StepV, InsertPt)
->getDefiningRecipe();

auto *Recipe =
new VPInstruction(VPInstruction::PtrAdd,
{PtrIV->getStartValue(), Steps->getVPSingleValue()},
PtrIV->getDebugLoc(), "next.gep");

Recipe->insertAfter(Steps);
PtrIV->replaceAllUsesWith(Recipe);
continue;
}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps worth moving here some of the documentation above that described what happens next.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a comment, thanks!

// Replace widened induction with scalar steps for users that only use
// scalars.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good if these two cases that createScalarIVSteps for scalar users only, would share something.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think trying to share createScalarIVSteps for both would make things more complicated, as they have a lot of different arguments. Left as is for now.

auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
if (!WideIV)
continue;
Expand All @@ -562,9 +597,11 @@ static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
continue;

const InductionDescriptor &ID = WideIV->getInductionDescriptor();
VPValue *Steps = createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(),
WideIV->getStartValue(),
WideIV->getStepValue(), InsertPt);
VPValue *Steps = createScalarIVSteps(
Plan, ID.getKind(), ID.getInductionOpcode(),
dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()), SE,
WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
InsertPt);

// Update scalar users of IV to use Step instead.
if (!HasOnlyVectorVFs)
Expand Down
Loading