-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[VPlan] Explicitly handle scalar pointer inductions. #83068
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 21 commits
f4dabdf
b08e892
f56e217
172dbf6
916a7d2
d2c51ec
82d74df
c6797e6
53f2937
b0a78f6
e6d2db8
5065331
f38d682
a166da5
df9cad0
ab14184
133776f
d8173fb
0bb9f5c
3a698c0
1e41111
8d05e99
6f4516f
5f4e4aa
c936a4e
a9df1d9
9f68460
74cb095
4211565
643969c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1155,6 +1155,10 @@ class VPInstruction : public VPRecipeWithIRFlags { | |
BranchOnCount, | ||
BranchOnCond, | ||
ComputeReductionResult, | ||
// Add an offset in bytes (second operand) to a base pointer (first | ||
// operand). Only generates scalar valuse (either for the first lane only or | ||
// for all lanes, depending on its uses). | ||
PtrAdd, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth documenting somewhere what this VPInstruction/Opcode represents, including being scalar. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! |
||
}; | ||
|
||
private: | ||
|
@@ -1164,11 +1168,19 @@ class VPInstruction : public VPRecipeWithIRFlags { | |
/// An optional name that can be used for the generated IR instruction. | ||
const std::string Name; | ||
|
||
/// Utility method serving execute(): generates a single instance of the | ||
/// modeled instruction. \returns the generated value for \p Part. | ||
/// In some cases an existing value is returned rather than a generated | ||
/// Returns true if this VPInstruction generates scalar values only. | ||
bool doesGenerateScalars() const; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: add It's somewhat confusing, given VPInstructions such as ComputeReductionResult that also generate scalar(s) are excluded. See more below. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated to |
||
|
||
/// Utility methods serving execute(): generates a single instance of the | ||
/// modeled instruction for a given part. \returns the generated value for \p | ||
/// Part. In some cases an existing value is returned rather than a generated | ||
/// one. | ||
Value *generateInstruction(VPTransformState &State, unsigned Part); | ||
Value *generatePerPart(VPTransformState &State, unsigned Part); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth updating documentation? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated, thanks! |
||
|
||
/// Utility methods serving execute(): generates a scalar single instance of | ||
/// the modeled instruction for a given lane. \returns the scalar generated | ||
/// value for lane \p Lane. | ||
Value *generatePerLane(VPTransformState &State, const VPIteration &Lane); | ||
|
||
#if !defined(NDEBUG) | ||
/// Return true if the VPInstruction is a floating point math operation, i.e. | ||
|
@@ -2488,12 +2500,6 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { | |
/// for floating point inductions. | ||
const FPMathOperator *FPBinOp; | ||
|
||
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, | ||
const FPMathOperator *FPBinOp, VPValue *Start, | ||
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) | ||
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), | ||
Kind(Kind), FPBinOp(FPBinOp) {} | ||
|
||
public: | ||
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, | ||
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) | ||
|
@@ -2502,6 +2508,12 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { | |
dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()), | ||
Start, CanonicalIV, Step) {} | ||
|
||
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, | ||
const FPMathOperator *FPBinOp, VPValue *Start, | ||
VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) | ||
: VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), | ||
Kind(Kind), FPBinOp(FPBinOp) {} | ||
|
||
~VPDerivedIVRecipe() override = default; | ||
|
||
VPRecipeBase *clone() override { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { | |
CachedTypes[OtherV] = ResTy; | ||
return ResTy; | ||
} | ||
case VPInstruction::PtrAdd: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: worth asserting and caching the type of the other operand, i.e., join the above cases of ICmp and FOR Splice? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. PtrAdd's operand have different types, with the first one being a pointer and the second one being an integer offset. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, right, of course. Perhaps worth a comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! |
||
return inferScalarType(R->getOperand(0)); | ||
default: | ||
break; | ||
} | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -127,6 +127,7 @@ bool VPRecipeBase::mayHaveSideEffects() const { | |||||
case VPInstruction::Not: | ||||||
case VPInstruction::CalculateTripCountMinusVF: | ||||||
case VPInstruction::CanonicalIVIncrementForPart: | ||||||
case VPInstruction::PtrAdd: | ||||||
return false; | ||||||
default: | ||||||
return true; | ||||||
|
@@ -273,8 +274,27 @@ VPInstruction::VPInstruction(unsigned Opcode, | |||||
assert(isFPMathOp() && "this op can't take fast-math flags"); | ||||||
} | ||||||
|
||||||
Value *VPInstruction::generateInstruction(VPTransformState &State, | ||||||
unsigned Part) { | ||||||
bool VPInstruction::doesGenerateScalars() const { | ||||||
return Opcode == VPInstruction::PtrAdd; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a bit confusing: PtrAdd is (currently) the only VPInstruction that may generate multiple scalars - per lane, or a single scalar - per lane zero only, but there are other VPInstructions that generate a single scalar - per part. Perhaps There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated to use |
||||||
} | ||||||
|
||||||
Value *VPInstruction::generatePerLane(VPTransformState &State, | ||||||
const VPIteration &Lane) { | ||||||
IRBuilderBase &Builder = State.Builder; | ||||||
Builder.SetCurrentDebugLocation(getDebugLoc()); | ||||||
|
||||||
switch (getOpcode()) { | ||||||
case VPInstruction::PtrAdd: { | ||||||
auto *P = Builder.CreatePtrAdd(State.get(getOperand(0), Lane), | ||||||
State.get(getOperand(1), Lane), Name); | ||||||
return P; | ||||||
} | ||||||
default: | ||||||
llvm_unreachable("Unsupported opcode for instruction"); | ||||||
} | ||||||
} | ||||||
|
||||||
Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { | ||||||
IRBuilderBase &Builder = State.Builder; | ||||||
Builder.SetCurrentDebugLocation(getDebugLoc()); | ||||||
|
||||||
|
@@ -352,7 +372,8 @@ Value *VPInstruction::generateInstruction(VPTransformState &State, | |||||
Value *Step = | ||||||
createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF); | ||||||
Value *Sub = Builder.CreateSub(ScalarTC, Step); | ||||||
Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); | ||||||
Value *Cmp = | ||||||
Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: unrelated clang-format fix? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. undone, thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. still visible, missing a commit? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should be undone now, need to be careful to avoid clang-format from undoing the change. |
||||||
Value *Zero = ConstantInt::get(ScalarTC->getType(), 0); | ||||||
return Builder.CreateSelect(Cmp, Sub, Zero); | ||||||
} | ||||||
|
@@ -515,15 +536,32 @@ void VPInstruction::execute(VPTransformState &State) { | |||||
if (hasFastMathFlags()) | ||||||
State.Builder.setFastMathFlags(getFastMathFlags()); | ||||||
for (unsigned Part = 0; Part < State.UF; ++Part) { | ||||||
Value *GeneratedValue = generateInstruction(State, Part); | ||||||
bool OnlyFirstLaneDefined = | ||||||
vputils::onlyFirstLaneUsed(this) || | ||||||
getOpcode() == VPInstruction::ComputeReductionResult; | ||||||
if (doesGenerateScalars()) { | ||||||
if (OnlyFirstLaneDefined) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps something like the following would be clearer:
? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated roughly as suggested. also updated to check if only a scalar is needed for the first lane and a scalar can be generated (set to This allows delegating to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This delegation may raise confusion, worth an explanation, or a clearer alternative? |
||||||
Value *P = generatePerLane(State, VPIteration(Part, 0)); | ||||||
State.set(this, P, Part, /*IsScalar*/ true); | ||||||
continue; | ||||||
} | ||||||
|
||||||
for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue(); | ||||||
Lane != NumLanes; ++Lane) { | ||||||
Value *P = generatePerLane(State, VPIteration(Part, Lane)); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can assert P here, consistent with asserts of GeneratedValue below. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! |
||||||
State.set(this, P, VPIteration(Part, Lane)); | ||||||
} | ||||||
continue; | ||||||
} | ||||||
|
||||||
Value *GeneratedValue = generatePerPart(State, Part); | ||||||
if (!hasResult()) | ||||||
continue; | ||||||
assert(GeneratedValue && "generateInstruction must produce a value"); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. generatePerPart rather than generateInstruction. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed, thanks! |
||||||
|
||||||
bool IsVector = GeneratedValue->getType()->isVectorTy(); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Confusion above cont'd: if (doesGenerateScalars()) above took care of scalars, IsVector here is expected to be true. But for ComputeReductionResult, e.g.. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated to use |
||||||
State.set(this, GeneratedValue, Part, !IsVector); | ||||||
assert((IsVector || getOpcode() == VPInstruction::ComputeReductionResult || | ||||||
State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) && | ||||||
assert((IsVector || OnlyFirstLaneDefined || State.VF.isScalar()) && | ||||||
"scalar value but not only first lane used"); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated, thanks! |
||||||
} | ||||||
} | ||||||
|
@@ -537,6 +575,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |||||
default: | ||||||
return false; | ||||||
case Instruction::ICmp: | ||||||
case VPInstruction::PtrAdd: | ||||||
// TODO: Cover additional opcodes. | ||||||
return vputils::onlyFirstLaneUsed(this); | ||||||
case VPInstruction::ActiveLaneMask: | ||||||
|
@@ -594,6 +633,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, | |||||
case VPInstruction::ComputeReductionResult: | ||||||
O << "compute-reduction-result"; | ||||||
break; | ||||||
case VPInstruction::PtrAdd: | ||||||
O << "ptradd"; | ||||||
break; | ||||||
default: | ||||||
O << Instruction::getOpcodeName(getOpcode()); | ||||||
} | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -500,15 +500,18 @@ static void removeDeadRecipes(VPlan &Plan) { | |
} | ||
} | ||
|
||
static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, | ||
static VPValue *createScalarIVSteps(VPlan &Plan, | ||
InductionDescriptor::InductionKind Kind, | ||
Instruction::BinaryOps InductionOpcode, | ||
FPMathOperator *FPBinOp, | ||
ScalarEvolution &SE, Instruction *TruncI, | ||
VPValue *StartV, VPValue *Step, | ||
VPBasicBlock::iterator IP) { | ||
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); | ||
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); | ||
VPSingleDefRecipe *BaseIV = CanonicalIV; | ||
if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) { | ||
BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step); | ||
if (!CanonicalIV->isCanonical(Kind, StartV, Step)) { | ||
BaseIV = new VPDerivedIVRecipe(Kind, FPBinOp, StartV, CanonicalIV, Step); | ||
HeaderVPBB->insert(BaseIV, IP); | ||
} | ||
|
||
|
@@ -538,7 +541,9 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, | |
VecPreheader->appendRecipe(Step->getDefiningRecipe()); | ||
} | ||
|
||
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step); | ||
VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe( | ||
BaseIV, Step, InductionOpcode, | ||
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags()); | ||
HeaderVPBB->insert(Steps, IP); | ||
return Steps; | ||
} | ||
|
@@ -547,12 +552,42 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, | |
/// provide them by building scalar steps off of the canonical scalar IV and | ||
/// update the original IV's users. This is an optional optimization to reduce | ||
/// the needs of vector extracts. | ||
/// If all users of VPWidenPointerInductionRecipe only use its scalar values, | ||
/// replace it with a PtrAdd (IndStart, ScalarIVSteps (0, Step)). | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This sounds more like a mandatory, functional legalization of induction recipes, rather than an optional, performance optimization - referring to the Furthermore, this conceptually introduces two types or stages of the recipes - before and after legalization - which could be represented as two distinct recipes/opcodes, or by recording an indicator whether the recipe was legalized or not. Although these seem unneeded atm. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes this is now required (similar to adjusting reductions). Should we move it to a separate transform or possibly clarify the name of the function? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Clarifying the name of the function sounds fine to me. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated name and comments |
||
static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { | ||
SmallVector<VPRecipeBase *> ToRemove; | ||
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); | ||
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1)); | ||
VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi(); | ||
for (VPRecipeBase &Phi : HeaderVPBB->phis()) { | ||
// Replace wide pointer inductions which have only their scalars used by | ||
// PtrAdd(IndStart, ScalarIVSteps (0, Step)). | ||
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) { | ||
if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF())) | ||
continue; | ||
|
||
const InductionDescriptor &ID = PtrIV->getInductionDescriptor(); | ||
VPValue *StartV = Plan.getVPValueOrAddLiveIn( | ||
ConstantInt::get(ID.getStep()->getType(), 0)); | ||
VPValue *StepV = PtrIV->getOperand(1); | ||
VPRecipeBase *Steps = | ||
createScalarIVSteps(Plan, InductionDescriptor::IK_IntInduction, | ||
Instruction::Add, nullptr, SE, nullptr, StartV, | ||
StepV, InsertPt) | ||
->getDefiningRecipe(); | ||
|
||
auto *Recipe = | ||
new VPInstruction(VPInstruction::PtrAdd, | ||
{PtrIV->getStartValue(), Steps->getVPSingleValue()}, | ||
PtrIV->getDebugLoc(), "next.gep"); | ||
|
||
Recipe->insertAfter(Steps); | ||
PtrIV->replaceAllUsesWith(Recipe); | ||
continue; | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps worth moving here some of the documentation above that described what happens next. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a comment, thanks! |
||
// Replace widened induction with scalar steps for users that only use | ||
// scalars. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would be good if these two cases that createScalarIVSteps for scalar users only, would share something. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think try to share |
||
auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi); | ||
if (!WideIV) | ||
continue; | ||
|
@@ -562,9 +597,11 @@ static void optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { | |
continue; | ||
|
||
const InductionDescriptor &ID = WideIV->getInductionDescriptor(); | ||
VPValue *Steps = createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(), | ||
WideIV->getStartValue(), | ||
WideIV->getStepValue(), InsertPt); | ||
VPValue *Steps = createScalarIVSteps( | ||
Plan, ID.getKind(), ID.getInductionOpcode(), | ||
dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()), SE, | ||
WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(), | ||
InsertPt); | ||
|
||
// Update scalar users of IV to use Step instead. | ||
if (!HasOnlyVectorVFs) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
values ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed, thanks!