Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions llvm/include/llvm/Analysis/IVDescriptors.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ enum class RecurKind {
FindLastIVUMax, ///< FindLast reduction with select(cmp(),x,y) where one of
///< (x,y) is increasing loop induction, and both x and y
///< are integer type, producing a UMax reduction.
FindLast, ///< FindLast reduction with select(cmp(),x,y) where x and y
///< are an integer type, one is the current recurrence value,
///< and the other is an arbitrary value.
// clang-format on
// TODO: Any_of and FindLast reduction need not be restricted to integer type
// only.
Expand Down Expand Up @@ -175,13 +178,12 @@ class RecurrenceDescriptor {
/// Returns a struct describing whether the instruction is either a
/// Select(ICmp(A, B), X, Y), or
/// Select(FCmp(A, B), X, Y)
/// where one of (X, Y) is an increasing (FindLast) or decreasing (FindFirst)
/// loop induction variable, and the other is a PHI value.
// TODO: Support non-monotonic variable. FindLast does not need be restricted
// to increasing loop induction variables.
LLVM_ABI static InstDesc isFindIVPattern(RecurKind Kind, Loop *TheLoop,
PHINode *OrigPhi, Instruction *I,
ScalarEvolution &SE);
/// where one of (X, Y) is an increasing (FindLastIV) or decreasing
/// (FindFirstIV) loop induction variable, or an arbitrary integer value
/// (FindLast), and the other is a PHI value.
LLVM_ABI static InstDesc isFindPattern(RecurKind Kind, Loop *TheLoop,
PHINode *OrigPhi, Instruction *I,
ScalarEvolution &SE);

/// Returns a struct describing if the instruction is a
/// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
Expand Down Expand Up @@ -305,6 +307,13 @@ class RecurrenceDescriptor {
isFindLastIVRecurrenceKind(Kind);
}

/// Checks whether \p Kind denotes a FindLast recurrence, i.e. a
/// select(cmp(),x,y) in which one of (x,y) is the recurrence itself and
/// the other is an arbitrary value.
static bool isFindLastRecurrenceKind(RecurKind Kind) {
  switch (Kind) {
  case RecurKind::FindLast:
    return true;
  default:
    // Every other recurrence kind is not a FindLast recurrence.
    return false;
  }
}

/// Returns the type of the recurrence. This type can be narrower than the
/// actual type of the Phi if the recurrence has been type-promoted.
Type *getRecurrenceType() const { return RecurrenceType; }
Expand Down
37 changes: 29 additions & 8 deletions llvm/lib/Analysis/IVDescriptors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
// TODO: Make type-agnostic.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is not type-agnostic, should this be reflected in the name of the recurrence kind?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It isn't for the other FindFirst/FindLast (though that might be inferred by U/S Min/Max) or AnyOf. I think it's just the fp-based reduction types that are prefixed with an extra F.

I did experiment with treating FindLast separately in AddReductionVar when it checks the type and everything was fine, but decided to leave that out of the initial patch.

case RecurKind::FindLast:
return true;
}
return false;
Expand Down Expand Up @@ -691,9 +693,9 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
// value of the data type or a non-constant value by using mask and multiple
// reduction operations.
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
PHINode *OrigPhi, Instruction *I,
ScalarEvolution &SE) {
RecurrenceDescriptor::isFindPattern(RecurKind Kind, Loop *TheLoop,
PHINode *OrigPhi, Instruction *I,
ScalarEvolution &SE) {
// TODO: Support the vectorization of FindLastIV when the reduction phi is
// used by more than one select instruction. This vectorization is only
// performed when the SCEV of each increasing induction variable used by the
Expand All @@ -702,8 +704,10 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
return InstDesc(false, I);

// We are looking for selects of the form:
// select(cmp(), phi, loop_induction) or
// select(cmp(), loop_induction, phi)
// select(cmp(), phi, value) or
// select(cmp(), value, phi)
// where 'value' is a loop induction variable
// (for FindFirstIV/FindLastIV) or an arbitrary value (for FindLast).
// TODO: Match selects with multi-use cmp conditions.
Value *NonRdxPhi = nullptr;
if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi),
Expand All @@ -712,6 +716,17 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
m_Value(NonRdxPhi)))))
return InstDesc(false, I);

if (isFindLastRecurrenceKind(Kind)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Above (not this line), this function is called isFindIVPattern, and has a large comment explaining that with an example. Maybe update that documentation to explain the "FindLastRecurrence" case (and possibly rename the function to account for that too?).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's also the comment above, "We are looking for selects of the form:", which should now include the FindLast case.

// Must be an integer scalar.
Type *Type = OrigPhi->getType();
if (!Type->isIntegerTy())
return InstDesc(false, I);

// FIXME: Support more complex patterns, including multiple selects.
// The Select must be used only outside the loop and by the PHI.
return InstDesc(I, RecurKind::FindLast);
}

// Returns either FindFirstIV/FindLastIV, if such a pattern is found, or
// std::nullopt.
auto GetRecurKind = [&](Value *V) -> std::optional<RecurKind> {
Expand Down Expand Up @@ -920,8 +935,8 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
Kind == RecurKind::Add || Kind == RecurKind::Mul ||
Kind == RecurKind::Sub || Kind == RecurKind::AddChainWithSubs)
return isConditionalRdxPattern(I);
if (isFindIVRecurrenceKind(Kind) && SE)
return isFindIVPattern(Kind, L, OrigPhi, I, *SE);
if ((isFindIVRecurrenceKind(Kind) || isFindLastRecurrenceKind(Kind)) && SE)
return isFindPattern(Kind, L, OrigPhi, I, *SE);
[[fallthrough]];
case Instruction::FCmp:
case Instruction::ICmp:
Expand Down Expand Up @@ -1118,7 +1133,11 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
<< "\n");
return true;
}

if (AddReductionVar(Phi, RecurKind::FindLast, TheLoop, FMF, RedDes, DB, AC,
DT, SE)) {
LLVM_DEBUG(dbgs() << "Found a FindLast reduction PHI." << *Phi << "\n");
return true;
}
// Not a reduction of known type.
return false;
}
Expand Down Expand Up @@ -1248,6 +1267,8 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
case RecurKind::FMaximumNum:
case RecurKind::FMinimumNum:
return Instruction::FCmp;
case RecurKind::FindLast:
return Instruction::Select;
default:
llvm_unreachable("Unknown recurrence operation");
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5451,6 +5451,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
case RecurKind::FMax:
case RecurKind::FMulAdd:
case RecurKind::AnyOf:
case RecurKind::FindLast:
return true;
default:
return false;
Expand Down
46 changes: 39 additions & 7 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4045,6 +4045,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
continue;
case VPDef::VPReductionSC:
case VPDef::VPActiveLaneMaskPHISC:
case VPDef::VPLastActiveMaskPHISC:
case VPDef::VPWidenCallSC:
case VPDef::VPWidenCanonicalIVSC:
case VPDef::VPWidenCastSC:
Expand Down Expand Up @@ -4265,11 +4266,15 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
ElementCount VF) const {
// Cross iteration phis such as fixed-order recurrences and FMaxNum/FMinNum
// reductions need special handling and are currently unsupported.
// FindLast reductions also require special handling for the synthesized
// mask PHI.
if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
if (!Legal->isReductionVariable(&Phi))
return Legal->isFixedOrderRecurrence(&Phi);
return RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(
Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind());
RecurKind Kind =
Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind();
return RecurrenceDescriptor::isFindLastRecurrenceKind(Kind) ||
RecurrenceDescriptor::isFPMinMaxNumRecurrenceKind(Kind);
}))
return false;

Expand Down Expand Up @@ -4559,6 +4564,12 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
IsaPred<VPReductionPHIRecipe>);

// FIXME: implement interleaving for FindLast transform correctly.
for (auto &[_, RdxDesc] : Legal->getReductionVars())
if (RecurrenceDescriptor::isFindLastRecurrenceKind(
RdxDesc.getRecurrenceKind()))
return 1;

// If we did not calculate the cost for VF (because the user selected the VF)
// then we calculate the cost of VF here.
if (LoopCost == 0) {
Expand Down Expand Up @@ -8488,6 +8499,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
*Plan, Builder))
return nullptr;

// Create whole-vector selects for find-last recurrences.
VPlanTransforms::runPass(VPlanTransforms::convertFindLastRecurrences, *Plan,
RecipeBuilder);

if (useActiveLaneMask(Style)) {
// TODO: Move checks to VPlanTransforms::addActiveLaneMask once
// TailFoldingStyle is visible there.
Expand Down Expand Up @@ -8581,10 +8596,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
continue;

RecurKind Kind = PhiR->getRecurrenceKind();
assert(
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
"AnyOf and FindIV reductions are not allowed for in-loop reductions");
assert(!RecurrenceDescriptor::isFindLastRecurrenceKind(Kind) &&
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
"AnyOf, FindIV, and FindLast reductions are not allowed for in-loop "
"reductions");

// Collect the chain of "link" recipes for the reduction starting at PhiR.
SetVector<VPSingleDefRecipe *> Worklist;
Expand Down Expand Up @@ -8884,7 +8900,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
RecurKind RK = RdxDesc.getRecurrenceKind();
if ((!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) &&
!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) {
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) &&
!RecurrenceDescriptor::isFindLastRecurrenceKind(RK))) {
VPBuilder PHBuilder(Plan->getVectorPreheader());
VPValue *Iden = Plan->getOrAddLiveIn(
getRecurrenceIdentity(RK, PhiTy, RdxDesc.getFastMathFlags()));
Expand Down Expand Up @@ -10006,6 +10023,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Override IC if user provided an interleave count.
IC = UserIC > 0 ? UserIC : IC;

// FIXME: Enable interleaving for last_active reductions.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would be required to enable interleaving?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Completing the final reduction outside of the loop, especially the mask phis.

if (any_of(make_second_range(LVL.getReductionVars()), [&](auto &RdxDesc) {
return RecurrenceDescriptor::isFindLastRecurrenceKind(
RdxDesc.getRecurrenceKind());
})) {
LLVM_DEBUG(dbgs() << "LV: Not interleaving without vectorization due "
<< "to conditional scalar assignments.\n");
IntDiagMsg = {
"ConditionalAssignmentPreventsScalarInterleaving",
"Unable to interleave without vectorization due to conditional "
"assignments"};
InterleaveLoop = false;
IC = 1;
}

// Emit diagnostic messages, if any.
const char *VAPassName = Hints.vectorizeAnalysisPassName();
if (!VectorizeLoop && !InterleaveLoop) {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25174,6 +25174,7 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FindLast:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
Expand Down Expand Up @@ -25315,6 +25316,7 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FindLast:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
Expand Down Expand Up @@ -25421,6 +25423,7 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FindLast:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
Expand Down
12 changes: 10 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPPredInstPHISC:
case VPRecipeBase::VPCanonicalIVPHISC:
case VPRecipeBase::VPActiveLaneMaskPHISC:
case VPRecipeBase::VPLastActiveMaskPHISC:
case VPRecipeBase::VPFirstOrderRecurrencePHISC:
case VPRecipeBase::VPWidenPHISC:
case VPRecipeBase::VPWidenIntOrFpInductionSC:
Expand Down Expand Up @@ -1064,6 +1065,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
/// Returns the value for vscale.
VScale,
OpsEnd = VScale,
/// Extracts the last active lane based on a predicate vector operand.
ExtractLastActive,
};

/// Returns true if this VPInstruction generates scalar values for all lanes.
Expand Down Expand Up @@ -2273,8 +2276,9 @@ class LLVM_ABI_FOR_TEST VPWidenPHIRecipe : public VPSingleDefRecipe,
}

VPWidenPHIRecipe *clone() override {
auto *C = new VPWidenPHIRecipe(cast<PHINode>(getUnderlyingValue()),
getOperand(0), getDebugLoc(), Name);
auto *C =
new VPWidenPHIRecipe(cast_if_present<PHINode>(getUnderlyingValue()),
getOperand(0), getDebugLoc(), Name);
for (VPValue *Op : llvm::drop_begin(operands()))
C->addOperand(Op);
return C;
Expand All @@ -2287,6 +2291,10 @@ class LLVM_ABI_FOR_TEST VPWidenPHIRecipe : public VPSingleDefRecipe,
/// Generate the phi/select nodes.
void execute(VPTransformState &State) override;

/// Return the cost of this VPWidenPHIRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return Type::getIntNTy(Ctx, 64);
case VPInstruction::ExtractLastElement:
case VPInstruction::ExtractLastLanePerPart:
case VPInstruction::ExtractPenultimateElement: {
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::ExtractLastActive: {
Type *BaseTy = inferScalarType(R->getOperand(0));
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
return VecTy->getElementType();
Expand Down
32 changes: 32 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case Instruction::Load:
case VPInstruction::AnyOf:
case VPInstruction::BranchOnCond:
case VPInstruction::Broadcast:
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
case VPInstruction::CalculateTripCountMinusVF:
Expand Down Expand Up @@ -550,6 +551,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::ActiveLaneMask:
case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ReductionStartVector:
case VPInstruction::ExtractLastActive:
return 3;
case VPInstruction::ComputeFindIVResult:
return 4;
Expand Down Expand Up @@ -1005,6 +1007,17 @@ Value *VPInstruction::generate(VPTransformState &State) {
}
case VPInstruction::ResumeForEpilogue:
return State.get(getOperand(0), true);
case VPInstruction::ExtractLastActive: {
Value *Data = State.get(getOperand(0));
Value *Mask = State.get(getOperand(1));
Value *Default = State.get(getOperand(2), /*IsScalar=*/true);
Type *VTy = Data->getType();

Module *M = State.Builder.GetInsertBlock()->getModule();
Function *ExtractLast = Intrinsic::getOrInsertDeclaration(
M, Intrinsic::experimental_vector_extract_last_active, {VTy});
return Builder.CreateCall(ExtractLast, {Data, Mask, Default});
}
default:
llvm_unreachable("Unsupported opcode for instruction");
}
Expand Down Expand Up @@ -1141,6 +1154,15 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
{PredTy, Type::getInt1Ty(Ctx.LLVMCtx)});
return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
}
case VPInstruction::ExtractLastActive: {
Type *ScalarTy = Ctx.Types.inferScalarType(this);
Type *VecTy = toVectorTy(ScalarTy, VF);
Type *MaskTy = toVectorTy(Type::getInt1Ty(Ctx.LLVMCtx), VF);
IntrinsicCostAttributes ICA(
Intrinsic::experimental_vector_extract_last_active, ScalarTy,
{VecTy, MaskTy, ScalarTy});
return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind);
}
case VPInstruction::FirstOrderRecurrenceSplice: {
assert(VF.isVector() && "Scalar FirstOrderRecurrenceSplice?");
SmallVector<int> Mask(VF.getKnownMinValue());
Expand Down Expand Up @@ -1197,6 +1219,7 @@ bool VPInstruction::isVectorToScalar() const {
getOpcode() == VPInstruction::FirstActiveLane ||
getOpcode() == VPInstruction::ComputeAnyOfResult ||
getOpcode() == VPInstruction::ComputeFindIVResult ||
getOpcode() == VPInstruction::ExtractLastActive ||
getOpcode() == VPInstruction::ComputeReductionResult ||
getOpcode() == VPInstruction::AnyOf;
}
Expand Down Expand Up @@ -1260,6 +1283,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::ActiveLaneMask:
case VPInstruction::FirstActiveLane:
case VPInstruction::ExtractLastActive:
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::LogicalAnd:
case VPInstruction::Not:
Expand Down Expand Up @@ -1445,6 +1469,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::Unpack:
O << "unpack";
break;
case VPInstruction::ExtractLastActive:
O << "extract-last-active";
break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
Expand Down Expand Up @@ -4449,6 +4476,11 @@ void VPWidenPHIRecipe::execute(VPTransformState &State) {
State.set(this, VecPhi);
}

/// Cost of a widened phi: the control-flow cost that TTI reports for a PHI
/// instruction under the context's cost kind. The vectorization factor
/// \p VF is not consulted.
InstructionCost VPWidenPHIRecipe::computeCost(ElementCount VF,
                                              VPCostContext &Ctx) const {
  const auto &TTI = Ctx.TTI;
  InstructionCost PhiCost = TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
  return PhiCost;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
Expand Down
Loading