Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 29 additions & 13 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6960,7 +6960,8 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
}
// The VPlan-based cost model is more accurate for partial reduction and
// comparing against the legacy cost isn't desirable.
if (isa<VPPartialReductionRecipe>(&R))
if (auto *VPR = dyn_cast<VPReductionRecipe>(&R);
VPR && VPR->isPartialReduction())
return true;

// The VPlan-based cost model can analyze if recipes are scalar
Expand Down Expand Up @@ -8080,11 +8081,21 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R,
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));

// If the PHI is used by a partial reduction, set the scale factor.
bool UseInLoopReduction = CM.isInLoopReduction(Phi);
bool UseOrderedReductions = CM.useOrderedReductions(RdxDesc);
unsigned ScaleFactor =
getScalingForReduction(RdxDesc.getLoopExitInstr()).value_or(1);
PhiRecipe = new VPReductionPHIRecipe(
Phi, RdxDesc.getRecurrenceKind(), *StartV, CM.isInLoopReduction(Phi),
CM.useOrderedReductions(RdxDesc), ScaleFactor);
RdxStyle Style(RdxNormal{});
if (UseInLoopReduction) {
if (UseOrderedReductions)
Style = RdxOrderedInLoop{};
else
Style = RdxInLoop{};
} else if (ScaleFactor > 1) {
Style = RdxPartial{/*VFScaleFactor=*/ScaleFactor};
}
PhiRecipe = new VPReductionPHIRecipe(Phi, RdxDesc.getRecurrenceKind(),
*StartV, Style);
} else {
// TODO: Currently fixed-order recurrences are modeled as chains of
// first-order recurrences. If there are no users of the intermediate
Expand Down Expand Up @@ -8152,7 +8163,8 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
VPValue *Accumulator = Operands[1];
VPRecipeBase *BinOpRecipe = BinOp->getDefiningRecipe();
if (isa<VPReductionPHIRecipe>(BinOpRecipe) ||
isa<VPPartialReductionRecipe>(BinOpRecipe))
(isa<VPReductionRecipe>(BinOpRecipe) &&
cast<VPReductionRecipe>(BinOpRecipe)->isPartialReduction()))
std::swap(BinOp, Accumulator);

if (ScaleFactor !=
Expand All @@ -8177,11 +8189,11 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
"Expected an ADD or SUB operation for predicated partial "
"reductions (because the neutral element in the mask is zero)!");
Cond = getBlockInMask(Builder.getInsertBlock());
VPValue *Zero = Plan.getConstantInt(Reduction->getType(), 0);
BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc());
}
return new VPPartialReductionRecipe(ReductionOpcode, Accumulator, BinOp, Cond,
ScaleFactor, Reduction);

return new VPReductionRecipe(RecurKind::Add, FastMathFlags(), Reduction,
Accumulator, BinOp, Cond,
RdxPartial{/*VFScaleFactor=*/ScaleFactor});
}

void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
Expand Down Expand Up @@ -8687,9 +8699,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
? RdxDesc.getFastMathFlags()
: FastMathFlags();
auto *RedRecipe = new VPReductionRecipe(
Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
PhiR->isOrdered(), CurrentLinkI->getDebugLoc());
bool UseOrderedReductions = PhiR->isOrdered();
RdxStyle Style = UseOrderedReductions ? RdxStyle(RdxOrderedInLoop{})
: RdxStyle(RdxInLoop{});
auto *RedRecipe =
new VPReductionRecipe(Kind, FMFs, CurrentLinkI, PreviousLink, VecOp,
CondOp, Style, CurrentLinkI->getDebugLoc());
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of its inputs, including CondOp.
// Delete CurrentLink as it will be invalid if its operand is replaced
Expand Down Expand Up @@ -8724,8 +8739,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// Don't output selects for partial reductions because they have an output
// with fewer lanes than the VF. So the operands of the select would have
// different numbers of lanes. Partial reductions mask the input instead.
auto *RR = dyn_cast<VPReductionRecipe>(OrigExitingVPV->getDefiningRecipe());
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
!isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
(!RR || !RR->isPartialReduction())) {
VPValue *Cond = RecipeBuilder.getBlockInMask(PhiR->getParent());
std::optional<FastMathFlags> FMFs =
PhiTy->isFloatingPointTy()
Expand Down
173 changes: 71 additions & 102 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <functional>
#include <string>
#include <utility>
#include <variant>

namespace llvm {

Expand Down Expand Up @@ -553,7 +554,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenIntOrFpInductionSC:
case VPRecipeBase::VPWidenPointerInductionSC:
case VPRecipeBase::VPReductionPHISC:
case VPRecipeBase::VPPartialReductionSC:
return true;
case VPRecipeBase::VPBranchOnMaskSC:
case VPRecipeBase::VPInterleaveEVLSC:
Expand Down Expand Up @@ -2330,6 +2330,23 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
}
};

/// Possible styles of a reduction. An RdxStyle holds exactly one of the tag
/// types below.

/// The reduction is ordered and in-loop.
struct RdxOrderedInLoop {};
/// The reduction is in-loop, but not ordered.
struct RdxInLoop {};
/// The reduction isn't partial, ordered or in-loop.
struct RdxNormal {};
/// The reduction is partial: its output is a vector whose element count is the
/// VF scaled down by VFScaleFactor.
struct RdxPartial {
  /// The factor by which the output is scaled down from the VF.
  unsigned VFScaleFactor;
};
/// The style of a reduction. Note that RdxOrderedInLoop is the first
/// alternative, so a default-constructed RdxStyle holds RdxOrderedInLoop;
/// always initialize a style explicitly with the intended alternative.
using RdxStyle =
    std::variant<RdxOrderedInLoop, RdxInLoop, RdxNormal, RdxPartial>;

/// A recipe for handling reduction phis. The start value is the first operand
/// of the recipe and the incoming value from the backedge is the second
/// operand.
Expand All @@ -2338,32 +2355,21 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// The recurrence kind of the reduction.
const RecurKind Kind;

/// The phi is part of an in-loop reduction.
bool IsInLoop;

/// The phi is part of an ordered reduction. Requires IsInLoop to be true.
bool IsOrdered;

/// When expanding the reduction PHI, the plan's VF element count is divided
/// by this factor to form the reduction phi's VF.
unsigned VFScaleFactor = 1;
RdxStyle Style;

public:
/// Create a new VPReductionPHIRecipe for the reduction \p Phi.
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
bool IsInLoop = false, bool IsOrdered = false,
unsigned VFScaleFactor = 1)
RdxStyle Style)
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
}
Style(Style) {}

~VPReductionPHIRecipe() override = default;

VPReductionPHIRecipe *clone() override {
auto *R = new VPReductionPHIRecipe(
dyn_cast_or_null<PHINode>(getUnderlyingValue()), getRecurrenceKind(),
*getOperand(0), IsInLoop, IsOrdered, VFScaleFactor);
*getOperand(0), Style);
R->addOperand(getBackedgeValue());
return R;
}
Expand All @@ -2373,8 +2379,12 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
/// Generate the phi/select nodes.
void execute(VPTransformState &State) override;

/// Get the factor that the VF of this recipe's output should be scaled by.
unsigned getVFScaleFactor() const { return VFScaleFactor; }
/// Get the factor that the VF of this recipe's output should be scaled by, or
/// null if it isn't scaled.
std::optional<unsigned> getVFScaleFactor() const {
auto *Partial = std::get_if<RdxPartial>(&Style);
return Partial ? std::make_optional(Partial->VFScaleFactor) : std::nullopt;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
Expand All @@ -2391,10 +2401,18 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
RecurKind getRecurrenceKind() const { return Kind; }

/// Returns true, if the phi is part of an ordered reduction.
bool isOrdered() const { return IsOrdered; }
bool isOrdered() const {
return std::holds_alternative<RdxOrderedInLoop>(Style);
}

/// Returns true if the phi is part of an in-loop reduction.
bool isInLoop() const {
return std::holds_alternative<RdxInLoop>(Style) ||
std::holds_alternative<RdxOrderedInLoop>(Style);
}

/// Returns true, if the phi is part of an in-loop reduction.
bool isInLoop() const { return IsInLoop; }
/// Returns true if the reduction outputs a vector with a scaled down VF, i.e.
/// a VF scale factor is present and it is greater than 1.
bool isPartialReduction() const {
  return getVFScaleFactor().value_or(1) > 1;
}

/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
Expand Down Expand Up @@ -2666,23 +2684,25 @@ class LLVM_ABI_FOR_TEST VPInterleaveEVLRecipe final : public VPInterleaveBase {
}
};

/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
/// A recipe to represent inloop, ordered or partial reduction operations. It
/// performs a reduction on a vector operand into a scalar (vector in the case
/// of a partial reduction) value, and adds the result to a chain. The Operands
/// are {ChainOp, VecOp, [Condition]}.
class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {

/// The recurrence kind for the reduction in question.
RecurKind RdxKind;
bool IsOrdered;
/// Whether the reduction is conditional.
bool IsConditional = false;
RdxStyle Style;

protected:
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
FastMathFlags FMFs, Instruction *I,
ArrayRef<VPValue *> Operands, VPValue *CondOp,
bool IsOrdered, DebugLoc DL)
RdxStyle Style, DebugLoc DL)
: VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
IsOrdered(IsOrdered) {
Style(Style) {
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
Expand All @@ -2693,30 +2713,29 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
public:
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
RdxStyle Style, DebugLoc DL = DebugLoc::getUnknown())
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
IsOrdered, DL) {}
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
DL) {}

VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
RdxStyle Style, DebugLoc DL = DebugLoc::getUnknown())
: VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
IsOrdered, DL) {}
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
DL) {}

~VPReductionRecipe() override = default;

VPReductionRecipe *clone() override {
return new VPReductionRecipe(RdxKind, getFastMathFlags(),
getUnderlyingInstr(), getChainOp(), getVecOp(),
getCondOp(), IsOrdered, getDebugLoc());
getCondOp(), Style, getDebugLoc());
}

static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
R->getVPDefID() == VPRecipeBase::VPPartialReductionSC;
R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
}

static inline bool classof(const VPUser *U) {
Expand Down Expand Up @@ -2749,9 +2768,13 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
/// Return the recurrence kind for the in-loop reduction.
RecurKind getRecurrenceKind() const { return RdxKind; }
/// Return true if the in-loop reduction is ordered.
bool isOrdered() const { return IsOrdered; };
/// Return true if this reduction's style is RdxOrderedInLoop.
bool isOrdered() const {
  return std::get_if<RdxOrderedInLoop>(&Style) != nullptr;
}
/// Return true if the in-loop reduction is conditional.
bool isConditional() const { return IsConditional; };
/// Returns true if the reduction outputs a vector with a scaled down VF, i.e.
/// a VF scale factor is present and it is greater than 1.
bool isPartialReduction() const {
  return getVFScaleFactor().value_or(1) > 1;
}
/// The VPValue of the scalar Chain being accumulated.
VPValue *getChainOp() const { return getOperand(0); }
/// The VPValue of the vector value to be reduced.
Expand All @@ -2760,68 +2783,12 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags {
VPValue *getCondOp() const {
return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
}
};

/// A recipe for forming partial reductions. In the loop, an accumulator and
/// vector operand are added together and passed to the next iteration as the
/// next accumulator. After the loop body, the accumulator is reduced to a
/// scalar value.
class VPPartialReductionRecipe : public VPReductionRecipe {
unsigned Opcode;

/// The divisor by which the VF of this recipe's output should be divided
/// during execution.
unsigned VFScaleFactor;

public:
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0,
VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
: VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
VFScaleFactor, ReductionInst) {}
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
VPValue *Cond, unsigned ScaleFactor,
Instruction *ReductionInst = nullptr)
: VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
FastMathFlags(), ReductionInst,
ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
Opcode(Opcode), VFScaleFactor(ScaleFactor) {
[[maybe_unused]] auto *AccumulatorRecipe =
getChainOp()->getDefiningRecipe();
// When cloning as part of a VPExpressionRecipe the chain op could have been
// replaced by a temporary VPValue, so it doesn't have a defining recipe.
assert((!AccumulatorRecipe ||
isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
"Unexpected operand order for partial reduction recipe");
}
~VPPartialReductionRecipe() override = default;

VPPartialReductionRecipe *clone() override {
return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
getCondOp(), VFScaleFactor,
getUnderlyingInstr());
}

VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)

/// Generate the reduction in the loop.
void execute(VPTransformState &State) override;

/// Return the cost of this VPPartialReductionRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

/// Get the binary op's opcode.
unsigned getOpcode() const { return Opcode; }

/// Get the factor that the VF of this recipe's output should be scaled by.
unsigned getVFScaleFactor() const { return VFScaleFactor; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
/// Get the factor that the VF of this recipe's output should be scaled by, or
/// null if it isn't scaled.
std::optional<unsigned> getVFScaleFactor() const {
auto *Partial = std::get_if<RdxPartial>(&Style);
return Partial ? std::make_optional(Partial->VFScaleFactor) : std::nullopt;
}
};

/// A recipe to represent inloop reduction operations with vector-predication
Expand All @@ -2837,7 +2804,9 @@ class LLVM_ABI_FOR_TEST VPReductionEVLRecipe : public VPReductionRecipe {
R.getFastMathFlags(),
cast_or_null<Instruction>(R.getUnderlyingValue()),
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
R.isOrdered(), DL) {}
R.isOrdered() ? RdxStyle(RdxOrderedInLoop{})
: RdxStyle(RdxInLoop{}),
DL) {}

~VPReductionEVLRecipe() override = default;

Expand Down Expand Up @@ -3101,8 +3070,8 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
void decompose();

unsigned getVFScaleFactor() const {
auto *PR = dyn_cast<VPPartialReductionRecipe>(ExpressionRecipes.back());
return PR ? PR->getVFScaleFactor() : 1;
auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
return PR ? PR->getVFScaleFactor().value_or(1) : 1;
}

/// Method for generating code, must not be called as this recipe is abstract.
Expand Down
Loading