Skip to content

Commit b8359f9

Browse files
committed
[VPlan] Introduce cannotHoistOrSinkRecipe
Factor out common code to determine legality of hoisting and sinking. In the case of the additional use-sites, functional changes, if any, would amount to esoteric bugs being fixed.
1 parent 9df85ce commit b8359f9

File tree

3 files changed

+31
-55
lines changed

3 files changed

+31
-55
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
7070
return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();
7171
case VPCanonicalIVPHISC:
7272
case VPBranchOnMaskSC:
73+
case VPDerivedIVSC:
7374
case VPFirstOrderRecurrencePHISC:
7475
case VPReductionPHISC:
7576
case VPScalarIVStepsSC:
@@ -86,6 +87,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
8687
case VPWidenLoadEVLSC:
8788
case VPWidenLoadSC:
8889
case VPWidenPHISC:
90+
case VPWidenPointerInductionSC:
8991
case VPWidenSC:
9092
case VPWidenSelectSC: {
9193
const Instruction *I =
@@ -119,6 +121,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
119121
case VPWidenIntrinsicSC:
120122
return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();
121123
case VPBranchOnMaskSC:
124+
case VPDerivedIVSC:
122125
case VPFirstOrderRecurrencePHISC:
123126
case VPPredInstPHISC:
124127
case VPScalarIVStepsSC:
@@ -134,6 +137,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
134137
case VPWidenGEPSC:
135138
case VPWidenIntOrFpInductionSC:
136139
case VPWidenPHISC:
140+
case VPWidenPointerInductionSC:
137141
case VPWidenSC:
138142
case VPWidenSelectSC: {
139143
const Instruction *I =

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,24 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
130130
return true;
131131
}
132132

133+
/// Return true if we do not know how to (mechanically) hoist or sink \p R out
134+
/// of a loop region.
135+
static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {
136+
// Assumes don't alias anything or throw; as long as they're guaranteed to
137+
// execute, they're safe to hoist.
138+
if (match(&R, m_Intrinsic<Intrinsic::assume>()))
139+
return false;
140+
141+
// TODO: Relax checks in the future, e.g. we could also hoist reads, if their
142+
// memory location is not modified in the vector loop.
143+
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
144+
return true;
145+
146+
// Allocas cannot be hoisted.
147+
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
148+
return RepR && RepR->getOpcode() == Instruction::Alloca;
149+
}
150+
133151
static bool sinkScalarOperands(VPlan &Plan) {
134152
auto Iter = vp_depth_first_deep(Plan.getEntry());
135153
bool Changed = false;
@@ -1795,7 +1813,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
17951813
VPDT.properlyDominates(Previous, SinkCandidate))
17961814
return true;
17971815

1798-
if (SinkCandidate->mayHaveSideEffects())
1816+
if (cannotHoistOrSinkRecipe(*SinkCandidate))
17991817
return false;
18001818

18011819
WorkList.push_back(SinkCandidate);
@@ -1835,7 +1853,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
18351853
static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
18361854
VPRecipeBase *Previous,
18371855
VPDominatorTree &VPDT) {
1838-
if (Previous->mayHaveSideEffects() || Previous->mayReadFromMemory())
1856+
if (cannotHoistOrSinkRecipe(*Previous))
18391857
return false;
18401858

18411859
// Collect recipes that need hoisting.
@@ -1882,11 +1900,6 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
18821900
return nullptr;
18831901
return HoistCandidate;
18841902
};
1885-
auto CanHoist = [&](VPRecipeBase *HoistCandidate) {
1886-
// Avoid hoisting candidates with side-effects, as we do not yet analyze
1887-
// associated dependencies.
1888-
return !HoistCandidate->mayHaveSideEffects();
1889-
};
18901903

18911904
if (!NeedsHoisting(Previous->getVPSingleValue()))
18921905
return true;
@@ -1898,7 +1911,7 @@ static bool hoistPreviousBeforeFORUsers(VPFirstOrderRecurrencePHIRecipe *FOR,
18981911
VPRecipeBase *Current = HoistCandidates[I];
18991912
assert(Current->getNumDefinedValues() == 1 &&
19001913
"only recipes with a single defined value expected");
1901-
if (!CanHoist(Current))
1914+
if (cannotHoistOrSinkRecipe(*Current))
19021915
return false;
19031916

19041917
for (VPValue *Op : Current->operands()) {
@@ -2127,24 +2140,6 @@ void VPlanTransforms::cse(VPlan &Plan) {
21272140
static void licm(VPlan &Plan) {
21282141
VPBasicBlock *Preheader = Plan.getVectorPreheader();
21292142

2130-
// Return true if we do not know how to (mechanically) hoist a given recipe
2131-
// out of a loop region.
2132-
auto CannotHoistRecipe = [](VPRecipeBase &R) {
2133-
// Assumes don't alias anything or throw; as long as they're guaranteed to
2134-
// execute, they're safe to hoist.
2135-
if (match(&R, m_Intrinsic<Intrinsic::assume>()))
2136-
return false;
2137-
2138-
// TODO: Relax checks in the future, e.g. we could also hoist reads, if
2139-
// their memory location is not modified in the vector loop.
2140-
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
2141-
return true;
2142-
2143-
// Allocas cannot be hoisted.
2144-
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
2145-
return RepR && RepR->getOpcode() == Instruction::Alloca;
2146-
};
2147-
21482143
// Hoist any loop invariant recipes from the vector loop region to the
21492144
// preheader. Perform a shallow traversal of the vector loop region, to
21502145
// exclude recipes in replicate regions. Since the top-level blocks in the
@@ -2156,7 +2151,7 @@ static void licm(VPlan &Plan) {
21562151
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
21572152
vp_depth_first_shallow(LoopRegion->getEntry()))) {
21582153
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
2159-
if (CannotHoistRecipe(R))
2154+
if (cannotHoistOrSinkRecipe(R))
21602155
continue;
21612156
if (any_of(R.operands(), [](VPValue *Op) {
21622157
return !Op->isDefinedOutsideLoopRegions();

llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll

Lines changed: 5 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -429,41 +429,18 @@ exit:
429429
define void @hoist_previous_value_and_operand_load(ptr %dst, i64 %mask) {
430430
; CHECK-LABEL: @hoist_previous_value_and_operand_load(
431431
; CHECK-NEXT: bb:
432-
; CHECK-NEXT: br label [[LOOP:%.*]]
433-
; CHECK: vector.ph:
434-
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
435-
; CHECK: vector.body:
436-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[LOOP]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
437-
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 1>, [[LOOP]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
438-
; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[LOOP]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
439-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
440-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[OFFSET_IDX]]
441-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = load i32, ptr [[DST]], align 4
442-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[VECTOR_RECUR_EXTRACT]], i64 0
443-
; CHECK-NEXT: [[TMP2]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
444-
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
445-
; CHECK-NEXT: [[TMP4]] = or <4 x i32> [[TMP12]], splat (i32 3)
446-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
447-
; CHECK-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP0]], align 4
448-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
449-
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 336
450-
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
451-
; CHECK: middle.block:
452-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
453-
; CHECK-NEXT: br label [[SCALAR_PH:%.*]]
454-
; CHECK: scalar.ph:
455432
; CHECK-NEXT: br label [[LOOP1:%.*]]
456433
; CHECK: loop:
457-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 337, [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP1]] ]
458-
; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[SCALAR_PH]] ], [ [[TRUNC:%.*]], [[LOOP1]] ]
459-
; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT2]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[LOOP1]] ]
434+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 1, [[BB:%.*]] ], [ [[ADD:%.*]], [[LOOP1]] ]
435+
; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ 1, [[BB]] ], [ [[TRUNC:%.*]], [[LOOP1]] ]
436+
; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ 0, [[BB]] ], [ [[OR:%.*]], [[LOOP1]] ]
460437
; CHECK-NEXT: [[OR]] = or i32 [[FOR_1]], 3
461438
; CHECK-NEXT: [[ADD]] = add i64 [[IV]], 1
462-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
439+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[IV]]
463440
; CHECK-NEXT: store i32 [[FOR_2]], ptr [[GEP]], align 4
464441
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[IV]], 337
465442
; CHECK-NEXT: [[TRUNC]] = load i32, ptr [[DST]], align 4
466-
; CHECK-NEXT: br i1 [[ICMP]], label [[LOOP1]], label [[EXIT:%.*]], !llvm.loop [[LOOP8:![0-9]+]]
443+
; CHECK-NEXT: br i1 [[ICMP]], label [[LOOP1]], label [[EXIT:%.*]]
467444
; CHECK: exit:
468445
; CHECK-NEXT: ret void
469446
;

0 commit comments

Comments
 (0)