Skip to content

Commit d3d6f85

Browse files
committed
[VPlan] Rewrite sinkScalarOperands
Rewrite sinkScalarOperands in VPlanTransforms for clarity, with minimal test changes.
1 parent 6058c0c commit d3d6f85

File tree

3 files changed

+46
-43
lines changed

3 files changed

+46
-43
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -131,60 +131,63 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
131131
}
132132

133133
static bool sinkScalarOperands(VPlan &Plan) {
134-
auto Iter = vp_depth_first_deep(Plan.getEntry());
134+
bool ScalarVFOnly = Plan.hasScalarVFOnly();
135135
bool Changed = false;
136136
// First, collect the operands of all recipes in replicate blocks as seeds for
137137
// sinking.
138138
SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
139-
for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
139+
for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(
140+
vp_depth_first_deep(Plan.getEntry()))) {
140141
VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
141142
if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
142143
continue;
143-
VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
144+
VPBasicBlock *VPBB =
145+
dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors().front());
144146
if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
145147
continue;
146148
for (auto &Recipe : *VPBB) {
147-
for (VPValue *Op : Recipe.operands())
148-
if (auto *Def =
149-
dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
150-
WorkList.insert({VPBB, Def});
149+
for (VPValue *Op : Recipe.operands()) {
150+
auto *Def =
151+
dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe());
152+
if (!Def)
153+
continue;
154+
155+
// We only know how to duplicate VPReplicateRecipes and
156+
// VPScalarIVStepsRecipes for now.
157+
if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Def))
158+
continue;
159+
160+
if (Def->getParent() == VPBB || Def->mayHaveSideEffects() ||
161+
Def->mayReadOrWriteMemory())
162+
continue;
163+
164+
if (auto *RepR = dyn_cast<VPReplicateRecipe>(Op))
165+
if (!ScalarVFOnly && RepR->isSingleScalar())
166+
continue;
167+
168+
WorkList.insert({VPBB, Def});
169+
}
151170
}
152171
}
153172

154-
bool ScalarVFOnly = Plan.hasScalarVFOnly();
155173
// Try to sink each replicate or scalar IV steps recipe in the worklist.
156-
for (unsigned I = 0; I != WorkList.size(); ++I) {
174+
for (const auto &Item : WorkList) {
157175
VPBasicBlock *SinkTo;
158176
VPSingleDefRecipe *SinkCandidate;
159-
std::tie(SinkTo, SinkCandidate) = WorkList[I];
160-
if (SinkCandidate->getParent() == SinkTo ||
161-
SinkCandidate->mayHaveSideEffects() ||
162-
SinkCandidate->mayReadOrWriteMemory())
163-
continue;
164-
if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
165-
if (!ScalarVFOnly && RepR->isSingleScalar())
166-
continue;
167-
} else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
168-
continue;
177+
std::tie(SinkTo, SinkCandidate) = Item;
169178

170-
bool NeedsDuplicating = false;
171179
// All recipe users of the sink candidate must be in the same block SinkTo
172-
// or all users outside of SinkTo must be uniform-after-vectorization (
173-
// i.e., only first lane is used) . In the latter case, we need to duplicate
174-
// SinkCandidate.
175-
auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
176-
SinkCandidate](VPUser *U) {
177-
auto *UI = cast<VPRecipeBase>(U);
178-
if (UI->getParent() == SinkTo)
179-
return true;
180-
NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
181-
// We only know how to duplicate VPReplicateRecipes and
182-
// VPScalarIVStepsRecipes for now.
183-
return NeedsDuplicating &&
184-
isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(SinkCandidate);
185-
};
186-
if (!all_of(SinkCandidate->users(), CanSinkWithUser))
180+
// or all users outside of SinkTo must have only their first lane used. In
181+
// the latter case, we need to duplicate SinkCandidate.
182+
auto UsersOutsideSinkTo =
183+
make_filter_range(SinkCandidate->users(), [SinkTo](VPUser *U) {
184+
return cast<VPRecipeBase>(U)->getParent() != SinkTo;
185+
});
186+
if (any_of(UsersOutsideSinkTo, [SinkCandidate](VPUser *U) {
187+
return !U->onlyFirstLaneUsed(SinkCandidate);
188+
}))
187189
continue;
190+
bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
188191

189192
if (NeedsDuplicating) {
190193
if (ScalarVFOnly)

llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,8 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
133133
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
134134
; CHECK-EMPTY:
135135
; CHECK-NEXT: pred.store.if:
136-
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
137136
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
137+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
138138
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
139139
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>
140140
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
@@ -292,8 +292,8 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
292292
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
293293
; CHECK-EMPTY:
294294
; CHECK: pred.store.if:
295-
; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep>
296295
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
296+
; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep>
297297
; CHECK-NEXT: REPLICATE ir<%conv.lv.2> = sext ir<%lv.2>
298298
; CHECK-NEXT: REPLICATE ir<%add.1> = add ir<%conv>, ir<%rem>
299299
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]>

llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,21 +44,21 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
4444
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
4545
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
4646
; CHECK: [[PRED_STORE_IF]]:
47+
; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
4748
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i16> [[TMP23]], i32 0
4849
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP13]]
4950
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP24]], align 1
50-
; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
5151
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP10]]
5252
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 1
5353
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
5454
; CHECK: [[PRED_STORE_CONTINUE]]:
5555
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
5656
; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4]]
5757
; CHECK: [[PRED_STORE_IF3]]:
58+
; CHECK-NEXT: [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1
5859
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[TMP23]], i32 1
5960
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP25]]
6061
; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP20]], align 1
61-
; CHECK-NEXT: [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1
6262
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP16]]
6363
; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 1
6464
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -120,21 +120,21 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
120120
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
121121
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
122122
; CHECK: [[PRED_STORE_IF]]:
123+
; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
123124
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0
124125
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP5]]
125126
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP14]], align 1
126-
; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
127127
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]]
128128
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1
129129
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
130130
; CHECK: [[PRED_STORE_CONTINUE]]:
131131
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
132132
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
133133
; CHECK: [[PRED_STORE_IF1]]:
134+
; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
134135
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i16> [[TMP3]], i32 1
135136
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP15]]
136137
; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP16]], align 1
137-
; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
138138
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP11]]
139139
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1
140140
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -266,21 +266,21 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
266266
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
267267
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
268268
; CHECK: [[PRED_STORE_IF]]:
269+
; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
269270
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
270271
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP6]]
271272
; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP15]], align 1
272-
; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
273273
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]]
274274
; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1
275275
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
276276
; CHECK: [[PRED_STORE_CONTINUE]]:
277277
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
278278
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
279279
; CHECK: [[PRED_STORE_IF1]]:
280+
; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
280281
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
281282
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP16]]
282283
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP17]], align 1
283-
; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
284284
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP12]]
285285
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1
286286
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]

0 commit comments

Comments
 (0)