Skip to content

Commit 038664c

Browse files
committed
[VPlan] Huge thinko: redo!
1 parent 4baad77 commit 038664c

File tree

5 files changed

+42
-37
lines changed

5 files changed

+42
-37
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 28 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,25 @@ static bool sinkScalarOperands(VPlan &Plan) {
134134
auto Iter = vp_depth_first_deep(Plan.getEntry());
135135
bool ScalarVFOnly = Plan.hasScalarVFOnly();
136136
bool Changed = false;
137+
138+
auto IsValidSinkCandidate = [ScalarVFOnly](VPBasicBlock *SinkTo,
139+
VPSingleDefRecipe *Candidate) {
140+
// We only know how to duplicate VPReplicateRecipes and
141+
// VPScalarIVStepsRecipes for now.
142+
if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Candidate))
143+
return false;
144+
145+
if (Candidate->getParent() == SinkTo || Candidate->mayHaveSideEffects() ||
146+
Candidate->mayReadOrWriteMemory())
147+
return false;
148+
149+
if (auto *RepR = dyn_cast<VPReplicateRecipe>(Candidate))
150+
if (!ScalarVFOnly && RepR->isSingleScalar())
151+
return false;
152+
153+
return true;
154+
};
155+
137156
// First, collect the operands of all recipes in replicate blocks as seeds for
138157
// sinking.
139158
SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
@@ -142,38 +161,23 @@ static bool sinkScalarOperands(VPlan &Plan) {
142161
if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
143162
continue;
144163
VPBasicBlock *VPBB = cast<VPBasicBlock>(EntryVPBB->getSuccessors().front());
145-
if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
164+
if (VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
146165
continue;
147166
for (auto &Recipe : *VPBB) {
148167
for (VPValue *Op : Recipe.operands()) {
149-
auto *Def =
150-
dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe());
151-
if (!Def)
152-
continue;
153-
154-
// We only know how to duplicate VPReplicateRecipes and
155-
// VPScalarIVStepsRecipes for now.
156-
if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Def))
157-
continue;
158-
159-
if (Def->getParent() == VPBB || Def->mayHaveSideEffects() ||
160-
Def->mayReadOrWriteMemory())
161-
continue;
162-
163-
if (auto *RepR = dyn_cast<VPReplicateRecipe>(Op))
164-
if (!ScalarVFOnly && RepR->isSingleScalar())
165-
continue;
166-
167-
WorkList.insert({VPBB, Def});
168+
if (auto *Def =
169+
dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
170+
if (IsValidSinkCandidate(VPBB, Def))
171+
WorkList.insert({VPBB, Def});
168172
}
169173
}
170174
}
171175

172176
// Try to sink each replicate or scalar IV steps recipe in the worklist.
173-
for (const auto &Item : WorkList) {
177+
for (unsigned I = 0; I != WorkList.size(); ++I) {
174178
VPBasicBlock *SinkTo;
175179
VPSingleDefRecipe *SinkCandidate;
176-
std::tie(SinkTo, SinkCandidate) = Item;
180+
std::tie(SinkTo, SinkCandidate) = WorkList[I];
177181

178182
// All recipe users of the sink candidate must be in the same block SinkTo
179183
// or all users outside of SinkTo must have only their first lane used. In
@@ -213,7 +217,8 @@ static bool sinkScalarOperands(VPlan &Plan) {
213217
for (VPValue *Op : SinkCandidate->operands())
214218
if (auto *Def =
215219
dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
216-
WorkList.insert({SinkTo, Def});
220+
if (IsValidSinkCandidate(SinkTo, Def))
221+
WorkList.insert({SinkTo, Def});
217222
Changed = true;
218223
}
219224
return Changed;

llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,8 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize {
133133
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
134134
; CHECK-EMPTY:
135135
; CHECK-NEXT: pred.store.if:
136-
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
137136
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
137+
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
138138
; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEPS]]>
139139
; CHECK-NEXT: REPLICATE ir<%add> = add ir<%rem>, ir<%recur.next>
140140
; CHECK-NEXT: REPLICATE store ir<%add>, ir<%gep>
@@ -292,8 +292,8 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr
292292
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
293293
; CHECK-EMPTY:
294294
; CHECK: pred.store.if:
295-
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
296295
; CHECK-NEXT: REPLICATE ir<%lv.2> = load ir<%gep>
296+
; CHECK-NEXT: REPLICATE ir<%rem> = srem vp<[[SPLICE]]>, ir<%x>
297297
; CHECK-NEXT: REPLICATE ir<%conv.lv.2> = sext ir<%lv.2>
298298
; CHECK-NEXT: REPLICATE ir<%add.1> = add ir<%conv>, ir<%rem>
299299
; CHECK-NEXT: REPLICATE ir<%gep.dst> = getelementptr ir<%dst>, vp<[[STEPS]]>

llvm/test/Transforms/LoopVectorize/load-deref-pred-poison-ub-ops-feeding-pointer.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,21 +44,21 @@ define void @ptr_depends_on_sdiv(ptr noalias %dst, i16 noundef %off) {
4444
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
4545
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
4646
; CHECK: [[PRED_STORE_IF]]:
47-
; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
4847
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i16> [[TMP23]], i32 0
4948
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP13]]
5049
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP24]], align 1
50+
; CHECK-NEXT: [[TMP10:%.*]] = add i16 [[OFFSET_IDX]], 0
5151
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP10]]
5252
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP11]], align 1
5353
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
5454
; CHECK: [[PRED_STORE_CONTINUE]]:
5555
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
5656
; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4]]
5757
; CHECK: [[PRED_STORE_IF3]]:
58-
; CHECK-NEXT: [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1
5958
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i16> [[TMP23]], i32 1
6059
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP25]]
6160
; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP20]], align 1
61+
; CHECK-NEXT: [[TMP16:%.*]] = add i16 [[OFFSET_IDX]], 1
6262
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP16]]
6363
; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 1
6464
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
@@ -120,21 +120,21 @@ define void @ptr_depends_on_possibly_poison_value(ptr noalias %dst, i16 %off) {
120120
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
121121
; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
122122
; CHECK: [[PRED_STORE_IF]]:
123-
; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
124123
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i16> [[TMP3]], i32 0
125124
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP5]]
126125
; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP14]], align 1
126+
; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], 0
127127
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP7]]
128128
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 1
129129
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
130130
; CHECK: [[PRED_STORE_CONTINUE]]:
131131
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
132132
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
133133
; CHECK: [[PRED_STORE_IF1]]:
134-
; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
135134
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i16> [[TMP3]], i32 1
136135
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP15]]
137136
; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP16]], align 1
137+
; CHECK-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], 1
138138
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP11]]
139139
; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 1
140140
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
@@ -266,21 +266,21 @@ define void @ptr_depends_on_possibly_poison_value_from_load(ptr noalias %dst) {
266266
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
267267
; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
268268
; CHECK: [[PRED_STORE_IF]]:
269-
; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
270269
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i16> [[TMP4]], i32 0
271270
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP6]]
272271
; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP15]], align 1
272+
; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[OFFSET_IDX]], 0
273273
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP8]]
274274
; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 1
275275
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
276276
; CHECK: [[PRED_STORE_CONTINUE]]:
277277
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
278278
; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
279279
; CHECK: [[PRED_STORE_IF1]]:
280-
; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
281280
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i16> [[TMP4]], i32 1
282281
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr @src, i16 [[TMP16]]
283282
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP17]], align 1
283+
; CHECK-NEXT: [[TMP12:%.*]] = add i16 [[OFFSET_IDX]], 1
284284
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DST]], i16 [[TMP12]]
285285
; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 1
286286
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]

llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -590,8 +590,8 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
590590
; VF2IC2: [[PRED_STORE_IF]]:
591591
; VF2IC2-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
592592
; VF2IC2-NEXT: [[TMP6:%.*]] = tail call { float, float } @fn2(float [[TMP5]]) #[[ATTR3:[0-9]+]]
593-
; VF2IC2-NEXT: [[TMP8:%.*]] = extractvalue { float, float } [[TMP6]], 0
594593
; VF2IC2-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0
594+
; VF2IC2-NEXT: [[TMP8:%.*]] = extractvalue { float, float } [[TMP6]], 0
595595
; VF2IC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
596596
; VF2IC2-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
597597
; VF2IC2-NEXT: [[TMP11:%.*]] = fdiv float [[TMP8]], [[TMP10]]
@@ -603,8 +603,8 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
603603
; VF2IC2: [[PRED_STORE_IF2]]:
604604
; VF2IC2-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
605605
; VF2IC2-NEXT: [[TMP14:%.*]] = tail call { float, float } @fn2(float [[TMP13]]) #[[ATTR3]]
606-
; VF2IC2-NEXT: [[TMP16:%.*]] = extractvalue { float, float } [[TMP14]], 0
607606
; VF2IC2-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 1
607+
; VF2IC2-NEXT: [[TMP16:%.*]] = extractvalue { float, float } [[TMP14]], 0
608608
; VF2IC2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]]
609609
; VF2IC2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
610610
; VF2IC2-NEXT: [[TMP19:%.*]] = fdiv float [[TMP16]], [[TMP18]]
@@ -616,8 +616,8 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
616616
; VF2IC2: [[PRED_STORE_IF4]]:
617617
; VF2IC2-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0
618618
; VF2IC2-NEXT: [[TMP22:%.*]] = tail call { float, float } @fn2(float [[TMP21]]) #[[ATTR3]]
619-
; VF2IC2-NEXT: [[TMP24:%.*]] = extractvalue { float, float } [[TMP22]], 0
620619
; VF2IC2-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 2
620+
; VF2IC2-NEXT: [[TMP24:%.*]] = extractvalue { float, float } [[TMP22]], 0
621621
; VF2IC2-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP23]]
622622
; VF2IC2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 0
623623
; VF2IC2-NEXT: [[TMP27:%.*]] = fdiv float [[TMP24]], [[TMP26]]
@@ -629,8 +629,8 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
629629
; VF2IC2: [[PRED_STORE_IF6]]:
630630
; VF2IC2-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1
631631
; VF2IC2-NEXT: [[TMP30:%.*]] = tail call { float, float } @fn2(float [[TMP29]]) #[[ATTR3]]
632-
; VF2IC2-NEXT: [[TMP32:%.*]] = extractvalue { float, float } [[TMP30]], 0
633632
; VF2IC2-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 3
633+
; VF2IC2-NEXT: [[TMP32:%.*]] = extractvalue { float, float } [[TMP30]], 0
634634
; VF2IC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
635635
; VF2IC2-NEXT: [[TMP34:%.*]] = extractelement <2 x float> [[WIDE_LOAD1]], i32 1
636636
; VF2IC2-NEXT: [[TMP35:%.*]] = fdiv float [[TMP32]], [[TMP34]]

llvm/test/Transforms/LoopVectorize/struct-return.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,25 +272,25 @@ define void @scalarized_predicated_struct_return(ptr %a) {
272272
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
273273
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
274274
; CHECK: [[PRED_STORE_IF]]:
275-
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
276275
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
277276
; CHECK-NEXT: [[TMP4:%.*]] = tail call { i64, i64 } @bar_i64(i64 [[TMP3]]) #[[ATTR4:[0-9]+]]
278277
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i64, i64 } [[TMP4]], 0
279278
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 0
280279
; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP5]], [[TMP6]]
280+
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0
281281
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
282282
; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP9]], align 8
283283
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
284284
; CHECK: [[PRED_STORE_CONTINUE]]:
285285
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
286286
; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
287287
; CHECK: [[PRED_STORE_IF1]]:
288-
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 1
289288
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
290289
; CHECK-NEXT: [[TMP12:%.*]] = tail call { i64, i64 } @bar_i64(i64 [[TMP11]]) #[[ATTR4]]
291290
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { i64, i64 } [[TMP12]], 0
292291
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i32 1
293292
; CHECK-NEXT: [[TMP15:%.*]] = udiv i64 [[TMP13]], [[TMP14]]
293+
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 1
294294
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP16]]
295295
; CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8
296296
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]

0 commit comments

Comments
 (0)