Skip to content

Commit 91475e3

Browse files
committed
[VPlan] Avoid sinking allocas in sinkScalarOperands
Use cannotHoistOrSinkRecipe to forbid sinking allocas.
1 parent c1dc064 commit 91475e3

File tree

2 files changed

+8
-6
lines changed

2 files changed

+8
-6
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -167,8 +167,7 @@ static bool sinkScalarOperands(VPlan &Plan) {
167167
if (!isa<VPReplicateRecipe, VPScalarIVStepsRecipe>(Candidate))
168168
return;
169169

170-
if (Candidate->getParent() == SinkTo || Candidate->mayHaveSideEffects() ||
171-
Candidate->mayReadOrWriteMemory())
170+
if (Candidate->getParent() == SinkTo || cannotHoistOrSinkRecipe(*Candidate))
172171
return;
173172

174173
if (auto *RepR = dyn_cast<VPReplicateRecipe>(Candidate))

llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12,12 +12,15 @@ define i32 @test(ptr %vf1, i64 %n) {
1212
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
1313
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
1414
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 -56)
15+
; CHECK-NEXT: [[TMP18:%.*]] = alloca i8, i64 [[N]], align 16
16+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP18]], i64 0
17+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
1518
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
1619
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
1720
; CHECK: [[PRED_STORE_IF]]:
1821
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
1922
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP2]]
20-
; CHECK-NEXT: [[TMP4:%.*]] = alloca i8, i64 [[N]], align 16
23+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
2124
; CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8
2225
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
2326
; CHECK: [[PRED_STORE_CONTINUE]]:
@@ -26,7 +29,7 @@ define i32 @test(ptr %vf1, i64 %n) {
2629
; CHECK: [[PRED_STORE_IF1]]:
2730
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
2831
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP6]]
29-
; CHECK-NEXT: [[TMP8:%.*]] = alloca i8, i64 [[N]], align 16
32+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
3033
; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
3134
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
3235
; CHECK: [[PRED_STORE_CONTINUE2]]:
@@ -35,7 +38,7 @@ define i32 @test(ptr %vf1, i64 %n) {
3538
; CHECK: [[PRED_STORE_IF3]]:
3639
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 2
3740
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP10]]
38-
; CHECK-NEXT: [[TMP12:%.*]] = alloca i8, i64 [[N]], align 16
41+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
3942
; CHECK-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8
4043
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
4144
; CHECK: [[PRED_STORE_CONTINUE4]]:
@@ -44,7 +47,7 @@ define i32 @test(ptr %vf1, i64 %n) {
4447
; CHECK: [[PRED_STORE_IF5]]:
4548
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 3
4649
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds ptr, ptr [[VF1]], i64 [[TMP14]]
47-
; CHECK-NEXT: [[TMP16:%.*]] = alloca i8, i64 [[N]], align 16
50+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x ptr> [[BROADCAST_SPLAT]], i32 0
4851
; CHECK-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8
4952
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]]
5053
; CHECK: [[PRED_STORE_CONTINUE6]]:

0 commit comments

Comments
 (0)