Skip to content

Commit 5fb2f37

Browse files
committed
[LV]: Improve accuracy of calculating remaining iterations out of MainLoop.
Account for vscale for vscale-based TC when calculating remaining iterations.
1 parent ceaa071 commit 5fb2f37

File tree

3 files changed

+62
-46
lines changed

3 files changed

+62
-46
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4371,8 +4371,21 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
43714371
const SCEV *TC =
43724372
vputils::getSCEVExprForVPValue(getPlanFor(MainLoopVF).getTripCount(), SE);
43734373
assert(!isa<SCEVCouldNotCompute>(TC) && "Trip count SCEV must be computable");
4374+
4375+
// TODO: Maybe this could be removed when SCEV can evaluate expressions with
4376+
// 'vscale'.
4377+
// If TC is multiple of vscale, try to get estimated value:
4378+
if (match(TC, m_scev_Mul(m_SCEV(), m_SCEVVScale()))) {
4379+
std::optional<ElementCount> BestKnownTC =
4380+
getSmallBestKnownTC(PSE, OrigLoop);
4381+
if (BestKnownTC) {
4382+
unsigned EstimatedRuntimeTC =
4383+
estimateElementCount(*BestKnownTC, CM.getVScaleForTuning());
4384+
TC = SE.getConstant(TCType, EstimatedRuntimeTC);
4385+
}
4386+
}
43744387
RemainingIterations =
4375-
SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
4388+
SE.getURemExpr(TC, SE.getElementCount(TCType, EstimatedRuntimeVF * IC));
43764389

43774390
// No iterations left to process in the epilogue.
43784391
if (RemainingIterations->isZero())

llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,10 @@ define void @cost_store_i8(ptr %dst) #0 {
99
; DEFAULT-LABEL: define void @cost_store_i8(
1010
; DEFAULT-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
1111
; DEFAULT-NEXT: iter.check:
12-
; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
12+
; DEFAULT-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
13+
; DEFAULT-NEXT: [[TMP15:%.*]] = shl nuw i64 [[TMP10]], 3
14+
; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 101, [[TMP15]]
15+
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
1316
; DEFAULT: vector.main.loop.iter.check:
1417
; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
1518
; DEFAULT-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
@@ -40,18 +43,23 @@ define void @cost_store_i8(ptr %dst) #0 {
4043
; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
4144
; DEFAULT: vec.epilog.ph:
4245
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
46+
; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
47+
; DEFAULT-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8
48+
; DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 101, [[TMP14]]
49+
; DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 101, [[N_MOD_VF2]]
4350
; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
4451
; DEFAULT: vec.epilog.vector.body:
4552
; DEFAULT-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
4653
; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX1]]
47-
; DEFAULT-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP9]], align 1
48-
; DEFAULT-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
49-
; DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 96
50-
; DEFAULT-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
54+
; DEFAULT-NEXT: store <vscale x 8 x i8> zeroinitializer, ptr [[TMP9]], align 1
55+
; DEFAULT-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP14]]
56+
; DEFAULT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC3]]
57+
; DEFAULT-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
5158
; DEFAULT: vec.epilog.middle.block:
52-
; DEFAULT-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
59+
; DEFAULT-NEXT: [[CMP_N6:%.*]] = icmp eq i64 101, [[N_VEC3]]
60+
; DEFAULT-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
5361
; DEFAULT: vec.epilog.scalar.ph:
54-
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
62+
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
5563
; DEFAULT-NEXT: br label [[LOOP:%.*]]
5664
; DEFAULT: loop:
5765
; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll

Lines changed: 33 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010
target triple = "aarch64-linux-gnu"
1111

1212
; DEBUG: LV: Checking a loop in 'main_vf_vscale_x_16'
13-
; DEBUG: Create Skeleton for epilogue vectorized loop (first pass)
14-
; DEBUG: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1
13+
; DEBUG: Executing best plan with VF=vscale x 16, UF=2
1514

1615
; DEBUG-FORCED: LV: Checking a loop in 'main_vf_vscale_x_16'
1716
; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced.
@@ -20,13 +19,11 @@ target triple = "aarch64-linux-gnu"
2019

2120
define void @main_vf_vscale_x_16(ptr %A) #0 {
2221
; CHECK-LABEL: @main_vf_vscale_x_16(
23-
; CHECK-NEXT: iter.check:
24-
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
25-
; CHECK: vector.main.loop.iter.check:
22+
; CHECK-NEXT: entry:
2623
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
2724
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
2825
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
29-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
26+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3027
; CHECK: vector.ph:
3128
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
3229
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
@@ -150,7 +147,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
150147
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP5]], align 1
151148
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
152149
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
153-
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
150+
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
154151
; CHECK: middle.block:
155152
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
156153
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
@@ -261,7 +258,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
261258
; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
262259
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
263260
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
264-
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
261+
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
265262
; CHECK: middle.block:
266263
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
267264
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
@@ -279,7 +276,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
279276
; CHECK-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP9]], align 1
280277
; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 8
281278
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]]
282-
; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
279+
; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
283280
; CHECK: vec.epilog.middle.block:
284281
; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
285282
; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -382,7 +379,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
382379
; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP6]], align 1
383380
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
384381
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
385-
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
382+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
386383
; CHECK: middle.block:
387384
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]]
388385
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
@@ -400,7 +397,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
400397
; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr [[NEXT_GEP2]], align 1
401398
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 8
402399
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 10000
403-
; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
400+
; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
404401
; CHECK: vec.epilog.middle.block:
405402
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
406403
; CHECK: vec.epilog.scalar.ph:
@@ -482,16 +479,18 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
482479
; CHECK-NEXT: iter.check:
483480
; CHECK-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
484481
; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1033
482+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
483+
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
485484
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
486485
; CHECK: vector.main.loop.iter.check:
487-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
488-
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
489-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
486+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
487+
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3
488+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP3]]
490489
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
491490
; CHECK: vector.ph:
492-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
493-
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
494-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
491+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
492+
; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
493+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]]
495494
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
496495
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
497496
; CHECK: vector.body:
@@ -523,20 +522,22 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
523522
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
524523
; CHECK: vec.epilog.ph:
525524
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
526-
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2
525+
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
526+
; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 2
527+
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], [[TMP23]]
527528
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]]
528529
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
529530
; CHECK: vec.epilog.vector.body:
530531
; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
531-
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX6]]
532-
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x float>, ptr [[TMP18]], align 4
533-
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX6]]
534-
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, ptr [[TMP19]], align 4
535-
; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]]
536-
; CHECK-NEXT: store <2 x float> [[TMP20]], ptr [[TMP19]], align 4
537-
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
538-
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]]
539-
; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
532+
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX6]]
533+
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 2 x float>, ptr [[TMP24]], align 4
534+
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX6]]
535+
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 2 x float>, ptr [[TMP25]], align 4
536+
; CHECK-NEXT: [[TMP26:%.*]] = fmul <vscale x 2 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]]
537+
; CHECK-NEXT: store <vscale x 2 x float> [[TMP26]], ptr [[TMP25]], align 4
538+
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], [[TMP23]]
539+
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]]
540+
; CHECK-NEXT: br i1 [[TMP27]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
540541
; CHECK: vec.epilog.middle.block:
541542
; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[N]], [[N_VEC5]]
542543
; CHECK-NEXT: br i1 [[CMP_N10]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -606,22 +607,16 @@ exit:
606607
}
607608

608609
; Loop with vscale-based trip count vscale x 1024.
609-
; TODO: No epilogue vectorizations should remain when choosing VF = vscale x 4.
610610
define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalias %b) vscale_range(1, 16) #0 {
611611
; CHECK-LABEL: @trip_count_vscale_no_epilogue_iterations(
612-
; CHECK-NEXT: iter.check:
612+
; CHECK-NEXT: entry:
613613
; CHECK-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
614614
; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1024
615-
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
616-
; CHECK: vector.main.loop.iter.check:
617-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
618-
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
619-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
620-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
615+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
621616
; CHECK: vector.ph:
622-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
623-
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
624-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
617+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
618+
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
619+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]]
625620
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
626621
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
627622
; CHECK: vector.body:

0 commit comments

Comments
 (0)