Skip to content

Commit 02ce6db

Browse files
committed
resolve review comments:
- Transform TC and LoopVF into same numerical space to calculate remaining iterations. Change-Id: Ibf83c3a99689d12669842ce042576d5b1217eeb6
1 parent 171124b commit 02ce6db

File tree

4 files changed

+63
-70
lines changed

4 files changed

+63
-70
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4372,22 +4372,29 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
43724372
vputils::getSCEVExprForVPValue(getPlanFor(MainLoopVF).getTripCount(), SE);
43734373
assert(!isa<SCEVCouldNotCompute>(TC) && "Trip count SCEV must be computable");
43744374

4375-
// TODO: Maybe this could be removed when SCEV can evaluate expressions with
4376-
// 'vscale'.
4377-
// If TC is multiple of vscale, try to get estimated value:
4378-
if (match(TC, m_scev_Mul(m_SCEV(), m_SCEVVScale()))) {
4379-
if (std::optional<ElementCount> BestKnownTC =
4380-
getSmallBestKnownTC(PSE, OrigLoop)) {
4381-
unsigned EstimatedRuntimeTC =
4382-
estimateElementCount(*BestKnownTC, CM.getVScaleForTuning());
4383-
TC = SE.getConstant(TCType, EstimatedRuntimeTC);
4375+
// Calculate runtime estimated value for TC/MainLoopVF only when they are in
4376+
// different numerical space.
4377+
// TODO: This is a workaround until SCEV can evaluate vscale-based
4378+
// expressions.
4379+
if (match(TC, m_scev_Mul(m_SCEV(), m_SCEVVScale())) &&
4380+
MainLoopVF.isScalable()) {
4381+
RemainingIterations =
4382+
SE.getURemExpr(TC, SE.getElementCount(TCType, MainLoopVF * IC));
4383+
} else {
4384+
if (match(TC, m_scev_Mul(m_SCEV(), m_SCEVVScale()))) {
4385+
if (std::optional<ElementCount> BestKnownTC =
4386+
getSmallBestKnownTC(PSE, OrigLoop)) {
4387+
unsigned EstimatedRuntimeTC =
4388+
estimateElementCount(*BestKnownTC, CM.getVScaleForTuning());
4389+
TC = SE.getConstant(TCType, EstimatedRuntimeTC);
4390+
}
43844391
}
4392+
RemainingIterations =
4393+
SE.getURemExpr(TC, SE.getElementCount(TCType, EstimatedRuntimeVF * IC));
43854394
}
4386-
RemainingIterations =
4387-
SE.getURemExpr(TC, SE.getElementCount(TCType, EstimatedRuntimeVF * IC));
43884395

43894396
// No iterations left to process in the epilogue.
4390-
if (RemainingIterations->isZero())
4397+
if (!RemainingIterations || RemainingIterations->isZero())
43914398
return Result;
43924399

43934400
if (MainLoopVF.isFixed()) {

llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ define void @cost_store_i8(ptr %dst) #0 {
3939
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 101, [[N_VEC]]
4040
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
4141
; DEFAULT: vec.epilog.iter.check:
42-
; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
42+
; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP15]]
4343
; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
4444
; DEFAULT: vec.epilog.ph:
4545
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll

Lines changed: 41 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -40,27 +40,12 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
4040
; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP7]], align 1
4141
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
4242
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
43-
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
43+
; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_ITER_CHECK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4444
; CHECK: middle.block:
4545
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
46-
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
47-
; CHECK: vec.epilog.iter.check:
48-
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
49-
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
50-
; CHECK: vec.epilog.ph:
51-
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
52-
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
53-
; CHECK: vec.epilog.vector.body:
54-
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
55-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX1]]
56-
; CHECK-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1
57-
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
58-
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
59-
; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
60-
; CHECK: vec.epilog.middle.block:
61-
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
62-
; CHECK: vec.epilog.scalar.ph:
63-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
46+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
47+
; CHECK: scalar.ph:
48+
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ]
6449
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
6550
; CHECK: for.body:
6651
;
@@ -94,7 +79,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
9479
; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
9580
; CHECK-VF8: vec.epilog.iter.check:
9681
; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
97-
; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
82+
; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
9883
; CHECK-VF8: vec.epilog.ph:
9984
; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
10085
; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -185,7 +170,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
185170
; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
186171
; CHECK-VF8: vec.epilog.iter.check:
187172
; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
188-
; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
173+
; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
189174
; CHECK-VF8: vec.epilog.ph:
190175
; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
191176
; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -264,7 +249,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
264249
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
265250
; CHECK: vec.epilog.iter.check:
266251
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
267-
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
252+
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]]
268253
; CHECK: vec.epilog.ph:
269254
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
270255
; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8
@@ -316,7 +301,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
316301
; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
317302
; CHECK-VF8: vec.epilog.iter.check:
318303
; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
319-
; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
304+
; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
320305
; CHECK-VF8: vec.epilog.ph:
321306
; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
322307
; CHECK-VF8-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8
@@ -386,7 +371,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
386371
; CHECK: vec.epilog.iter.check:
387372
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
388373
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
389-
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
374+
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7]]
390375
; CHECK: vec.epilog.ph:
391376
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
392377
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 10000
@@ -437,7 +422,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
437422
; CHECK-VF8: vec.epilog.iter.check:
438423
; CHECK-VF8-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
439424
; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
440-
; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
425+
; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
441426
; CHECK-VF8: vec.epilog.ph:
442427
; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
443428
; CHECK-VF8-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 10000
@@ -479,8 +464,6 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
479464
; CHECK-NEXT: iter.check:
480465
; CHECK-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
481466
; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1033
482-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
483-
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
484467
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
485468
; CHECK: vector.main.loop.iter.check:
486469
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -495,11 +478,11 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
495478
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
496479
; CHECK: vector.body:
497480
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
498-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
499-
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
500-
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
501-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 [[TMP6]]
502-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP4]], align 4
481+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
482+
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
483+
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP15]], 2
484+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i64 [[TMP6]]
485+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP14]], align 4
503486
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
504487
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
505488
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
@@ -511,31 +494,29 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
511494
; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
512495
; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
513496
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
514-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
497+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
515498
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
516-
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
499+
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
517500
; CHECK: middle.block:
518501
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
519502
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
520503
; CHECK: vec.epilog.iter.check:
521504
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
522-
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
505+
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]]
523506
; CHECK: vec.epilog.ph:
524507
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
525-
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
526-
; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 2
527-
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], [[TMP23]]
508+
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2
528509
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]]
529510
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
530511
; CHECK: vec.epilog.vector.body:
531512
; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
532513
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX6]]
533-
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 2 x float>, ptr [[TMP24]], align 4
514+
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x float>, ptr [[TMP24]], align 4
534515
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX6]]
535-
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 2 x float>, ptr [[TMP25]], align 4
536-
; CHECK-NEXT: [[TMP26:%.*]] = fmul <vscale x 2 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]]
537-
; CHECK-NEXT: store <vscale x 2 x float> [[TMP26]], ptr [[TMP25]], align 4
538-
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], [[TMP23]]
516+
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, ptr [[TMP25]], align 4
517+
; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]]
518+
; CHECK-NEXT: store <2 x float> [[TMP20]], ptr [[TMP25]], align 4
519+
; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
539520
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]]
540521
; CHECK-NEXT: br i1 [[TMP27]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
541522
; CHECK: vec.epilog.middle.block:
@@ -609,18 +590,23 @@ exit:
609590
; Loop with vscale-based trip count vscale x 1024.
610591
define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalias %b) vscale_range(1, 16) #0 {
611592
; CHECK-LABEL: @trip_count_vscale_no_epilogue_iterations(
612-
; CHECK-NEXT: entry:
593+
; CHECK-NEXT: iter.check:
613594
; CHECK-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
614595
; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1024
615596
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
597+
; CHECK: vector.main.loop.iter.check:
598+
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
599+
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 3
600+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP17]]
601+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]]
616602
; CHECK: vector.ph:
617603
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
618604
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
619605
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]]
620606
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
621607
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
622608
; CHECK: vector.body:
623-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
609+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
624610
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
625611
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
626612
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
@@ -637,20 +623,20 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
637623
; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
638624
; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
639625
; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
640-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
641-
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
642-
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
626+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
627+
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
628+
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
643629
; CHECK: middle.block:
644630
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
645631
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
646632
; CHECK: vec.epilog.iter.check:
647633
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
648-
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
634+
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF14]]
649635
; CHECK: vec.epilog.ph:
650-
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
651-
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
636+
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ]
637+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
652638
; CHECK: vec.epilog.vector.body:
653-
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
639+
; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[FOR_BODY]] ]
654640
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX4]]
655641
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x float>, ptr [[TMP18]], align 4
656642
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX4]]
@@ -659,12 +645,12 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
659645
; CHECK-NEXT: store <2 x float> [[TMP20]], ptr [[TMP19]], align 4
660646
; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX4]], 2
661647
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N]]
662-
; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
648+
; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
663649
; CHECK: vec.epilog.middle.block:
664-
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
650+
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[SCALAR_PH]]
665651
; CHECK: vec.epilog.scalar.ph:
666652
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
667-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
653+
; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
668654
; CHECK: for.body:
669655
;
670656
; CHECK-VF8-LABEL: @trip_count_vscale_no_epilogue_iterations(

0 commit comments

Comments
 (0)