@@ -40,27 +40,12 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
4040; CHECK-NEXT: store <vscale x 16 x i8> splat (i8 1), ptr [[TMP7]], align 1
4141; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
4242; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
43- ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK :%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
43+ ; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_ITER_CHECK :%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4444; CHECK: middle.block:
4545; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
46- ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
47- ; CHECK: vec.epilog.iter.check:
48- ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
49- ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
50- ; CHECK: vec.epilog.ph:
51- ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
52- ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
53- ; CHECK: vec.epilog.vector.body:
54- ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
55- ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX1]]
56- ; CHECK-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1
57- ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
58- ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
59- ; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
60- ; CHECK: vec.epilog.middle.block:
61- ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
62- ; CHECK: vec.epilog.scalar.ph:
63- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
46+ ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
47+ ; CHECK: scalar.ph:
48+ ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ]
6449; CHECK-NEXT: br label [[FOR_BODY:%.*]]
6550; CHECK: for.body:
6651;
@@ -94,7 +79,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
9479; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
9580; CHECK-VF8: vec.epilog.iter.check:
9681; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
97- ; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
82+ ; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
9883; CHECK-VF8: vec.epilog.ph:
9984; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
10085; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -185,7 +170,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
185170; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
186171; CHECK-VF8: vec.epilog.iter.check:
187172; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
188- ; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
173+ ; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
189174; CHECK-VF8: vec.epilog.ph:
190175; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
191176; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -264,7 +249,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
264249; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
265250; CHECK: vec.epilog.iter.check:
266251; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
267- ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
252+ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]]
268253; CHECK: vec.epilog.ph:
269254; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
270255; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8
@@ -316,7 +301,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
316301; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
317302; CHECK-VF8: vec.epilog.iter.check:
318303; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
319- ; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
304+ ; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
320305; CHECK-VF8: vec.epilog.ph:
321306; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
322307; CHECK-VF8-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8
@@ -386,7 +371,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
386371; CHECK: vec.epilog.iter.check:
387372; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
388373; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
389- ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
374+ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7]]
390375; CHECK: vec.epilog.ph:
391376; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
392377; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 10000
@@ -437,7 +422,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
437422; CHECK-VF8: vec.epilog.iter.check:
438423; CHECK-VF8-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
439424; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
440- ; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
425+ ; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
441426; CHECK-VF8: vec.epilog.ph:
442427; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
443428; CHECK-VF8-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 10000
@@ -479,8 +464,6 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
479464; CHECK-NEXT: iter.check:
480465; CHECK-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
481466; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1033
482- ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
483- ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
484467; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
485468; CHECK: vector.main.loop.iter.check:
486469; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
@@ -495,11 +478,11 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
495478; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
496479; CHECK: vector.body:
497480; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
498- ; CHECK-NEXT: [[TMP4 :%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
499- ; CHECK-NEXT: [[TMP5 :%.*]] = call i64 @llvm.vscale.i64()
500- ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5 ]], 2
501- ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4 ]], i64 [[TMP6]]
502- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP4 ]], align 4
481+ ; CHECK-NEXT: [[TMP14 :%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
482+ ; CHECK-NEXT: [[TMP15 :%.*]] = call i64 @llvm.vscale.i64()
483+ ; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP15 ]], 2
484+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14 ]], i64 [[TMP6]]
485+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP14 ]], align 4
503486; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
504487; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[INDEX]]
505488; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
@@ -511,31 +494,29 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
511494; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
512495; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
513496; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
514- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3 ]]
497+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5 ]]
515498; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
516- ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14 :![0-9]+]]
499+ ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13 :![0-9]+]]
517500; CHECK: middle.block:
518501; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
519502; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
520503; CHECK: vec.epilog.iter.check:
521504; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
522- ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
505+ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]]
523506; CHECK: vec.epilog.ph:
524507; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
525- ; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
526- ; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 2
527- ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], [[TMP23]]
508+ ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2
528509; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]]
529510; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
530511; CHECK: vec.epilog.vector.body:
531512; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
532513; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX6]]
533- ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 2 x float>, ptr [[TMP24]], align 4
514+ ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x float>, ptr [[TMP24]], align 4
534515; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX6]]
535- ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 2 x float>, ptr [[TMP25]], align 4
536- ; CHECK-NEXT: [[TMP26 :%.*]] = fmul <vscale x 2 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]]
537- ; CHECK-NEXT: store <vscale x 2 x float> [[TMP26 ]], ptr [[TMP25]], align 4
538- ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], [[TMP23]]
516+ ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, ptr [[TMP25]], align 4
517+ ; CHECK-NEXT: [[TMP20 :%.*]] = fmul <2 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]]
518+ ; CHECK-NEXT: store <2 x float> [[TMP20 ]], ptr [[TMP25]], align 4
519+ ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
539520; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]]
540521; CHECK-NEXT: br i1 [[TMP27]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
541522; CHECK: vec.epilog.middle.block:
@@ -609,18 +590,23 @@ exit:
609590; Loop with vscale-based trip count vscale x 1024.
610591define void @trip_count_vscale_no_epilogue_iterations (ptr noalias %a , ptr noalias %b ) vscale_range(1 , 16 ) #0 {
611592; CHECK-LABEL: @trip_count_vscale_no_epilogue_iterations(
612- ; CHECK-NEXT: entry :
593+ ; CHECK-NEXT: iter.check :
613594; CHECK-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
614595; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1024
615596; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
597+ ; CHECK: vector.main.loop.iter.check:
598+ ; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
599+ ; CHECK-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 3
600+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP17]]
601+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]]
616602; CHECK: vector.ph:
617603; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
618604; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
619605; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]]
620606; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
621607; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
622608; CHECK: vector.body:
623- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH ]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
609+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1 ]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
624610; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDEX]]
625611; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
626612; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 2
@@ -637,20 +623,20 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
637623; CHECK-NEXT: [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
638624; CHECK-NEXT: store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
639625; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
640- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3 ]]
641- ; CHECK-NEXT: [[TMP17 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
642- ; CHECK-NEXT: br i1 [[TMP17 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18 :![0-9]+]]
626+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1 ]]
627+ ; CHECK-NEXT: [[TMP14 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
628+ ; CHECK-NEXT: br i1 [[TMP14 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17 :![0-9]+]]
643629; CHECK: middle.block:
644630; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
645631; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
646632; CHECK: vec.epilog.iter.check:
647633; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2
648- ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH ]], label [[VEC_EPILOG_PH]]
634+ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[SCALAR_PH ]], label [[VEC_EPILOG_PH]], !prof [[PROF14 ]]
649635; CHECK: vec.epilog.ph:
650- ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK ]] ]
651- ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY :%.*]]
636+ ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH ]] ]
637+ ; CHECK-NEXT: br label [[FOR_BODY :%.*]]
652638; CHECK: vec.epilog.vector.body:
653- ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY ]] ]
639+ ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[FOR_BODY ]] ]
654640; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX4]]
655641; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x float>, ptr [[TMP18]], align 4
656642; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX4]]
@@ -659,12 +645,12 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
659645; CHECK-NEXT: store <2 x float> [[TMP20]], ptr [[TMP19]], align 4
660646; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX4]], 2
661647; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N]]
662- ; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY ]], !llvm.loop [[LOOP19 :![0-9]+]]
648+ ; CHECK-NEXT: br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[FOR_BODY ]], !llvm.loop [[LOOP18 :![0-9]+]]
663649; CHECK: vec.epilog.middle.block:
664- ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH ]]
650+ ; CHECK-NEXT: br i1 true, label [[EXIT]], label [[SCALAR_PH ]]
665651; CHECK: vec.epilog.scalar.ph:
666652; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
667- ; CHECK-NEXT: br label [[FOR_BODY :%.*]]
653+ ; CHECK-NEXT: br label [[FOR_BODY1 :%.*]]
668654; CHECK: for.body:
669655;
670656; CHECK-VF8-LABEL: @trip_count_vscale_no_epilogue_iterations(
0 commit comments