1010target triple = "aarch64-linux-gnu"
1111
1212; DEBUG: LV: Checking a loop in 'main_vf_vscale_x_16'
13- ; DEBUG: Create Skeleton for epilogue vectorized loop (first pass)
14- ; DEBUG: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1
13+ ; DEBUG: Executing best plan with VF=vscale x 16, UF=2
1514
1615; DEBUG-FORCED: LV: Checking a loop in 'main_vf_vscale_x_16'
1716; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced.
@@ -20,13 +19,11 @@ target triple = "aarch64-linux-gnu"
2019
2120define void @main_vf_vscale_x_16 (ptr %A ) #0 {
2221; CHECK-LABEL: @main_vf_vscale_x_16(
23- ; CHECK-NEXT: iter.check:
24- ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
25- ; CHECK: vector.main.loop.iter.check:
22+ ; CHECK-NEXT: entry:
2623; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
2724; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5
2825; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
29- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH :%.*]], label [[VECTOR_PH:%.*]]
26+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH :%.*]], label [[VECTOR_PH:%.*]]
3027; CHECK: vector.ph:
3128; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
3229; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32
@@ -150,7 +147,7 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) {
150147; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP5]], align 1
151148; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
152149; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
153- ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6 :![0-9]+]]
150+ ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4 :![0-9]+]]
154151; CHECK: middle.block:
155152; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
156153; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
@@ -261,7 +258,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
261258; CHECK-NEXT: store <vscale x 2 x i64> splat (i64 1), ptr [[TMP7]], align 1
262259; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
263260; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
264- ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8 :![0-9]+]]
261+ ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6 :![0-9]+]]
265262; CHECK: middle.block:
266263; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
267264; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
@@ -279,7 +276,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) {
279276; CHECK-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP9]], align 1
280277; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 8
281278; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]]
282- ; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9 :![0-9]+]]
279+ ; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8 :![0-9]+]]
283280; CHECK: vec.epilog.middle.block:
284281; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
285282; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -382,7 +379,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
382379; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[TMP6]], align 1
383380; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
384381; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
385- ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11 :![0-9]+]]
382+ ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10 :![0-9]+]]
386383; CHECK: middle.block:
387384; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]]
388385; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
@@ -400,7 +397,7 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 {
400397; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr [[NEXT_GEP2]], align 1
401398; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 8
402399; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 10000
403- ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12 :![0-9]+]]
400+ ; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11 :![0-9]+]]
404401; CHECK: vec.epilog.middle.block:
405402; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
406403; CHECK: vec.epilog.scalar.ph:
@@ -482,16 +479,18 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
482479; CHECK-NEXT: iter.check:
483480; CHECK-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
484481; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1033
482+ ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
483+ ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1
485484; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
486485; CHECK: vector.main.loop.iter.check:
487- ; CHECK-NEXT: [[TMP0 :%.*]] = call i64 @llvm.vscale.i64()
488- ; CHECK-NEXT: [[TMP1 :%.*]] = shl nuw i64 [[TMP0 ]], 3
489- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1 ]]
486+ ; CHECK-NEXT: [[TMP2 :%.*]] = call i64 @llvm.vscale.i64()
487+ ; CHECK-NEXT: [[TMP3 :%.*]] = shl nuw i64 [[TMP2 ]], 3
488+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP3 ]]
490489; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
491490; CHECK: vector.ph:
492- ; CHECK-NEXT: [[TMP2 :%.*]] = call i64 @llvm.vscale.i64()
493- ; CHECK-NEXT: [[TMP3 :%.*]] = mul nuw i64 [[TMP2 ]], 8
494- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3 ]]
491+ ; CHECK-NEXT: [[TMP4 :%.*]] = call i64 @llvm.vscale.i64()
492+ ; CHECK-NEXT: [[TMP5 :%.*]] = mul nuw i64 [[TMP4 ]], 8
493+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5 ]]
495494; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
496495; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
497496; CHECK: vector.body:
@@ -523,20 +522,22 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
523522; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
524523; CHECK: vec.epilog.ph:
525524; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
526- ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2
525+ ; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
526+ ; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 2
527+ ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], [[TMP23]]
527528; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]]
528529; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
529530; CHECK: vec.epilog.vector.body:
530531; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
531- ; CHECK-NEXT: [[TMP18 :%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX6]]
532- ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x float>, ptr [[TMP18 ]], align 4
533- ; CHECK-NEXT: [[TMP19 :%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX6]]
534- ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, ptr [[TMP19 ]], align 4
535- ; CHECK-NEXT: [[TMP20 :%.*]] = fmul <2 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]]
536- ; CHECK-NEXT: store <2 x float> [[TMP20 ]], ptr [[TMP19 ]], align 4
537- ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
538- ; CHECK-NEXT: [[TMP21 :%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]]
539- ; CHECK-NEXT: br i1 [[TMP21 ]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16 :![0-9]+]]
532+ ; CHECK-NEXT: [[TMP24 :%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX6]]
533+ ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 2 x float>, ptr [[TMP24 ]], align 4
534+ ; CHECK-NEXT: [[TMP25 :%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX6]]
535+ ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 2 x float>, ptr [[TMP25 ]], align 4
536+ ; CHECK-NEXT: [[TMP26 :%.*]] = fmul <vscale x 2 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]]
537+ ; CHECK-NEXT: store <vscale x 2 x float> [[TMP26 ]], ptr [[TMP25 ]], align 4
538+ ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], [[TMP23]]
539+ ; CHECK-NEXT: [[TMP27 :%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]]
540+ ; CHECK-NEXT: br i1 [[TMP27 ]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15 :![0-9]+]]
540541; CHECK: vec.epilog.middle.block:
541542; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[N]], [[N_VEC5]]
542543; CHECK-NEXT: br i1 [[CMP_N10]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -606,22 +607,16 @@ exit:
606607}
607608
608609; Loop with vscale-based trip count vscale x 1024.
609- ; TODO: No epilogue vectorizations should remain when choosing VF = vscale x 4.
610610define void @trip_count_vscale_no_epilogue_iterations (ptr noalias %a , ptr noalias %b ) vscale_range(1 , 16 ) #0 {
611611; CHECK-LABEL: @trip_count_vscale_no_epilogue_iterations(
612- ; CHECK-NEXT: iter.check :
612+ ; CHECK-NEXT: entry :
613613; CHECK-NEXT: [[V:%.*]] = tail call i64 @llvm.vscale.i64()
614614; CHECK-NEXT: [[N:%.*]] = mul nuw nsw i64 [[V]], 1024
615- ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
616- ; CHECK: vector.main.loop.iter.check:
617- ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
618- ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3
619- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]]
620- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
615+ ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
621616; CHECK: vector.ph:
622- ; CHECK-NEXT: [[TMP2 :%.*]] = call i64 @llvm.vscale.i64()
623- ; CHECK-NEXT: [[TMP3 :%.*]] = mul nuw i64 [[TMP2 ]], 8
624- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3 ]]
617+ ; CHECK-NEXT: [[TMP0 :%.*]] = call i64 @llvm.vscale.i64()
618+ ; CHECK-NEXT: [[TMP1 :%.*]] = mul nuw i64 [[TMP0 ]], 8
619+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1 ]]
625620; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
626621; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
627622; CHECK: vector.body:
0 commit comments