-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[LV]: consider scalable VF during deciding dead epilogue. #156724
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4407,6 +4407,13 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor( | |
| LLVM_DEBUG(dbgs() << "LEV: Maximum Trip Count for Epilogue: " | ||
| << MaxTripCount << "\n"); | ||
| } | ||
| // Check if the RemainingIterations is scalable. | ||
| const SCEV *KnownMinRemIter = nullptr, *EstimatedRemIter = nullptr; | ||
| bool ScalableRemIter = match(RemainingIterations, m_scev_c_Mul(m_SCEV(KnownMinRemIter), m_SCEVVScale())); | ||
| if (ScalableRemIter) | ||
| EstimatedRemIter = SE.getMulExpr( | ||
| KnownMinRemIter, | ||
| SE.getConstant(TCType, CM.getVScaleForTuning().value_or(1))); | ||
|
|
||
| for (auto &NextVF : ProfitableVFs) { | ||
| // Skip candidate VFs without a corresponding VPlan. | ||
|
|
@@ -4425,11 +4432,25 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor( | |
|
|
||
| // If NextVF is greater than the number of remaining iterations, the | ||
| // epilogue loop would be dead. Skip such factors. | ||
| if (RemainingIterations && !NextVF.Width.isScalable()) { | ||
| if (SE.isKnownPredicate( | ||
| CmpInst::ICMP_UGT, | ||
| SE.getConstant(TCType, NextVF.Width.getFixedValue()), | ||
| RemainingIterations)) | ||
| if (ScalableRemIter == NextVF.Width.isScalable()) { | ||
| if (SE.isKnownPredicate(CmpInst::ICMP_UGT, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Given that you're asking the same thing each time, is it worth creating a small lambda function to do the work? For example, define a lambda that wraps the `SE.isKnownPredicate(CmpInst::ICMP_UGT, ...)` comparison, and then call this lambda function each time? |
||
| SE.getElementCount(TCType, NextVF.Width), | ||
| RemainingIterations)) | ||
| continue; | ||
| } | ||
| // Handle the case where NextVF and RemainingIterations are in different | ||
| // numerical spaces. | ||
| else if (NextVF.Width.isScalable()) { | ||
| ElementCount EstimatedRuntimeNextVF = ElementCount::getFixed( | ||
| estimateElementCount(NextVF.Width, CM.getVScaleForTuning())); | ||
| if (SE.isKnownPredicate(CmpInst::ICMP_UGT, | ||
| SE.getElementCount(TCType, EstimatedRuntimeNextVF), | ||
| RemainingIterations)) | ||
| continue; | ||
| } else { | ||
| if (SE.isKnownPredicate(CmpInst::ICMP_UGT, | ||
|
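There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you can combine the if with the else, i.e. `} else if (SE.isKnownPredicate(CmpInst::ICMP_UGT, SE.getElementCount(TCType, NextVF.Width), EstimatedRemIter)) continue;` |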
||
| SE.getElementCount(TCType, NextVF.Width), | ||
| EstimatedRemIter)) | ||
| continue; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,13 +9,8 @@ target triple = "aarch64-linux-gnu" | |
| define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i32 %x) #0 { | ||
| ; CHECK-LABEL: define i64 @vector_loop_with_remaining_iterations( | ||
| ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { | ||
| ; CHECK-NEXT: [[ITER_CHECK:.*]]: | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 | ||
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 17, [[TMP1]] | ||
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] | ||
| ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: | ||
| ; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] | ||
| ; CHECK: [[VECTOR_PH]]: | ||
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0 | ||
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer | ||
|
|
@@ -39,58 +34,12 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3 | |
| ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] | ||
| ; CHECK: [[MIDDLE_BLOCK]]: | ||
| ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP11]]) | ||
| ; CHECK-NEXT: br label %[[VEC_EPILOG_ITER_CHECK:.*]] | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice!! |
||
| ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: | ||
| ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ule i64 1, [[TMP1]] | ||
| ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] | ||
| ; CHECK: [[VEC_EPILOG_PH]]: | ||
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] | ||
| ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP13]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] | ||
| ; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() | ||
| ; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 2 | ||
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 17, [[TMP17]] | ||
| ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 | ||
| ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 [[TMP17]], i64 [[N_MOD_VF]] | ||
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 17, [[TMP19]] | ||
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[X]], i64 0 | ||
| ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 | ||
| ; CHECK-NEXT: [[TMP24:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false) | ||
| ; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() | ||
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 | ||
| ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP26:%.*]] = mul <vscale x 2 x i64> [[TMP25]], splat (i64 1) | ||
| ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> [[BROADCAST_SPLAT4]], [[TMP26]] | ||
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP17]], i64 0 | ||
| ; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] | ||
| ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: | ||
| ; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <vscale x 2 x i64> [ [[TMP22]], %[[VEC_EPILOG_PH]] ], [ [[TMP35:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], <vscale x 2 x i64> [[VEC_IND]], i32 0, i64 3 | ||
| ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 [[TMP28]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> poison) | ||
| ; CHECK-NEXT: [[TMP29:%.*]] = zext <vscale x 2 x i8> [[WIDE_MASKED_GATHER]] to <vscale x 2 x i32> | ||
| ; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP29]]) | ||
| ; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP30]]) | ||
| ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]] | ||
| ; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1 | ||
| ; CHECK-NEXT: [[TMP34:%.*]] = zext <vscale x 2 x i32> [[TMP31]] to <vscale x 2 x i64> | ||
| ; CHECK-NEXT: [[TMP35]] = or <vscale x 2 x i64> [[VEC_PHI8]], [[TMP34]] | ||
| ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP17]] | ||
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT6]] | ||
| ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]] | ||
| ; CHECK-NEXT: br i1 [[TMP36]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] | ||
| ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: | ||
| ; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> [[TMP35]]) | ||
| ; CHECK-NEXT: br label %[[VEC_EPILOG_SCALAR_PH]] | ||
| ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: | ||
| ; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] | ||
| ; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ [[TMP37]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP13]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] | ||
| ; CHECK-NEXT: br label %[[SCALAR_PH:.*]] | ||
| ; CHECK: [[SCALAR_PH]]: | ||
| ; CHECK-NEXT: br label %[[LOOP:.*]] | ||
| ; CHECK: [[LOOP]]: | ||
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL10]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX11]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[TMP13]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[GEP_SRC_I_I:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[IV]], i32 0, i64 3 | ||
| ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I_I]], align 1 | ||
| ; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32 | ||
|
|
@@ -104,7 +53,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3 | |
| ; CHECK-NEXT: [[RED_NEXT]] = or i64 [[RED]], [[MIN_EXT]] | ||
| ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 | ||
| ; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 17 | ||
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] | ||
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] | ||
| ; CHECK: [[EXIT]]: | ||
| ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: ret i64 [[RED_NEXT_LCSSA]] | ||
|
|
@@ -140,13 +89,8 @@ exit: | |
| define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr noalias %dst, i32 %x) #0 { | ||
| ; CHECK-LABEL: define i64 @main_vector_loop_fixed_with_no_remaining_iterations( | ||
| ; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { | ||
| ; CHECK-NEXT: [[ITER_CHECK:.*]]: | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 | ||
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 17, [[TMP1]] | ||
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] | ||
| ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: | ||
| ; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] | ||
| ; CHECK-NEXT: [[ENTRY:.*:]] | ||
| ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] | ||
| ; CHECK: [[VECTOR_PH]]: | ||
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0 | ||
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer | ||
|
|
@@ -167,61 +111,15 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no | |
| ; CHECK-NEXT: [[TMP11]] = or <16 x i64> [[VEC_PHI]], [[TMP10]] | ||
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 | ||
| ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 | ||
| ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] | ||
| ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] | ||
| ; CHECK: [[MIDDLE_BLOCK]]: | ||
| ; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP11]]) | ||
| ; CHECK-NEXT: br label %[[VEC_EPILOG_ITER_CHECK:.*]] | ||
| ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: | ||
| ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ule i64 1, [[TMP1]] | ||
| ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] | ||
| ; CHECK: [[VEC_EPILOG_PH]]: | ||
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] | ||
| ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP13]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] | ||
| ; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() | ||
| ; CHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 2 | ||
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 17, [[TMP17]] | ||
| ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 | ||
| ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i64 [[TMP17]], i64 [[N_MOD_VF]] | ||
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 17, [[TMP19]] | ||
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[X]], i64 0 | ||
| ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <vscale x 2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0 | ||
| ; CHECK-NEXT: [[TMP24:%.*]] = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> [[BROADCAST_SPLAT2]], i1 false) | ||
| ; CHECK-NEXT: [[TMP25:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64() | ||
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0 | ||
| ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP38:%.*]] = mul <vscale x 2 x i64> [[TMP25]], splat (i64 1) | ||
| ; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> [[BROADCAST_SPLAT4]], [[TMP38]] | ||
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP17]], i64 0 | ||
| ; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] | ||
| ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: | ||
| ; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <vscale x 2 x i64> [ [[TMP22]], %[[VEC_EPILOG_PH]] ], [ [[TMP35:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] | ||
| ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], <vscale x 2 x i64> [[VEC_IND]], i32 0, i64 3 | ||
| ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> align 1 [[TMP28]], <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i8> poison) | ||
| ; CHECK-NEXT: [[TMP29:%.*]] = zext <vscale x 2 x i8> [[WIDE_MASKED_GATHER]] to <vscale x 2 x i32> | ||
| ; CHECK-NEXT: [[TMP30:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP29]]) | ||
| ; CHECK-NEXT: [[TMP31:%.*]] = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> [[TMP24]], <vscale x 2 x i32> [[TMP30]]) | ||
| ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]] | ||
| ; CHECK-NEXT: store <vscale x 2 x i8> zeroinitializer, ptr [[TMP32]], align 1 | ||
| ; CHECK-NEXT: [[TMP34:%.*]] = zext <vscale x 2 x i32> [[TMP31]] to <vscale x 2 x i64> | ||
| ; CHECK-NEXT: [[TMP35]] = or <vscale x 2 x i64> [[VEC_PHI8]], [[TMP34]] | ||
| ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP17]] | ||
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT6]] | ||
| ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]] | ||
| ; CHECK-NEXT: br i1 [[TMP36]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] | ||
| ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: | ||
| ; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> [[TMP35]]) | ||
| ; CHECK-NEXT: br label %[[VEC_EPILOG_SCALAR_PH]] | ||
| ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: | ||
| ; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] | ||
| ; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i64 [ [[TMP37]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP13]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] | ||
| ; CHECK-NEXT: br label %[[SCALAR_PH:.*]] | ||
| ; CHECK: [[SCALAR_PH]]: | ||
| ; CHECK-NEXT: br label %[[LOOP:.*]] | ||
| ; CHECK: [[LOOP]]: | ||
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL10]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX11]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[TMP13]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: [[GEP_SRC_I_I:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[IV]], i32 0, i64 3 | ||
| ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I_I]], align 1 | ||
| ; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32 | ||
|
|
@@ -235,7 +133,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no | |
| ; CHECK-NEXT: [[RED_NEXT]] = or i64 [[RED]], [[MIN_EXT]] | ||
| ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 | ||
| ; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 17 | ||
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]] | ||
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] | ||
| ; CHECK: [[EXIT]]: | ||
| ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ] | ||
| ; CHECK-NEXT: ret i64 [[RED_NEXT_LCSSA]] | ||
|
|
@@ -308,7 +206,7 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks( | |
| ; CHECK-NEXT: store i64 0, ptr [[L]], align 8 | ||
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 | ||
| ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4 | ||
| ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] | ||
| ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] | ||
| ; CHECK: [[MIDDLE_BLOCK]]: | ||
| ; CHECK-NEXT: br label %[[SCALAR_PH:.*]] | ||
| ; CHECK: [[SCALAR_PH]]: | ||
|
|
@@ -332,7 +230,7 @@ define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks( | |
| ; CHECK-NEXT: store i64 0, ptr [[L]], align 8 | ||
| ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1]], 2 | ||
| ; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV1]], 14 | ||
| ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP11:![0-9]+]] | ||
| ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP8:![0-9]+]] | ||
| ; CHECK: [[EXIT]]: | ||
| ; CHECK-NEXT: ret void | ||
| ; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we actually need this code? Unless I've missed something, the only way
`RemainingIterations` can be scalable is if both `ScalableTC == true` and `MainLoopVF.isScalable() == true`. Can't you just write the check in terms of those two conditions? For all other calculations we are either operating on fixed-width only or we have used estimates for scalable versions.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah we still need this code.
We need to calculate the
`EstimatedRemIter` to compare it against the fixed-width VFs. Relying on the fact that
`ScalableTC == true` and `MainLoopVF.isScalable() == true` doesn't work for cases where SCEV can't get the `KnownMinRemIter` needed to calculate `EstimatedRemIter`, such as when the `RemainingIter` is just `vscale`. I could add a TODO here to simplify it as you suggest once SCEV is able to handle scale-based expressions that are not in the standard form of:
`var x vscale`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, so if both
`ScalableTC == true` and `MainLoopVF.isScalable() == true` then we know `RemainingIterations` must be a multiple of vscale. If we cannot calculate `EstimatedRemIter` because `RemainingIterations != var x vscale`, then in the new code below we will end up comparing a fixed value with vscale. When
`RemainingIterations` isn't in the form `var x vscale`, what does the expression look like? If it's just `RemainingIterations = vscale` then presumably we can support that fairly easily here too?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it's just vscale; it could be handled with a special case that matches a bare `vscale` expression.
But I chose not to do that because it would only handle a single case that SCEV can't handle, rather than being generic handling for the complex scale-based forms that SCEV can't handle.
That said, I don't have a strong opinion here. What do you think?