diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d68a26251ac9d..45046a3871fb1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9076,9 +9076,11 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { DebugLoc DL = getDebugLocFromInstOrOperands(Legal->getPrimaryInduction()); TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow); - // When not folding the tail, we know that the induction increment will not - // overflow. - bool HasNUW = Style == TailFoldingStyle::None; + // Use NUW for the induction increment if we proved that it won't overflow in + // the vector loop or when not folding the tail. In the later case, we know + // that the canonical induction increment will not overflow as the vector trip + // count is >= increment and a multiple of the increment. + bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None; addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW, DL); VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 1acc400a2c9ff..dff0bbe59a706 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -455,7 +455,7 @@ define void @latch_branch_cost(ptr %dst) { ; PRED-NEXT: store i8 0, ptr [[TMP24]], align 1 ; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; PRED: pred.store.continue14: -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; PRED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104 ; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -943,7 +943,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE14]] ; DEFAULT: pred.store.continue14: ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) -; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; DEFAULT-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -1045,7 +1045,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; PRED-NEXT: br label [[PRED_STORE_CONTINUE14]] ; PRED: pred.store.continue14: ; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; PRED-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; PRED: middle.block: ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -1593,7 +1593,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; DEFAULT-NEXT: store i32 [[TMP10]], ptr [[DST]], align 4 ; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE6]] ; DEFAULT: pred.store.continue6: -; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 ; DEFAULT-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] @@ -1656,7 +1656,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize { ; PRED-NEXT: store i32 [[TMP10]], ptr [[DST]], align 4 ; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]] ; PRED: pred.store.continue6: -; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; PRED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 ; PRED-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll index 6011ccfd15614..af43b747c0043 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll @@ -18,7 +18,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> zeroinitializer) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_MASKED_LOAD]]) ; CHECK-NEXT: [[TMP2]] = add i32 [[TMP1]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -71,7 +71,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_MASKED_LOAD1]]) ; CHECK-NEXT: [[TMP8]] = add i32 [[TMP7]], [[TMP6]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -124,7 +124,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[WIDE_MASKED_LOAD1]] ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -176,7 +176,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]] ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> splat (i32 -1) ; CHECK-NEXT: [[TMP4]] = and <4 x i32> [[VEC_PHI]], [[TMP3]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -228,7 +228,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4]] = or <4 x i32> [[VEC_PHI]], [[TMP3]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -280,7 +280,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD1]], [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4]] = xor <4 x i32> [[VEC_PHI]], [[TMP3]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -332,7 +332,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], [[WIDE_MASKED_LOAD1]] ; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -384,7 +384,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[WIDE_MASKED_LOAD1]] ; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll index 832d4db53036f..6f78982d7ab02 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll @@ -33,7 +33,7 @@ define i32 @mla_i32(ptr noalias nocapture readonly %A, ptr noalias nocapture rea ; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]]) ; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -116,7 +116,7 @@ define i32 @mla_i8(ptr noalias nocapture readonly %A, ptr noalias nocapture read ; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP8]]) ; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -193,7 +193,7 @@ define i32 @add_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: [[TMP5]] = add i32 [[TMP4]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -259,7 +259,7 @@ define i32 @mul_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -326,7 +326,7 @@ define i32 @and_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) ; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -393,7 +393,7 @@ define i32 @or_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: @@ -460,7 +460,7 @@ define i32 @xor_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison) ; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: @@ -527,7 +527,7 @@ define float @fadd_f32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison) ; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[WIDE_MASKED_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: @@ -594,7 +594,7 @@ define float @fmul_f32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP2]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> poison) ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[WIDE_MASKED_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP4]] = select fast <4 x i1> [[ACTIVE_LANE_MASK]], <4 x float> [[TMP3]], <4 x float> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll index c115c91cff896..baad7a84a891a 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll @@ -255,7 +255,7 @@ define i32 @add_i32_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> zeroinitializer) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_MASKED_LOAD]]) ; CHECK-NEXT: [[TMP2]] = add i32 [[TMP1]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: for.cond.cleanup: @@ -301,7 +301,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i32> [[TMP1]], <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]]) ; CHECK-NEXT: [[TMP4]] = add i32 [[TMP3]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: for.cond.cleanup: @@ -348,7 +348,7 @@ define i32 @add_i8_i32(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP1]], <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP2]]) ; CHECK-NEXT: [[TMP4]] = add i32 [[TMP3]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: for.cond.cleanup: @@ -393,7 +393,7 @@ define signext i16 @add_i16_i16(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[TMP0]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> zeroinitializer) ; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[WIDE_MASKED_LOAD]]) ; CHECK-NEXT: [[TMP2]] = add i16 [[TMP1]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: for.cond.cleanup: @@ -439,7 +439,7 @@ define signext i16 @add_i8_i16(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i16> [[TMP1]], <16 x i16> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[TMP2]]) ; CHECK-NEXT: [[TMP4]] = add i16 [[TMP3]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: for.cond.cleanup: @@ -484,7 +484,7 @@ define zeroext i8 @add_i8_i8(ptr nocapture readonly %x, i32 %n) #0 { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[TMP0]], i32 1, <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> zeroinitializer) ; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[WIDE_MASKED_LOAD]]) ; CHECK-NEXT: [[TMP2]] = add i8 [[TMP1]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: for.cond.cleanup: @@ -810,7 +810,7 @@ define i32 @mla_i32_i32(ptr nocapture readonly %x, ptr nocapture readonly %y, i3 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) ; CHECK-NEXT: [[TMP5]] = add i32 [[TMP4]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: for.cond.cleanup: @@ -863,7 +863,7 @@ define i32 @mla_i16_i32(ptr nocapture readonly %x, ptr nocapture readonly %y, i3 ; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i32> [[TMP4]], <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]]) ; CHECK-NEXT: [[TMP7]] = add i32 [[TMP6]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: for.cond.cleanup: @@ -918,7 +918,7 @@ define i32 @mla_i8_i32(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 ; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP5]]) ; CHECK-NEXT: [[TMP7]] = add i32 [[TMP6]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: for.cond.cleanup: @@ -971,7 +971,7 @@ define signext i16 @mla_i16_i16(ptr nocapture readonly %x, ptr nocapture readonl ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> [[TMP2]], <8 x i16> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP3]]) ; CHECK-NEXT: [[TMP5]] = add i16 [[TMP4]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: for.cond.cleanup: @@ -1024,7 +1024,7 @@ define signext i16 @mla_i8_i16(ptr nocapture readonly %x, ptr nocapture readonly ; CHECK-NEXT: [[TMP5:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i16> [[TMP4]], <16 x i16> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[TMP5]]) ; CHECK-NEXT: [[TMP7]] = add i16 [[TMP6]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: for.cond.cleanup: @@ -1077,7 +1077,7 @@ define zeroext i8 @mla_i8_i8(ptr nocapture readonly %x, ptr nocapture readonly % ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i8> [[TMP2]], <16 x i8> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> [[TMP3]]) ; CHECK-NEXT: [[TMP5]] = add i8 [[TMP4]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: for.cond.cleanup: @@ -1417,7 +1417,7 @@ define i32 @mla_i8_i32_multiuse(ptr nocapture readonly %x, ptr nocapture readonl ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i32> [[TMP2]], <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP3]]) ; CHECK-NEXT: [[TMP5]] = add i32 [[TMP4]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: for.cond.cleanup: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll index ed6254074c8b5..e9642a55bc341 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll @@ -345,7 +345,7 @@ define void @float(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ; PREFER-FOLDING: call <4 x float> @llvm.masked.load.v4f32.p0({{.*}}%active.lane.mask ; PREFER-FOLDING: call <4 x float> @llvm.masked.load.v4f32.p0({{.*}}%active.lane.mask ; PREFER-FOLDING: call void @llvm.masked.store.v4f32.p0({{.*}}%active.lane.mask -; PREFER-FOLDING: %index.next = add i32 %index, 4 +; PREFER-FOLDING: %index.next = add nuw i32 %index, 4 ; PREFER-FOLDING: br i1 %{{.*}}, label %{{.*}}, label %vector.body entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll index 77cc3b70edb93..573a9f75f3054 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-loop-hint.ll @@ -50,7 +50,7 @@ define dso_local void @predicate_loop_hint(ptr noalias nocapture %A, ptr noalias ; CHECK: %[[WML2:.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}<4 x i1> %active.lane.mask ; CHECK: %[[ADD:.*]] = add nsw <4 x i32> %[[WML2]], %[[WML1]] ; CHECK: call void @llvm.masked.store.v4i32.p0(<4 x i32> %[[ADD]], {{.*}}<4 x i1> %active.lane.mask -; CHECK: %index.next = add i64 %index, 4 +; CHECK: %index.next = add nuw i64 %index, 4 ; CHECK: br i1 %{{.*}}, label %{{.*}}, label %vector.body, !llvm.loop [[VEC_LOOP2:![0-9]+]] ; ; CHECK: br i1 %{{.*}}, label %{{.*}}, label %for.body, !llvm.loop [[SCALAR_LOOP2:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll index 8f3cfc5b20ee9..f0464a085d390 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-prefer-flag.ll @@ -27,7 +27,7 @@ define dso_local void @flag_overrules_hint(ptr noalias nocapture %A, ptr noalias ; PREDFLAG: %wide.masked.load1 = call <4 x i32> @llvm.masked.load.v4i32.p0({{.*}}, <4 x i1> %active.lane.mask ; PREDFLAG: %{{.*}} = add nsw <4 x i32> %wide.masked.load1, %wide.masked.load ; PREDFLAG: call void @llvm.masked.store.v4i32.p0({{.*}}, <4 x i1> %active.lane.mask -; PREDFLAG: %index.next = add i64 %index, 4 +; PREDFLAG: %index.next = add nuw i64 %index, 4 ; PREDFLAG: %[[CMP:.*]] = icmp eq i64 %index.next, 432 ; PREDFLAG: br i1 %[[CMP]], label %middle.block, label %vector.body, !llvm.loop !0 entry: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index 8368022bceed9..c09b2de500d8c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -146,8 +146,8 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = sext [[VP_OP_LOAD]] to ; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv4i32( [[VEC_PHI]], [[TMP9]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP5]]) -; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP5]], [[EVL_BASED_IV]] -; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]] +; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]] +; IF-EVL-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]] ; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-OUTLOOP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-OUTLOOP: middle.block: @@ -203,8 +203,8 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = sext [[VP_OP_LOAD]] to ; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, [[TMP9]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]] -; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP5]], [[EVL_BASED_IV]] -; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]] +; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]] +; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]] ; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL-INLOOP: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index 44c5348296dea..5c5600b9cfdf8 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -69,7 +69,7 @@ define void @trip3_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv2i8.p0(ptr [[TMP12]], i32 1, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP13:%.*]] = add [[TMP10]], [[WIDE_MASKED_LOAD1]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0( [[TMP13]], ptr [[TMP12]], i32 1, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -138,7 +138,7 @@ define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP13:%.*]] = add [[TMP10]], [[WIDE_MASKED_LOAD1]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0( [[TMP13]], ptr [[TMP12]], i32 1, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -207,7 +207,7 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP13:%.*]] = add [[TMP10]], [[WIDE_MASKED_LOAD1]] ; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0( [[TMP13]], ptr [[TMP12]], i32 1, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll index 02b7ce3795ad5..305d26d7f3bc1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll @@ -55,7 +55,7 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll index aa7b7a5676eef..3c35c272d3389 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll @@ -31,7 +31,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP10:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP10]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -95,7 +95,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] ; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -158,7 +158,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP11]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: @@ -225,7 +225,7 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -282,7 +282,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: @@ -375,7 +375,7 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP10:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP10]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll index 1583e071fe7bc..375278eea38f9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll @@ -22,7 +22,7 @@ define void @small_trip_count_min_vlen_128(ptr nocapture %a) nounwind vscale_ran ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv1i32.p0(ptr [[TMP5]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP6:%.*]] = add nsw [[WIDE_MASKED_LOAD]], splat (i32 1) ; CHECK-NEXT: call void @llvm.masked.store.nxv1i32.p0( [[TMP6]], ptr [[TMP5]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP2]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -81,7 +81,7 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP7]], i32 4, [[ACTIVE_LANE_MASK]], poison) ; CHECK-NEXT: [[TMP8:%.*]] = add nsw [[WIDE_MASKED_LOAD]], splat (i32 1) ; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP8]], ptr [[TMP7]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index d0e14e5ba44c7..bfdcfbf5139b3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -175,7 +175,7 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 { ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 [[TMP1]], i32 2) ; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer ; CHECK-NEXT: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> zeroinitializer, <2 x ptr> [[BROADCAST_SPLAT4]], i32 1, <2 x i1> [[TMP3]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 30f1f0ca75e1d..9bba6e4376612 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -111,7 +111,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TF-SCALABLE: middle.block: @@ -145,7 +145,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -452,7 +452,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP9]] ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -497,7 +497,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP4]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -648,7 +648,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TF-SCALABLE: middle.block: @@ -682,7 +682,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -819,7 +819,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TF-SCALABLE: middle.block: @@ -853,7 +853,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -1015,7 +1015,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP9]] ; TF-SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT2]], ptr [[TMP11]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -1076,7 +1076,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -1255,7 +1255,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP9]] ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; TF-SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1300,7 +1300,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -1449,7 +1449,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; TF-SCALABLE: middle.block: @@ -1483,7 +1483,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; TF-FIXEDLEN: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll index f18a098e162a8..40ae96f378b6c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll @@ -47,8 +47,8 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -141,8 +141,8 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -235,8 +235,8 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -329,8 +329,8 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -423,8 +423,8 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -517,8 +517,8 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -611,8 +611,8 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -705,8 +705,8 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -799,8 +799,8 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -893,8 +893,8 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -987,8 +987,8 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -1081,8 +1081,8 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -1175,8 +1175,8 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP17]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -1272,8 +1272,8 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -1367,8 +1367,8 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -1462,8 +1462,8 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -1557,8 +1557,8 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: @@ -1705,8 +1705,8 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP18]], splat (i1 true), i32 [[TMP12]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] ; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; IF-EVL: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll index fbc4ad0bac50a..dc74f44688fdf 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-known-no-overflow.ll @@ -190,8 +190,8 @@ define void @no_overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[VP_OP:%.*]] = call @llvm.vp.add.nxv2i64( [[VP_OP_LOAD]], splat (i64 1), splat (i1 true), i32 [[TMP5]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP5]]) ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64 -; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index 79471ab21dc78..209c251f6a6a4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -50,8 +50,8 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP20]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: @@ -166,8 +166,8 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP15]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP25]], [[VP_REVERSE_MASK6]], i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP28:%.*]] = zext i32 [[TMP5]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP28]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP28]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; IF-EVL-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll index 6c2c2612fe2d4..313511d017f65 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll @@ -40,8 +40,8 @@ define void @test(ptr %p) { ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP_LOAD]], ptr align 8 [[TMP12]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: @@ -360,8 +360,8 @@ define void @trivial_due_max_vscale(ptr %p) { ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP11]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP_LOAD]], ptr align 32 [[TMP12]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; IF-EVL: middle.block: @@ -445,8 +445,8 @@ define void @no_high_lmul_or_interleave(ptr %p) { ; IF-EVL-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv1i64.p0( [[VP_OP_LOAD]], ptr align 32 [[TMP6]], splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]] +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; IF-EVL: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll index 107ae50461296..385f7621801b4 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll @@ -33,7 +33,7 @@ define void @test_scalar_steps_target_instruction_cost(ptr %dst) { ; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP6]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; CHECK: [[PRED_STORE_CONTINUE2]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10 ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll index f828b74bc8bf6..98245fcc0f0af 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll @@ -136,7 +136,7 @@ define void @test(ptr %p, i40 %a) { ; CHECK-NEXT: store i1 [[TMP40]], ptr [[P]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]] ; CHECK: pred.store.continue32: -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll index 699b8487a065a..d0754f1c2bb55 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll @@ -58,7 +58,7 @@ define void @func_21() { ; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.continue4: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 6 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll index 64e57747bf2ae..e685a83d9ccbb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll @@ -102,7 +102,7 @@ attributes #0 = { "target-cpu"="knl" } ; FORCE-NEXT: store i32 [[TMP1]], ptr @b, align 1 ; FORCE-NEXT: br label [[PRED_STORE_CONTINUE4]] ; FORCE: pred.store.continue2: -; FORCE-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; FORCE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; FORCE-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; FORCE-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4 ; FORCE-NEXT: br i1 [[TMP15]], label {{%.*}}, label [[VECTOR_BODY]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 9f6362b361685..f8de81f43afc5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -1052,7 +1052,7 @@ define i64 @live_in_known_1_via_scev() { ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 5) ; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index bc74742d952ab..f6c3219c90a98 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -637,7 +637,7 @@ define void @wide_iv_trunc(ptr %dst, i64 %N) { ; CHECK-NEXT: store i32 [[TMP10]], ptr [[DST]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll index a12aed7239c9d..597be33ca63f6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll @@ -28,7 +28,7 @@ define i32 @foo_optsize() #0 { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <64 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> splat (i8 2), <64 x i8> splat (i8 1) ; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0(<64 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 64 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -67,7 +67,7 @@ define i32 @foo_optsize() #0 { ; AUTOVF-NEXT: [[TMP4:%.*]] = icmp eq <32 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer ; AUTOVF-NEXT: [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> splat (i8 2), <32 x i8> splat (i8 1) ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0(<32 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]]) -; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32 +; AUTOVF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 ; AUTOVF-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 ; AUTOVF-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AUTOVF: middle.block: @@ -128,7 +128,7 @@ define i32 @foo_minsize() #1 { ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <64 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> splat (i8 2), <64 x i8> splat (i8 1) ; CHECK-NEXT: call void @llvm.masked.store.v64i8.p0(<64 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 64 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -167,7 +167,7 @@ define i32 @foo_minsize() #1 { ; AUTOVF-NEXT: [[TMP4:%.*]] = icmp eq <32 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer ; AUTOVF-NEXT: [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> splat (i8 2), <32 x i8> splat (i8 1) ; AUTOVF-NEXT: call void @llvm.masked.store.v32i8.p0(<32 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]]) -; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 32 +; AUTOVF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 ; AUTOVF-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224 ; AUTOVF-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AUTOVF: middle.block: @@ -383,7 +383,7 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; CHECK-NEXT: [[VEC_IV:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <64 x i32> [[VEC_IV]], [[BROADCAST_SPLAT4]] ; CHECK-NEXT: call void @llvm.masked.scatter.v64p0.v64p0(<64 x ptr> zeroinitializer, <64 x ptr> [[TMP5]], i32 8, <64 x i1> [[TMP6]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 64 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 64 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 -4608 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -429,7 +429,7 @@ define void @tail_folded_store_avx512(ptr %start, ptr %end) #3 { ; AUTOVF-NEXT: [[VEC_IV:%.*]] = add <8 x i32> [[BROADCAST_SPLAT]], ; AUTOVF-NEXT: [[TMP6:%.*]] = icmp ule <8 x i32> [[VEC_IV]], [[BROADCAST_SPLAT4]] ; AUTOVF-NEXT: call void @llvm.masked.scatter.v8p0.v8p0(<8 x ptr> zeroinitializer, <8 x ptr> [[TMP5]], i32 8, <8 x i1> [[TMP6]]) -; AUTOVF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; AUTOVF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; AUTOVF-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 -576 ; AUTOVF-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; AUTOVF-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index 73bdae71654f1..a190e94a01489 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -35,7 +35,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index 99aaecf9a073c..0a4b46b2c41fa 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -117,7 +117,7 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: store i32 [[X]], ptr [[TMP14]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -200,7 +200,7 @@ define void @example2(i32 %n, i32 %x) optsize { ; CHECK-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]] ; CHECK: pred.store.continue30: -; CHECK-NEXT: [[INDEX_NEXT31]] = add i64 [[INDEX20]], 4 +; CHECK-NEXT: [[INDEX_NEXT31]] = add nuw i64 [[INDEX20]], 4 ; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT31]], [[N_VEC12]] ; CHECK-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK7:%.*]], label [[VECTOR_BODY19]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block7: @@ -322,7 +322,7 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q ; CHECK-NEXT: store i32 [[TMP17]], ptr [[NEXT_GEP7]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]] ; CHECK: pred.store.continue20: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: @@ -508,7 +508,7 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: store i32 [[TMP23]], ptr [[NEXT_GEP11]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE17]] ; CHECK: pred.store.continue17: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll index de906598775eb..428ed94dadb89 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll @@ -28,7 +28,7 @@ define dso_local void @tail_folding_enabled(ptr noalias nocapture %A, ptr noalia ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP6]], ptr [[TMP8]], i32 4, <8 x i1> [[TMP1]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 432 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -96,7 +96,7 @@ define dso_local void @tail_folding_disabled(ptr noalias nocapture %A, ptr noali ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP6]], ptr [[TMP8]], i32 4, <8 x i1> [[TMP1]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 432 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll index cf913a4fcc7a1..90ba702ed232e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll @@ -146,7 +146,7 @@ define void @vectorized1(ptr noalias nocapture %A, ptr noalias nocapture readonl ; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP5]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison), !llvm.access.group [[ACC_GRP7]] ; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <8 x float> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]] ; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP6]], ptr [[TMP5]], i32 4, <8 x i1> [[TMP1]]), !llvm.access.group [[ACC_GRP7]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll index 3af3ad9ab3cf5..844fe1873bc98 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll @@ -71,7 +71,7 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 { ; CHECK-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <48 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = and <48 x i1> [[INTERLEAVED_MASK]], ; CHECK-NEXT: call void @llvm.masked.store.v48i8.p0(<48 x i8> , ptr [[TMP13]], i32 1, <48 x i1> [[TMP15]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10008 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll index 4174aa4e897bc..d490055b4ff39 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -485,7 +485,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP51]], ptr [[TMP52]], i32 1, <8 x i1> [[TMP3]]) -; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -520,7 +520,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP6]], i32 1, <8 x i1> [[TMP4]]) -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -689,7 +689,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP52:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP51]], ptr [[TMP52]], i32 1, <8 x i1> [[TMP3]]) -; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP53:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP53]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -724,7 +724,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <24 x i8> [[WIDE_MASKED_VEC]], <24 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP6]], i32 1, <8 x i1> [[TMP4]]) -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -989,7 +989,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc ; DISABLED_MASKED_STRIDED-NEXT: [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ] ; DISABLED_MASKED_STRIDED-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]] ; DISABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr [[TMP50]], i32 1, <8 x i1> [[TMP0]]) -; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP51]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] @@ -1021,7 +1021,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc ; ENABLED_MASKED_STRIDED-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_VEC]], <16 x i8> poison, <8 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]] ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP4]], i32 1, <8 x i1> [[TMP0]]) -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP5]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: @@ -2521,7 +2521,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP166]], ptr [[TMP165]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE62]] ; DISABLED_MASKED_STRIDED: pred.store.continue62: -; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP167:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP167]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -2558,7 +2558,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly ; ENABLED_MASKED_STRIDED-NEXT: [[TMP7:%.*]] = sub <8 x i8> zeroinitializer, [[TMP6]] ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP8]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; ENABLED_MASKED_STRIDED-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP10]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] @@ -2955,7 +2955,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca ; DISABLED_MASKED_STRIDED-NEXT: store i8 [[TMP164]], ptr [[TMP163]], align 1 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE60]] ; DISABLED_MASKED_STRIDED: pred.store.continue60: -; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP165]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] @@ -2990,7 +2990,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = sub <8 x i8> zeroinitializer, [[TMP4]] ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP6]], i32 1, <16 x i1> [[INTERLEAVED_MASK]]) -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP8]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll index bd54f87b50489..0d674ca2bc76a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll @@ -217,7 +217,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; DISABLED_MASKED_STRIDED-NEXT: store i16 [[TMP36]], ptr [[TMP35]], align 2 ; DISABLED_MASKED_STRIDED-NEXT: br label [[PRED_STORE_CONTINUE15]] ; DISABLED_MASKED_STRIDED: pred.store.continue15: -; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; DISABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; DISABLED_MASKED_STRIDED-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; DISABLED_MASKED_STRIDED-NEXT: [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DISABLED_MASKED_STRIDED-NEXT: br i1 [[TMP37]], label [[FOR_END_LOOPEXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -254,7 +254,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr ; ENABLED_MASKED_STRIDED-NEXT: [[INTERLEAVED_MASK:%.*]] = shufflevector <4 x i1> [[TMP0]], <4 x i1> poison, <16 x i32> ; ENABLED_MASKED_STRIDED-NEXT: [[TMP5:%.*]] = and <16 x i1> [[INTERLEAVED_MASK]], ; ENABLED_MASKED_STRIDED-NEXT: call void @llvm.masked.store.v16i16.p0(<16 x i16> [[INTERLEAVED_VEC]], ptr [[GEP]], i32 2, <16 x i1> [[TMP5]]) -; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; ENABLED_MASKED_STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; ENABLED_MASKED_STRIDED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; ENABLED_MASKED_STRIDED-NEXT: br i1 [[TMP6]], label [[FOR_END_LOOPEXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; ENABLED_MASKED_STRIDED: for.end.loopexit: diff --git a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll index c2edb54f6f3b5..e1fd07bf590c4 100644 --- a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll @@ -91,7 +91,7 @@ define void @pr47390(ptr %a) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index d0c811763a522..fbcd9cb95f2fc 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -66,7 +66,7 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: Successor(s): loop.1 ; CHECK-EMPTY: ; CHECK-NEXT: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -153,7 +153,7 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -221,7 +221,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: WIDEN ir<%add> = add ir<%rem>, ir<%recur.next> ; CHECK-NEXT: WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add> ; CHECK-NEXT: EMIT vp<[[SEL:%.+]]> = select vp<[[MASK]]>, ir<%and.red.next>, ir<%and.red> -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -336,7 +336,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK: loop.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -433,7 +433,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -519,7 +519,7 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 6c94f5bfc836a..87b0d5b83f82c 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -2780,7 +2780,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-IC-NEXT: [[TMP47]] = add <4 x i32> [[VEC_PHI1]], [[TMP45]] ; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI1]] -; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27:![0-9]+]], !llvm.loop [[LOOP28:![0-9]+]] ; UNROLL-NO-IC: middle.block: @@ -2847,7 +2847,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-VF-NEXT: [[TMP11]] = add i32 [[VEC_PHI1]], [[TMP6]] ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = select i1 [[TMP2]], i32 [[TMP10]], i32 [[VEC_PHI]] ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = select i1 [[TMP3]], i32 [[TMP11]], i32 [[VEC_PHI1]] -; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27:![0-9]+]], !llvm.loop [[LOOP28:![0-9]+]] ; UNROLL-NO-VF: middle.block: @@ -2934,7 +2934,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]] ; SINK-AFTER-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] -; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27:![0-9]+]], !llvm.loop [[LOOP28:![0-9]+]] ; SINK-AFTER: middle.block: @@ -3147,7 +3147,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC: pred.store.continue30: ; UNROLL-NO-IC-NEXT: [[TMP72:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP73:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI2]] -; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP74:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP32:![0-9]+]] @@ -3234,7 +3234,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-VF: pred.store.continue7: ; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = select i1 [[TMP4]], i32 [[TMP10]], i32 [[VEC_PHI]] ; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = select i1 [[TMP5]], i32 [[TMP11]], i32 [[VEC_PHI2]] -; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP32:![0-9]+]] ; UNROLL-NO-VF: middle.block: @@ -3356,7 +3356,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER-NEXT: br label [[PRED_STORE_CONTINUE13]] ; SINK-AFTER: pred.store.continue13: ; SINK-AFTER-NEXT: [[TMP37:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] -; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; SINK-AFTER-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP32:![0-9]+]] @@ -3712,7 +3712,7 @@ define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) { ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8 +; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -3768,7 +3768,7 @@ define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) { ; SINK-AFTER-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4 ; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; SINK-AFTER-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll index c71ff2b9fa4c4..7170147438327 100644 --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -72,7 +72,7 @@ for.end: ; CHECK-NEXT: Successor(s): loop.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.0: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successor ; CHECK-NEXT: } diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index b59cb66b11acf..730d488119d13 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -76,7 +76,7 @@ define void @bottom_tested(ptr %p, i32 %n) { ; TAILFOLD-NEXT: store i16 0, ptr [[TMP8]], align 4 ; TAILFOLD-NEXT: br label [[PRED_STORE_CONTINUE2]] ; TAILFOLD: pred.store.continue2: -; TAILFOLD-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; TAILFOLD-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; TAILFOLD-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; TAILFOLD-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll index c3d8a75acaaaf..d1ad7e3f4fc0d 100644 --- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll @@ -64,7 +64,7 @@ define void @maxvf3() { ; CHECK-NEXT: store i8 7, ptr [[TMP13]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index 0f5dcca81d371..738f265b89d65 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; This test verifies that the loop vectorizer will NOT produce a tail ; loop with the optimize for size or the minimize size attributes. ; REQUIRES: asserts @@ -10,9 +11,57 @@ target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128" @tab = common global [32 x i8] zeroinitializer, align 1 define i32 @foo_optsize() #0 { -; CHECK-LABEL: @foo_optsize( -; CHECK-NOT: <2 x i8> -; CHECK-NOT: <4 x i8> +; CHECK-LABEL: define i32 @foo_optsize( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 +; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret i32 0 +; +; PGSO-LABEL: define i32 @foo_optsize( +; PGSO-SAME: ) #[[ATTR0:[0-9]+]] { +; PGSO-NEXT: [[ENTRY:.*]]: +; PGSO-NEXT: br label %[[FOR_BODY:.*]] +; PGSO: [[FOR_BODY]]: +; PGSO-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; PGSO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] +; PGSO-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; PGSO-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0 +; PGSO-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 +; PGSO-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 +; PGSO-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 +; PGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 +; PGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]] +; PGSO: [[FOR_END]]: +; PGSO-NEXT: ret i32 0 +; +; NPGSO-LABEL: define i32 @foo_optsize( +; NPGSO-SAME: ) #[[ATTR0:[0-9]+]] { +; NPGSO-NEXT: [[ENTRY:.*]]: +; NPGSO-NEXT: br label %[[FOR_BODY:.*]] +; NPGSO: [[FOR_BODY]]: +; NPGSO-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; NPGSO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] +; NPGSO-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; NPGSO-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0 +; NPGSO-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 +; NPGSO-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 +; NPGSO-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 +; NPGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 +; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]] +; NPGSO: [[FOR_END]]: +; NPGSO-NEXT: ret i32 0 +; entry: br label %for.body @@ -35,10 +84,57 @@ for.end: ; preds = %for.body attributes #0 = { optsize } define i32 @foo_minsize() #1 { -; CHECK-LABEL: @foo_minsize( -; CHECK-NOT: <2 x i8> -; CHECK-NOT: <4 x i8> -; CHECK-LABEL: @foo_pgso( +; CHECK-LABEL: define i32 @foo_minsize( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 +; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret i32 0 +; +; PGSO-LABEL: define i32 @foo_minsize( +; PGSO-SAME: ) #[[ATTR1:[0-9]+]] { +; PGSO-NEXT: [[ENTRY:.*]]: +; PGSO-NEXT: br label %[[FOR_BODY:.*]] +; PGSO: [[FOR_BODY]]: +; PGSO-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; PGSO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] +; PGSO-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; PGSO-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0 +; PGSO-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 +; PGSO-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 +; PGSO-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 +; PGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 +; PGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]] +; PGSO: [[FOR_END]]: +; PGSO-NEXT: ret i32 0 +; +; NPGSO-LABEL: define i32 @foo_minsize( +; NPGSO-SAME: ) #[[ATTR1:[0-9]+]] { +; NPGSO-NEXT: [[ENTRY:.*]]: +; NPGSO-NEXT: br label %[[FOR_BODY:.*]] +; NPGSO: [[FOR_BODY]]: +; NPGSO-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; NPGSO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] +; NPGSO-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; NPGSO-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0 +; NPGSO-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 +; NPGSO-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 +; NPGSO-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 +; NPGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 +; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]] +; NPGSO: [[FOR_END]]: +; NPGSO-NEXT: ret i32 0 +; entry: br label %for.body @@ -61,10 +157,76 @@ for.end: ; preds = %for.body attributes #1 = { minsize } define i32 @foo_pgso() !prof !14 { -; PGSO-LABEL: @foo_pgso( -; PGSO-NOT: <{{[0-9]+}} x i8> -; NPGSO-LABEL: @foo_pgso( -; NPGSO: <{{[0-9]+}} x i8> +; CHECK-LABEL: define i32 @foo_pgso( +; CHECK-SAME: ) !prof [[PROF14:![0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 +; CHECK-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret i32 0 +; +; PGSO-LABEL: define i32 @foo_pgso( +; PGSO-SAME: ) !prof [[PROF14:![0-9]+]] { +; PGSO-NEXT: [[ENTRY:.*]]: +; PGSO-NEXT: br label %[[FOR_BODY:.*]] +; PGSO: [[FOR_BODY]]: +; PGSO-NEXT: [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; PGSO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] +; PGSO-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; PGSO-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0 +; PGSO-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 +; PGSO-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 +; PGSO-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 +; PGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 +; PGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]] +; PGSO: [[FOR_END]]: +; PGSO-NEXT: ret i32 0 +; +; NPGSO-LABEL: define i32 @foo_pgso( +; NPGSO-SAME: ) !prof [[PROF14:![0-9]+]] { +; NPGSO-NEXT: [[ENTRY:.*]]: +; NPGSO-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NPGSO: [[VECTOR_PH]]: +; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]] +; NPGSO: [[VECTOR_BODY]]: +; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NPGSO-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; NPGSO-NEXT: [[TMP1:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]] +; NPGSO-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 +; NPGSO-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 +; NPGSO-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer +; NPGSO-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i8> splat (i8 2), <4 x i8> splat (i8 1) +; NPGSO-NEXT: store <4 x i8> [[TMP4]], ptr [[TMP2]], align 1 +; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; NPGSO-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 200 +; NPGSO-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; NPGSO: [[MIDDLE_BLOCK]]: +; NPGSO-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; NPGSO: [[SCALAR_PH]]: +; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 200, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; NPGSO-NEXT: br label %[[FOR_BODY:.*]] +; NPGSO: [[FOR_BODY]]: +; NPGSO-NEXT: [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; NPGSO-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]] +; NPGSO-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; NPGSO-NEXT: [[CMP1:%.*]] = icmp eq i8 [[TMP6]], 0 +; NPGSO-NEXT: [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1 +; NPGSO-NEXT: store i8 [[DOT]], ptr [[ARRAYIDX]], align 1 +; NPGSO-NEXT: [[INC]] = add nsw i32 [[I_08]], 1 +; NPGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202 +; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; NPGSO: [[FOR_END]]: +; NPGSO-NEXT: ret i32 0 +; entry: br label %for.body @@ -91,19 +253,124 @@ for.end: ; preds = %for.body define void @pr43371() optsize { ; -; CHECK-LABEL: @pr43371 -; CHECK-NOT: vector.scevcheck +; CHECK-LABEL: define void @pr43371( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i16> undef, [[VEC_IND]] +; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]] +; CHECK-NEXT: store i16 0, ptr [[TMP3]], align 1 +; CHECK-NEXT: store i16 0, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756 +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[FOR_COND_CLEANUP28:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY29:.*]] +; CHECK: [[FOR_COND_CLEANUP28]]: +; CHECK-NEXT: unreachable +; CHECK: [[FOR_BODY29]]: +; CHECK-NEXT: [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] +; CHECK-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] +; CHECK-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 +; CHECK-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] +; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1 +; CHECK-NEXT: [[INC37]] = add i16 [[I24_0170]], 1 +; CHECK-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756 +; CHECK-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP18:![0-9]+]] +; +; PGSO-LABEL: define void @pr43371( +; PGSO-SAME: ) #[[ATTR0]] { +; PGSO-NEXT: [[ENTRY:.*]]: +; PGSO-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; PGSO: [[VECTOR_PH]]: +; PGSO-NEXT: br label %[[VECTOR_BODY:.*]] +; PGSO: [[VECTOR_BODY]]: +; PGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; PGSO-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; PGSO-NEXT: [[TMP0:%.*]] = add <2 x i16> undef, [[VEC_IND]] +; PGSO-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32> +; PGSO-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; PGSO-NEXT: [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]] +; PGSO-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; PGSO-NEXT: [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]] +; PGSO-NEXT: store i16 0, ptr [[TMP3]], align 1 +; PGSO-NEXT: store i16 0, ptr [[TMP5]], align 1 +; PGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; PGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; PGSO-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756 +; PGSO-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; PGSO: [[MIDDLE_BLOCK]]: +; PGSO-NEXT: br i1 true, label %[[FOR_COND_CLEANUP28:.*]], label %[[SCALAR_PH]] +; PGSO: [[SCALAR_PH]]: +; PGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; PGSO-NEXT: br label %[[FOR_BODY29:.*]] +; PGSO: [[FOR_COND_CLEANUP28]]: +; PGSO-NEXT: unreachable +; PGSO: [[FOR_BODY29]]: +; PGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] +; PGSO-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] +; PGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 +; PGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] +; PGSO-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1 +; PGSO-NEXT: [[INC37]] = add i16 [[I24_0170]], 1 +; PGSO-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756 +; PGSO-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP18:![0-9]+]] +; +; NPGSO-LABEL: define void @pr43371( +; NPGSO-SAME: ) #[[ATTR0]] { +; NPGSO-NEXT: [[ENTRY:.*]]: +; NPGSO-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NPGSO: [[VECTOR_PH]]: +; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]] +; NPGSO: [[VECTOR_BODY]]: +; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NPGSO-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NPGSO-NEXT: [[TMP0:%.*]] = add <2 x i16> undef, [[VEC_IND]] +; NPGSO-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32> +; NPGSO-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; NPGSO-NEXT: [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]] +; NPGSO-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; NPGSO-NEXT: [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]] +; NPGSO-NEXT: store i16 0, ptr [[TMP3]], align 1 +; NPGSO-NEXT: store i16 0, ptr [[TMP5]], align 1 +; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; NPGSO-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756 +; NPGSO-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; NPGSO: [[MIDDLE_BLOCK]]: +; NPGSO-NEXT: br i1 true, label %[[FOR_COND_CLEANUP28:.*]], label %[[SCALAR_PH]] +; NPGSO: [[SCALAR_PH]]: +; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; NPGSO-NEXT: br label %[[FOR_BODY29:.*]] +; NPGSO: [[FOR_COND_CLEANUP28]]: +; NPGSO-NEXT: unreachable +; NPGSO: [[FOR_BODY29]]: +; NPGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] +; NPGSO-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] +; NPGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 +; NPGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] +; NPGSO-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1 +; NPGSO-NEXT: [[INC37]] = add i16 [[I24_0170]], 1 +; NPGSO-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756 +; NPGSO-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP20:![0-9]+]] ; ; We do not want to generate SCEV predicates when optimising for size, because ; that will lead to extra code generation such as the SCEV overflow runtime ; checks. Not generating SCEV predicates can still result in vectorisation as ; the non-consecutive loads/stores can be scalarized: -; -; CHECK: vector.body: -; CHECK: store i16 0, ptr %{{.*}}, align 1 -; CHECK: store i16 0, ptr %{{.*}}, align 1 -; CHECK: br i1 {{.*}}, label %vector.body -; entry: br label %for.body29 @@ -123,19 +390,123 @@ for.body29: define void @pr43371_pgso() !prof !14 { ; -; CHECK-LABEL: @pr43371_pgso -; CHECK-NOT: vector.scevcheck +; CHECK-LABEL: define void @pr43371_pgso( +; CHECK-SAME: ) !prof [[PROF14]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i16> undef, [[VEC_IND]] +; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]] +; CHECK-NEXT: store i16 0, ptr [[TMP3]], align 1 +; CHECK-NEXT: store i16 0, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756 +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[FOR_COND_CLEANUP28:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY29:.*]] +; CHECK: [[FOR_COND_CLEANUP28]]: +; CHECK-NEXT: unreachable +; CHECK: [[FOR_BODY29]]: +; CHECK-NEXT: [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] +; CHECK-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] +; CHECK-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 +; CHECK-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] +; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1 +; CHECK-NEXT: [[INC37]] = add i16 [[I24_0170]], 1 +; CHECK-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756 +; CHECK-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP20:![0-9]+]] +; +; PGSO-LABEL: define void @pr43371_pgso( +; PGSO-SAME: ) !prof [[PROF14]] { +; PGSO-NEXT: [[ENTRY:.*]]: +; PGSO-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; PGSO: [[VECTOR_PH]]: +; PGSO-NEXT: br label %[[VECTOR_BODY:.*]] +; PGSO: [[VECTOR_BODY]]: +; PGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; PGSO-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; PGSO-NEXT: [[TMP0:%.*]] = add <2 x i16> undef, [[VEC_IND]] +; PGSO-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32> +; PGSO-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; PGSO-NEXT: [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]] +; PGSO-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; PGSO-NEXT: [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]] +; PGSO-NEXT: store i16 0, ptr [[TMP3]], align 1 +; PGSO-NEXT: store i16 0, ptr [[TMP5]], align 1 +; PGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; PGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2) +; PGSO-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756 +; PGSO-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; PGSO: [[MIDDLE_BLOCK]]: +; PGSO-NEXT: br i1 true, label %[[FOR_COND_CLEANUP28:.*]], label %[[SCALAR_PH]] +; PGSO: [[SCALAR_PH]]: +; PGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; PGSO-NEXT: br label %[[FOR_BODY29:.*]] +; PGSO: [[FOR_COND_CLEANUP28]]: +; PGSO-NEXT: unreachable +; PGSO: [[FOR_BODY29]]: +; PGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] +; PGSO-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] +; PGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 +; PGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] +; PGSO-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1 +; PGSO-NEXT: [[INC37]] = add i16 [[I24_0170]], 1 +; PGSO-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756 +; PGSO-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP20:![0-9]+]] +; +; NPGSO-LABEL: define void @pr43371_pgso( +; NPGSO-SAME: ) !prof [[PROF14]] { +; NPGSO-NEXT: [[ENTRY:.*]]: +; NPGSO-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; NPGSO: [[VECTOR_SCEVCHECK]]: +; NPGSO-NEXT: br i1 undef, label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; NPGSO: [[VECTOR_PH]]: +; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]] +; NPGSO: [[VECTOR_BODY]]: +; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NPGSO-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 +; NPGSO-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0 +; NPGSO-NEXT: [[TMP1:%.*]] = add i16 undef, [[TMP0]] +; NPGSO-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 +; NPGSO-NEXT: [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]] +; NPGSO-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[TMP3]], i32 0 +; NPGSO-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP4]], align 1 +; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; NPGSO-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756 +; NPGSO-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; NPGSO: [[MIDDLE_BLOCK]]: +; NPGSO-NEXT: br i1 true, label %[[FOR_COND_CLEANUP28:.*]], label %[[SCALAR_PH]] +; NPGSO: [[SCALAR_PH]]: +; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; NPGSO-NEXT: br label %[[FOR_BODY29:.*]] +; NPGSO: [[FOR_COND_CLEANUP28]]: +; NPGSO-NEXT: unreachable +; NPGSO: [[FOR_BODY29]]: +; NPGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] +; NPGSO-NEXT: [[ADD33:%.*]] = add i16 undef, [[I24_0170]] +; NPGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 +; NPGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] +; NPGSO-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1 +; NPGSO-NEXT: [[INC37]] = add i16 [[I24_0170]], 1 +; NPGSO-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756 +; NPGSO-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP22:![0-9]+]] ; ; We do not want to generate SCEV predicates when optimising for size, because ; that will lead to extra code generation such as the SCEV overflow runtime ; checks. Not generating SCEV predicates can still result in vectorisation as ; the non-consecutive loads/stores can be scalarized: -; -; CHECK: vector.body: -; CHECK: store i16 0, ptr %{{.*}}, align 1 -; CHECK: store i16 0, ptr %{{.*}}, align 1 -; CHECK: br i1 {{.*}}, label %vector.body -; entry: br label %for.body29 @@ -157,20 +528,47 @@ for.body29: ; define i32 @pr45526() optsize { ; -; CHECK-LABEL: @pr45526 -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop -; CHECK-EMPTY: -; CHECK-NEXT: loop: -; CHECK-NEXT: %piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ] -; CHECK-NEXT: %for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ] -; CHECK-NEXT: %pivPlus1 = add nuw nsw i32 %piv, 1 -; CHECK-NEXT: %cond = icmp ult i32 %piv, 510 -; CHECK-NEXT: br i1 %cond, label %loop, label %exit -; CHECK-EMPTY: -; CHECK-NEXT: exit: -; CHECK-NEXT: %for.lcssa = phi i32 [ %for, %loop ] -; CHECK-NEXT: ret i32 %for.lcssa +; CHECK-LABEL: define i32 @pr45526( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[PIV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ [[PIVPLUS1]], %[[LOOP]] ] +; CHECK-NEXT: [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[PIV]], 510 +; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[FOR_LCSSA]] +; +; PGSO-LABEL: define i32 @pr45526( +; PGSO-SAME: ) #[[ATTR0]] { +; PGSO-NEXT: [[ENTRY:.*]]: +; PGSO-NEXT: br label %[[LOOP:.*]] +; PGSO: [[LOOP]]: +; PGSO-NEXT: [[PIV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ] +; PGSO-NEXT: [[FOR:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ [[PIVPLUS1]], %[[LOOP]] ] +; PGSO-NEXT: [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1 +; PGSO-NEXT: [[COND:%.*]] = icmp ult i32 [[PIV]], 510 +; PGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]] +; PGSO: [[EXIT]]: +; PGSO-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ] +; PGSO-NEXT: ret i32 [[FOR_LCSSA]] +; +; NPGSO-LABEL: define i32 @pr45526( +; NPGSO-SAME: ) #[[ATTR0]] { +; NPGSO-NEXT: [[ENTRY:.*]]: +; NPGSO-NEXT: br label %[[LOOP:.*]] +; NPGSO: [[LOOP]]: +; NPGSO-NEXT: [[PIV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ] +; NPGSO-NEXT: [[FOR:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ [[PIVPLUS1]], %[[LOOP]] ] +; NPGSO-NEXT: [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1 +; NPGSO-NEXT: [[COND:%.*]] = icmp ult i32 [[PIV]], 510 +; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]] +; NPGSO: [[EXIT]]: +; NPGSO-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ] +; NPGSO-NEXT: ret i32 [[FOR_LCSSA]] ; entry: br label %loop @@ -188,20 +586,66 @@ exit: define i32 @pr45526_pgso() !prof !14 { ; -; CHECK-LABEL: @pr45526_pgso -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop -; CHECK-EMPTY: -; CHECK-NEXT: loop: -; CHECK-NEXT: %piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ] -; CHECK-NEXT: %for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ] -; CHECK-NEXT: %pivPlus1 = add nuw nsw i32 %piv, 1 -; CHECK-NEXT: %cond = icmp ult i32 %piv, 510 -; CHECK-NEXT: br i1 %cond, label %loop, label %exit -; CHECK-EMPTY: -; CHECK-NEXT: exit: -; CHECK-NEXT: %for.lcssa = phi i32 [ %for, %loop ] -; CHECK-NEXT: ret i32 %for.lcssa +; CHECK-LABEL: define i32 @pr45526_pgso( +; CHECK-SAME: ) !prof [[PROF14]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[PIV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[FOR:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ [[PIVPLUS1]], %[[LOOP]] ] +; CHECK-NEXT: [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[PIV]], 510 +; CHECK-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[FOR_LCSSA]] +; +; PGSO-LABEL: define i32 @pr45526_pgso( +; PGSO-SAME: ) !prof [[PROF14]] { +; PGSO-NEXT: [[ENTRY:.*]]: +; PGSO-NEXT: br label %[[LOOP:.*]] +; PGSO: [[LOOP]]: +; PGSO-NEXT: [[PIV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ] +; PGSO-NEXT: [[FOR:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ [[PIVPLUS1]], %[[LOOP]] ] +; PGSO-NEXT: [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1 +; PGSO-NEXT: [[COND:%.*]] = icmp ult i32 [[PIV]], 510 +; PGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]] +; PGSO: [[EXIT]]: +; PGSO-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ] +; PGSO-NEXT: ret i32 [[FOR_LCSSA]] +; +; NPGSO-LABEL: define i32 @pr45526_pgso( +; NPGSO-SAME: ) !prof [[PROF14]] { +; NPGSO-NEXT: [[ENTRY:.*]]: +; NPGSO-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NPGSO: [[VECTOR_PH]]: +; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]] +; NPGSO: [[VECTOR_BODY]]: +; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NPGSO-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NPGSO-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ] +; NPGSO-NEXT: [[TMP0]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 1) +; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; NPGSO-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 508 +; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; NPGSO: [[MIDDLE_BLOCK]]: +; NPGSO-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2 +; NPGSO-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 +; NPGSO-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; NPGSO: [[SCALAR_PH]]: +; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 508, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; NPGSO-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 5, %[[ENTRY]] ] +; NPGSO-NEXT: br label %[[LOOP:.*]] +; NPGSO: [[LOOP]]: +; NPGSO-NEXT: [[PIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ] +; NPGSO-NEXT: [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[PIVPLUS1]], %[[LOOP]] ] +; NPGSO-NEXT: [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1 +; NPGSO-NEXT: [[COND:%.*]] = icmp ult i32 [[PIV]], 510 +; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP24:![0-9]+]] +; NPGSO: [[EXIT]]: +; NPGSO-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ] +; NPGSO-NEXT: ret i32 [[FOR_LCSSA]] ; entry: br label %loop @@ -222,51 +666,153 @@ exit: ; NOTE: Some assertions have been autogenerated by utils/update_test_checks.py define void @stride1(ptr noalias %B, i32 %BStride) optsize { -; CHECK-LABEL: @stride1( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE:%.*]], i64 0 +; CHECK-LABEL: define void @stride1( +; CHECK-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 1024) ; CHECK-NEXT: [[TMP0:%.*]] = mul nsw <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; CHECK: pred.store.if: +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP3]] ; CHECK-NEXT: store i16 42, ptr [[TMP4]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] -; CHECK: pred.store.continue: +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 -; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] -; CHECK: pred.store.if1: +; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] +; CHECK: [[PRED_STORE_IF1]]: ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP6]] ; CHECK-NEXT: store i16 42, ptr [[TMP7]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] -; CHECK: pred.store.continue2: -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]] +; CHECK: [[PRED_STORE_CONTINUE2]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] -; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK: for.end: +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1026, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]] +; CHECK-NEXT: [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]] +; CHECK-NEXT: store i16 42, ptr [[GEPOFB]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; -; PGSO-LABEL: @stride1( -; PGSO-NEXT: entry: -; PGSO-NEXT: br i1 false, label %scalar.ph, label %vector.ph +; PGSO-LABEL: define void @stride1( +; PGSO-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) #[[ATTR0]] { +; PGSO-NEXT: [[ENTRY:.*]]: +; PGSO-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; PGSO: [[VECTOR_PH]]: +; PGSO-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE]], i64 0 +; PGSO-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; PGSO-NEXT: br label %[[VECTOR_BODY:.*]] +; PGSO: [[VECTOR_BODY]]: +; PGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; PGSO-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] +; PGSO-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 1024) +; PGSO-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; PGSO-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 +; PGSO-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; PGSO: [[PRED_STORE_IF]]: +; PGSO-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; PGSO-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP3]] +; PGSO-NEXT: store i16 42, ptr [[TMP4]], align 4 +; PGSO-NEXT: br label %[[PRED_STORE_CONTINUE]] +; PGSO: [[PRED_STORE_CONTINUE]]: +; PGSO-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 +; PGSO-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] +; PGSO: [[PRED_STORE_IF1]]: +; PGSO-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; PGSO-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP6]] +; PGSO-NEXT: store i16 42, ptr [[TMP7]], align 4 +; PGSO-NEXT: br label %[[PRED_STORE_CONTINUE2]] +; PGSO: [[PRED_STORE_CONTINUE2]]: +; PGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; PGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; PGSO-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026 +; PGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; PGSO: [[MIDDLE_BLOCK]]: +; PGSO-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; PGSO: [[SCALAR_PH]]: +; PGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1026, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; PGSO-NEXT: br label %[[FOR_BODY:.*]] +; PGSO: [[FOR_BODY]]: +; PGSO-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; PGSO-NEXT: [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]] +; PGSO-NEXT: [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]] +; PGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4 +; PGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; PGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 +; PGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; PGSO: [[FOR_END]]: +; PGSO-NEXT: ret void +; +; NPGSO-LABEL: define void @stride1( +; NPGSO-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) #[[ATTR0]] { +; NPGSO-NEXT: [[ENTRY:.*]]: +; NPGSO-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; NPGSO: [[VECTOR_PH]]: +; NPGSO-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE]], i64 0 +; NPGSO-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]] +; NPGSO: [[VECTOR_BODY]]: +; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; NPGSO-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ] +; NPGSO-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 1024) +; NPGSO-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; NPGSO-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0 +; NPGSO-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; NPGSO: [[PRED_STORE_IF]]: +; NPGSO-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +; NPGSO-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP3]] +; NPGSO-NEXT: store i16 42, ptr [[TMP4]], align 4 +; NPGSO-NEXT: br label %[[PRED_STORE_CONTINUE]] +; NPGSO: [[PRED_STORE_CONTINUE]]: +; NPGSO-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1 +; NPGSO-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]] +; NPGSO: [[PRED_STORE_IF1]]: +; NPGSO-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +; NPGSO-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP6]] +; NPGSO-NEXT: store i16 42, ptr [[TMP7]], align 4 +; NPGSO-NEXT: br label %[[PRED_STORE_CONTINUE2]] +; NPGSO: [[PRED_STORE_CONTINUE2]]: +; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; NPGSO-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026 +; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; NPGSO: [[MIDDLE_BLOCK]]: +; NPGSO-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; NPGSO: [[SCALAR_PH]]: +; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1026, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; NPGSO-NEXT: br label %[[FOR_BODY:.*]] +; NPGSO: [[FOR_BODY]]: +; NPGSO-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; NPGSO-NEXT: [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]] +; NPGSO-NEXT: [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]] +; NPGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4 +; NPGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; NPGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 +; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; NPGSO: [[FOR_END]]: +; NPGSO-NEXT: ret void ; -; NPGSO-LABEL: @stride1( -; NPGSO-NEXT: entry: -; NPGSO-NEXT: br i1 false, label %scalar.ph, label %vector.ph entry: br label %for.body @@ -287,14 +833,108 @@ for.end: ; Vectorize with versioning for unit stride for PGSO and enabled vectorization. ; define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { -; CHECK-LABEL: @stride1_pgso( -; CHECK: vector.body +; CHECK-LABEL: define void @stride1_pgso( +; CHECK-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) !prof [[PROF14]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; CHECK: [[VECTOR_SCEVCHECK]]: +; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[BSTRIDE]], 1 +; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 +; CHECK-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]] +; CHECK-NEXT: [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]] +; CHECK-NEXT: store i16 42, ptr [[GEPOFB]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret void +; +; PGSO-LABEL: define void @stride1_pgso( +; PGSO-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) !prof [[PROF14]] { +; PGSO-NEXT: [[ENTRY:.*]]: +; PGSO-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; PGSO: [[VECTOR_SCEVCHECK]]: +; PGSO-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[BSTRIDE]], 1 +; PGSO-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; PGSO: [[VECTOR_PH]]: +; PGSO-NEXT: br label %[[VECTOR_BODY:.*]] +; PGSO: [[VECTOR_BODY]]: +; PGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; PGSO-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; PGSO-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP0]] +; PGSO-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 +; PGSO-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP2]], align 4 +; PGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; PGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; PGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; PGSO: [[MIDDLE_BLOCK]]: +; PGSO-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; PGSO: [[SCALAR_PH]]: +; PGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; PGSO-NEXT: br label %[[FOR_BODY:.*]] +; PGSO: [[FOR_BODY]]: +; PGSO-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; PGSO-NEXT: [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]] +; PGSO-NEXT: [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]] +; PGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4 +; PGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; PGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 +; PGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; PGSO: [[FOR_END]]: +; PGSO-NEXT: ret void ; -; PGSO-LABEL: @stride1_pgso( -; PGSO: vector.body +; NPGSO-LABEL: define void @stride1_pgso( +; NPGSO-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) !prof [[PROF14]] { +; NPGSO-NEXT: [[ENTRY:.*]]: +; NPGSO-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; NPGSO: [[VECTOR_SCEVCHECK]]: +; NPGSO-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[BSTRIDE]], 1 +; NPGSO-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; NPGSO: [[VECTOR_PH]]: +; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]] +; NPGSO: [[VECTOR_BODY]]: +; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; NPGSO-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; NPGSO-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP0]] +; NPGSO-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0 +; NPGSO-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP2]], align 4 +; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; NPGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; NPGSO: [[MIDDLE_BLOCK]]: +; NPGSO-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; NPGSO: [[SCALAR_PH]]: +; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; NPGSO-NEXT: br label %[[FOR_BODY:.*]] +; NPGSO: [[FOR_BODY]]: +; NPGSO-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; NPGSO-NEXT: [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]] +; NPGSO-NEXT: [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]] +; NPGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4 +; NPGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; NPGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 +; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; NPGSO: [[FOR_END]]: +; NPGSO-NEXT: ret void ; -; NPGSO-LABEL: @stride1_pgso( -; NPGSO: vector.body entry: br label %for.body @@ -314,14 +954,55 @@ for.end: ; PR46652: Check that the need for stride==1 check prevents vectorizing a loop ; having tiny trip count, when compiling w/o -Os/-Oz. -; CHECK-LABEL: @pr46652 -; CHECK-NOT: vector.scevcheck -; CHECK-NOT: vector.body -; CHECK-LABEL: for.body @g = external global [1 x i16], align 1 define void @pr46652(i16 %stride) { +; CHECK-LABEL: define void @pr46652( +; CHECK-SAME: i16 [[STRIDE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[L1_02:%.*]] = phi i16 [ 1, %[[ENTRY]] ], [ [[INC9:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i16 [[L1_02]], [[STRIDE]] +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1 x i16], ptr @g, i16 0, i16 [[MUL]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX6]], align 1 +; CHECK-NEXT: [[INC9]] = add nuw nsw i16 [[L1_02]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i16 [[INC9]], 16 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: ret void +; +; PGSO-LABEL: define void @pr46652( +; PGSO-SAME: i16 [[STRIDE:%.*]]) { +; PGSO-NEXT: [[ENTRY:.*]]: +; PGSO-NEXT: br label %[[FOR_BODY:.*]] +; PGSO: [[FOR_BODY]]: +; PGSO-NEXT: [[L1_02:%.*]] = phi i16 [ 1, %[[ENTRY]] ], [ [[INC9:%.*]], %[[FOR_BODY]] ] +; PGSO-NEXT: [[MUL:%.*]] = mul nsw i16 [[L1_02]], [[STRIDE]] +; PGSO-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1 x i16], ptr @g, i16 0, i16 [[MUL]] +; PGSO-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX6]], align 1 +; PGSO-NEXT: [[INC9]] = add nuw nsw i16 [[L1_02]], 1 +; PGSO-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i16 [[INC9]], 16 +; PGSO-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]] +; PGSO: [[FOR_END]]: +; PGSO-NEXT: ret void +; +; NPGSO-LABEL: define void @pr46652( +; NPGSO-SAME: i16 [[STRIDE:%.*]]) { +; NPGSO-NEXT: [[ENTRY:.*]]: +; NPGSO-NEXT: br label %[[FOR_BODY:.*]] +; NPGSO: [[FOR_BODY]]: +; NPGSO-NEXT: [[L1_02:%.*]] = phi i16 [ 1, %[[ENTRY]] ], [ [[INC9:%.*]], %[[FOR_BODY]] ] +; NPGSO-NEXT: [[MUL:%.*]] = mul nsw i16 [[L1_02]], [[STRIDE]] +; NPGSO-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1 x i16], ptr @g, i16 0, i16 [[MUL]] +; NPGSO-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX6]], align 1 +; NPGSO-NEXT: [[INC9]] = add nuw nsw i16 [[L1_02]], 1 +; NPGSO-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i16 [[INC9]], 16 +; NPGSO-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]] +; NPGSO: [[FOR_END]]: +; NPGSO-NEXT: ret void +; entry: br label %for.body @@ -341,8 +1022,57 @@ for.end: ; preds = %for.body ; Make sure we do not crash while building the VPlan for the loop with the ; select below. define i32 @PR48142(ptr %ptr.start, ptr %ptr.end) optsize { -; CHECK-LABEL: PR48142 -; CHECK-NOT: vector.body +; CHECK-LABEL: define i32 @PR48142( +; CHECK-SAME: ptr [[PTR_START:%.*]], ptr [[PTR_END:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_014:%.*]] = phi i32 [ 20, %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_START]], %[[ENTRY]] ], [ [[PTR_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i32 [[I_014]], 99 +; CHECK-NEXT: [[COND]] = select i1 [[CMP4]], i32 99, i32 [[I_014]] +; CHECK-NEXT: store i32 0, ptr [[PTR_IV]], align 4 +; CHECK-NEXT: [[PTR_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1 +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[PTR_NEXT]], [[PTR_END]] +; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret i32 [[RES]] +; +; PGSO-LABEL: define i32 @PR48142( +; PGSO-SAME: ptr [[PTR_START:%.*]], ptr [[PTR_END:%.*]]) #[[ATTR0]] { +; PGSO-NEXT: [[ENTRY:.*]]: +; PGSO-NEXT: br label %[[FOR_BODY:.*]] +; PGSO: [[FOR_BODY]]: +; PGSO-NEXT: [[I_014:%.*]] = phi i32 [ 20, %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] +; PGSO-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_START]], %[[ENTRY]] ], [ [[PTR_NEXT:%.*]], %[[FOR_BODY]] ] +; PGSO-NEXT: [[CMP4:%.*]] = icmp slt i32 [[I_014]], 99 +; PGSO-NEXT: [[COND]] = select i1 [[CMP4]], i32 99, i32 [[I_014]] +; PGSO-NEXT: store i32 0, ptr [[PTR_IV]], align 4 +; PGSO-NEXT: [[PTR_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1 +; PGSO-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[PTR_NEXT]], [[PTR_END]] +; PGSO-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; PGSO: [[EXIT]]: +; PGSO-NEXT: [[RES:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ] +; PGSO-NEXT: ret i32 [[RES]] +; +; NPGSO-LABEL: define i32 @PR48142( +; NPGSO-SAME: ptr [[PTR_START:%.*]], ptr [[PTR_END:%.*]]) #[[ATTR0]] { +; NPGSO-NEXT: [[ENTRY:.*]]: +; NPGSO-NEXT: br label %[[FOR_BODY:.*]] +; NPGSO: [[FOR_BODY]]: +; NPGSO-NEXT: [[I_014:%.*]] = phi i32 [ 20, %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] +; NPGSO-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_START]], %[[ENTRY]] ], [ [[PTR_NEXT:%.*]], %[[FOR_BODY]] ] +; NPGSO-NEXT: [[CMP4:%.*]] = icmp slt i32 [[I_014]], 99 +; NPGSO-NEXT: [[COND]] = select i1 [[CMP4]], i32 99, i32 [[I_014]] +; NPGSO-NEXT: store i32 0, ptr [[PTR_IV]], align 4 +; NPGSO-NEXT: [[PTR_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1 +; NPGSO-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[PTR_NEXT]], [[PTR_END]] +; NPGSO-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; NPGSO: [[EXIT]]: +; NPGSO-NEXT: [[RES:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ] +; NPGSO-NEXT: ret i32 [[RES]] +; entry: br label %for.body @@ -379,3 +1109,44 @@ exit: !14 = !{!"function_entry_count", i64 0} !15 = distinct !{!15, !16} !16 = !{!"llvm.loop.vectorize.enable", i1 true} +;. +; CHECK: [[PROF14]] = !{!"function_entry_count", i64 0} +; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]], [[META17:![0-9]+]]} +; CHECK: [[META16]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META17]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META17]], [[META16]]} +; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]} +; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META17]], [[META16]]} +; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]], [[META17]]} +; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META16]]} +; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]], [[META17]]} +; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]]} +;. +; PGSO: [[PROF14]] = !{!"function_entry_count", i64 0} +; PGSO: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]], [[META17:![0-9]+]]} +; PGSO: [[META16]] = !{!"llvm.loop.isvectorized", i32 1} +; PGSO: [[META17]] = !{!"llvm.loop.unroll.runtime.disable"} +; PGSO: [[LOOP18]] = distinct !{[[LOOP18]], [[META17]], [[META16]]} +; PGSO: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]} +; PGSO: [[LOOP20]] = distinct !{[[LOOP20]], [[META17]], [[META16]]} +; PGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]], [[META17]]} +; PGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META16]]} +; PGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]], [[META17]]} +; PGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]]} +;. +; NPGSO: [[PROF14]] = !{!"function_entry_count", i64 0} +; NPGSO: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]], [[META17:![0-9]+]]} +; NPGSO: [[META16]] = !{!"llvm.loop.isvectorized", i32 1} +; NPGSO: [[META17]] = !{!"llvm.loop.unroll.runtime.disable"} +; NPGSO: [[LOOP18]] = distinct !{[[LOOP18]], [[META17]], [[META16]]} +; NPGSO: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]} +; NPGSO: [[LOOP20]] = distinct !{[[LOOP20]], [[META17]], [[META16]]} +; NPGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]], [[META17]]} +; NPGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]]} +; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]], [[META17]]} +; NPGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META17]], [[META16]]} +; NPGSO: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]], [[META17]]} +; NPGSO: [[LOOP26]] = distinct !{[[LOOP26]], [[META17]], [[META16]]} +; NPGSO: [[LOOP27]] = distinct !{[[LOOP27]], [[META16]], [[META17]]} +; NPGSO: [[LOOP28]] = distinct !{[[LOOP28]], [[META16]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll index ee6975fa0c9e6..c301ef3c5319a 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -52,7 +52,7 @@ define void @pr45679(ptr %A) optsize { ; CHECK-NEXT: store i32 13, ptr [[TMP12]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -114,7 +114,7 @@ define void @pr45679(ptr %A) optsize { ; VF2UF2-NEXT: store i32 13, ptr [[TMP13]], align 1 ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE7]] ; VF2UF2: pred.store.continue6: -; VF2UF2-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; VF2UF2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) ; VF2UF2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; VF2UF2-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -172,7 +172,7 @@ define void @pr45679(ptr %A) optsize { ; VF1UF4-NEXT: store i32 13, ptr [[TMP11]], align 1 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE6]] ; VF1UF4: pred.store.continue6: -; VF1UF4-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; VF1UF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; VF1UF4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; VF1UF4-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF1UF4: middle.block: @@ -251,7 +251,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: store i64 [[TMP19]], ptr [[B]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -318,7 +318,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF2UF2-NEXT: store i64 [[TMP20]], ptr [[B]], align 8 ; VF2UF2-NEXT: br label [[PRED_STORE_CONTINUE7]] ; VF2UF2: pred.store.continue6: -; VF2UF2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; VF2UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF2UF2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) ; VF2UF2-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; VF2UF2-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -381,7 +381,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF1UF4-NEXT: store i64 [[TMP18]], ptr [[B]], align 8 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE6]] ; VF1UF4: pred.store.continue6: -; VF1UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; VF1UF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF1UF4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; VF1UF4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF1UF4: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll index f0220f5e766b2..1e1b03b6cd8cf 100644 --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -28,7 +28,7 @@ define void @test(i16 %x, i64 %y, ptr %ptr) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: store i32 0, ptr [[PTR:%.*]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll index d7f5f646f1873..705152662be24 100644 --- a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll @@ -56,7 +56,7 @@ define dso_local i16 @reverse_interleave_load_fold_mask() optsize { ; CHECK-NEXT: [[TMP24:%.*]] = add nsw <2 x i16> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP25]] = add <2 x i16> [[VEC_PHI]], [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[TMP25]], <2 x i16> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[INDEX_NEXT]], 42 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index 6771f56191313..8e132ed8399cd 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -56,7 +56,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]]) ; CHECK-NEXT: [[TMP26]] = add i32 [[TMP25]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -165,7 +165,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP46]]) ; CHECK-NEXT: [[TMP48]] = add i32 [[TMP47]], [[TMP45]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4) ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 @@ -260,7 +260,7 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> splat (i32 3), <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP27]]) ; CHECK-NEXT: [[TMP29]] = add i32 [[TMP28]], [[TMP26]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -370,7 +370,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]]) ; CHECK-NEXT: [[TMP48]] = mul i32 [[TMP47]], [[TMP45]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4) ; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 @@ -483,7 +483,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]]) ; CHECK-NEXT: [[TMP46]] = add i32 [[TMP45]], [[TMP43]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4) ; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 @@ -594,7 +594,7 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]]) ; CHECK-NEXT: [[TMP45]] = mul i32 [[TMP44]], [[TMP42]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -702,7 +702,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 -1) ; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP43]]) ; CHECK-NEXT: [[TMP45]] = and i32 [[TMP44]], [[TMP42]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -808,7 +808,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP41]]) ; CHECK-NEXT: [[TMP43]] = or i32 [[TMP42]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -914,7 +914,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP41]]) ; CHECK-NEXT: [[TMP43]] = xor i32 [[TMP42]], [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -1020,7 +1020,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[TMP40]]) ; CHECK-NEXT: [[TMP42:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP43]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -1128,7 +1128,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP43:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP43]]) ; CHECK-NEXT: [[TMP45]] = fmul fast float [[TMP44]], [[TMP42]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] @@ -1217,7 +1217,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> splat (i32 2147483647) ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]]) ; CHECK-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] @@ -1304,7 +1304,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]]) ; CHECK-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]]) -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] @@ -1487,7 +1487,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32> ; CHECK-NEXT: [[TMP30]] = add nuw nsw <4 x i32> [[TMP1]], [[TMP29]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] @@ -1582,7 +1582,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i8> [ [[TMP20]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32> ; CHECK-NEXT: [[TMP29]] = and <4 x i32> [[VEC_PHI]], [[TMP28]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll index f65f62bbf54bf..1d35a18c98fae 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll @@ -257,7 +257,7 @@ define i32 @predicated(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP108:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP98]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP109:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP108]]) ; CHECK-NEXT: [[TMP110]] = add i32 [[TMP109]], [[VEC_PHI6]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 16) ; CHECK-NEXT: [[TMP111:%.*]] = icmp eq i64 [[INDEX_NEXT]], 272 ; CHECK-NEXT: br i1 [[TMP111]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll index 491a9ec0f0d4d..f95be1a221e73 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll @@ -55,7 +55,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP25]] = add <4 x i32> [[VEC_PHI]], [[TMP24]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -158,7 +158,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP41:%.*]] = add <4 x i32> [[TMP40]], [[TMP38]] ; CHECK-NEXT: [[TMP42:%.*]] = add <4 x i32> [[TMP41]], [[TMP39]] ; CHECK-NEXT: [[TMP43]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP42]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -264,7 +264,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) { ; CHECK-NEXT: [[TMP40:%.*]] = mul <4 x i32> [[VEC_PHI]], [[TMP38]] ; CHECK-NEXT: [[TMP41:%.*]] = mul <4 x i32> [[TMP40]], [[TMP39]] ; CHECK-NEXT: [[TMP42]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP41]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -369,7 +369,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP40:%.*]] = and <4 x i32> [[TMP38]], [[TMP39]] ; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> splat (i32 -1) ; CHECK-NEXT: [[TMP42]] = and <4 x i32> [[VEC_PHI]], [[TMP41]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -474,7 +474,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]] ; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42]] = or <4 x i32> [[VEC_PHI]], [[TMP41]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -579,7 +579,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]] ; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP42]] = xor <4 x i32> [[VEC_PHI]], [[TMP41]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -684,7 +684,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP40:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP38]] ; CHECK-NEXT: [[TMP41:%.*]] = fadd fast <4 x float> [[TMP40]], [[TMP39]] ; CHECK-NEXT: [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -789,7 +789,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP40:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[TMP38]] ; CHECK-NEXT: [[TMP41:%.*]] = fmul fast <4 x float> [[TMP40]], [[TMP39]] ; CHECK-NEXT: [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -877,7 +877,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP24:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]) ; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -963,7 +963,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) { ; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP24:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]) ; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 ; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll index b7fc1e83ce23f..5e28192997695 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -32,7 +32,7 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], splat (i32 10) ; CHECK-NEXT: [[TMP2]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_PHI]], <4 x i32> splat (i32 10) ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -93,7 +93,7 @@ define i32 @pr66895_tail_fold_reduction_exit_inst_gets_simplified(i32 %n) { ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 12) ; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_PHI]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll b/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll index b3fc0283affdf..9e7c667f9c8ad 100644 --- a/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll +++ b/llvm/test/Transforms/LoopVectorize/strict-fadd-interleave-only.ll @@ -19,7 +19,7 @@ define float @pr70988() { ; CHECK-NEXT: [[TMP3:%.*]] = fadd contract float [[VEC_PHI]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = select contract i1 [[TMP1]], float 1.000000e+00, float -0.000000e+00 ; CHECK-NEXT: [[TMP5]] = fadd contract float [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[INDEX_NEXT3]] = add i32 [[INDEX1]], 2 +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i32 [[INDEX1]], 2 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT3]], 1022 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -55,7 +55,7 @@ define float @pr70988() { ; CHECK-ALM-NEXT: [[TMP3:%.*]] = fadd contract float [[VEC_PHI]], [[TMP2]] ; CHECK-ALM-NEXT: [[TMP4:%.*]] = select contract i1 [[ACTIVE_LANE_MASK2]], float 1.000000e+00, float -0.000000e+00 ; CHECK-ALM-NEXT: [[TMP5]] = fadd contract float [[TMP3]], [[TMP4]] -; CHECK-ALM-NEXT: [[INDEX_NEXT3]] = add i32 [[INDEX1]], 2 +; CHECK-ALM-NEXT: [[INDEX_NEXT3]] = add nuw i32 [[INDEX1]], 2 ; CHECK-ALM-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT3]], 1022 ; CHECK-ALM-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-ALM: middle.block: @@ -125,7 +125,7 @@ define float @pr72720reduction_using_active_lane_mask(ptr %src) { ; CHECK-NEXT: [[TMP11:%.*]] = fadd contract float [[VEC_PHI]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP1]], float [[TMP9]], float -0.000000e+00 ; CHECK-NEXT: [[TMP13]] = fadd contract float [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -178,7 +178,7 @@ define float @pr72720reduction_using_active_lane_mask(ptr %src) { ; CHECK-ALM-NEXT: [[TMP9:%.*]] = fadd contract float [[VEC_PHI]], [[TMP8]] ; CHECK-ALM-NEXT: [[TMP10:%.*]] = select contract i1 [[ACTIVE_LANE_MASK1]], float [[TMP7]], float -0.000000e+00 ; CHECK-ALM-NEXT: [[TMP11]] = fadd contract float [[TMP9]], [[TMP10]] -; CHECK-ALM-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; CHECK-ALM-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-ALM-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; CHECK-ALM-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-ALM: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll index 9efc8d12f44a3..56fc8eac35bad 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll @@ -48,7 +48,7 @@ define i32 @test(ptr %vf1, i64 %n) { ; CHECK-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; CHECK: [[PRED_STORE_CONTINUE6]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 204 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll index dd2fd0de9938e..31732f027f6dd 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll @@ -49,7 +49,7 @@ define void @tail_fold_switch(ptr %dst, i32 %0) { ; CHECK-NEXT: store i32 0, ptr [[TMP15]], align 4 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; CHECK: [[PRED_STORE_CONTINUE6]]: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8 ; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll index 92ca77bc841ca..336c5ea4331b8 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -49,7 +49,7 @@ define void @VF1-VPlanExe(ptr %dst) { ; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: @@ -129,7 +129,7 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) { ; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP3]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] ; CHECK: pred.store.continue12: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 88e7aaccfe2f3..8872ad4e3897d 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -51,7 +51,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -136,7 +136,7 @@ exit: ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -221,7 +221,7 @@ exit: ; CHECK-NEXT: } ; CHECK: loop.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -292,7 +292,7 @@ define void @uniform_gep(i64 %k, ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: Successor(s): loop.then.0 ; CHECK-EMPTY: ; CHECK-NEXT: loop.then.0: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -384,7 +384,7 @@ define void @pred_cfg1(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): next.0.1 ; CHECK-EMPTY: ; CHECK-NEXT: next.0.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -484,7 +484,7 @@ define void @pred_cfg2(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): then.1.1 ; CHECK-EMPTY: ; CHECK-NEXT: then.1.1: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -592,7 +592,7 @@ define void @pred_cfg3(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): then.1.2 ; CHECK-EMPTY: ; CHECK-NEXT: then.1.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -701,7 +701,7 @@ define void @merge_3_replicate_region(i32 %k, i32 %j) { ; CHECK-NEXT: Successor(s): then.0.4 ; CHECK-EMPTY: ; CHECK-NEXT: then.0.4: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -781,7 +781,7 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: } @@ -866,7 +866,7 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { ; CHECK-NEXT: Successor(s): loop.2 ; CHECK-EMPTY: ; CHECK-NEXT: loop.2: -; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add vp<[[CAN_IV]]>, vp<[[VFxUF]]> +; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> ; CHECK-NEXT: No successors ; CHECK-NEXT: }