diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index db650b23e271e..f2bee2c67a235 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2747,17 +2747,24 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, if (isa_and_nonnull(II.getInductionBinOp())) B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags()); - Value *CountMinusOne = B.CreateSub( - VectorTripCount, ConstantInt::get(VectorTripCount->getType(), 1)); - CountMinusOne->setName("cmo"); - VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep()); assert(StepVPV && "step must have been expanded during VPlan execution"); Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue() : State.get(StepVPV, VPLane(0)); - Value *Escape = - emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step, - II.getKind(), II.getInductionBinOp()); + Value *Escape = nullptr; + if (EndValue->getType()->isIntegerTy()) + Escape = B.CreateSub(EndValue, Step); + else if (EndValue->getType()->isPointerTy()) + Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step)); + else if (EndValue->getType()->isFloatingPointTy()) { + Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() == + Instruction::FAdd + ? Instruction::FSub + : Instruction::FAdd, + EndValue, Step); + } else { + llvm_unreachable("all possible induction types must be handled"); + } Escape->setName("ind.escape"); MissingVals[UI] = Escape; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll index c28776e82776b..64b69be5f5259 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -42,9 +42,7 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] -; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP37:%.*]] = mul i64 [[CMO]], 8 -; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP37]] +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[IND_END]], i64 -8 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index cb4f5f6d9eaba..54dd9c870a170 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -208,25 +208,23 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x double> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <4 x double> [[VEC_IND]], -; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], +; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]] ; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32 ; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 64 ; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP1]], i64 96 ; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], ptr [[TMP1]], align 8 ; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], ptr [[TMP2]], align 8 -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2]], ptr [[TMP3]], align 8 -; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3]], ptr [[TMP4]], align 8 +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], ptr [[TMP3]], align 8 +; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_3]], ptr [[TMP4]], align 8 ; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x double> [[VEC_IND]], ; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; AUTO_VEC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; AUTO_VEC: middle.block: ; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] -; AUTO_VEC-NEXT: [[CMO:%.*]] = add nsw i64 [[N_VEC]], -1 -; AUTO_VEC-NEXT: [[DOTCAST6:%.*]] = sitofp i64 [[CMO]] to double -; AUTO_VEC-NEXT: [[TMP6:%.*]] = fmul fast double [[DOTCAST6]], 3.000000e+00 +; AUTO_VEC-NEXT: [[IND_ESCAPE:%.*]] = fadd fast double [[TMP0]], -3.000000e+00 ; AUTO_VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; AUTO_VEC: for.body: ; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] @@ -238,7 +236,7 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[SMAX]] ; AUTO_VEC-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; AUTO_VEC: for.end: -; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ] +; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ] ; AUTO_VEC-NEXT: ret double [[J_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index bf27c146ec9ce..02fdbc05ed518 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -63,7 +63,7 @@ for.end: ; CHECK-LABEL: @geppre ; CHECK-LABEL: middle.block: -; CHECK: %ind.escape = getelementptr i8, ptr %ptr, i64 496 +; CHECK: %ind.escape = getelementptr i8, ptr %ind.end, i64 -16 ; CHECK-LABEL: for.end: ; CHECK: %[[RET:.*]] = phi ptr [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ] ; CHECK: ret ptr %[[RET]] @@ -85,9 +85,7 @@ for.end: ; CHECK-LABEL: @both ; CHECK-LABEL: middle.block: -; CHECK: %[[END:.*]] = sub i64 %n.vec, 1 -; CHECK: %[[END_OFFSET:.*]] = mul i64 %[[END]], 4 -; CHECK: %ind.escape = getelementptr i8, ptr %base, i64 %[[END_OFFSET]] +; CHECK: %ind.escape = getelementptr i8, ptr %ind.end1, i64 -4 ; CHECK-LABEL: for.end: ; CHECK: %[[RET:.*]] = phi ptr [ %inc.lag1, %for.body ], [ %ind.escape, %middle.block ] ; CHECK: ret ptr %[[RET]] @@ -142,7 +140,7 @@ for.end: ; CHECK: %[[N_VEC:.+]] = sub i32 %[[T5]], %[[N_MOD_VF]] ; CHECK: middle.block ; CHECK: %[[CMP:.+]] = icmp eq i32 %[[T5]], %[[N_VEC]] -; CHECK: %ind.escape = add i32 %[[T15]], +; CHECK: %ind.escape = sub i32 %ind.end8, -8 ; CHECK: br i1 %[[CMP]], label %BB3, label %scalar.ph define void @PR30742() { BB0: diff --git a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll index 80a6bb50ca91b..d462d3aa650d2 100644 --- a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll +++ b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll @@ -51,8 +51,7 @@ define i32 @test(ptr %arr, i64 %n) { ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] -; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1 -; CHECK-NEXT: [[IND_ESCAPE:%.*]] = add i64 1, [[CMO]] +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[IND_END]], 1 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOAD_VAL:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ], [ 1, [[VECTOR_SCEVCHECK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll index c0eb4ccdd6d7e..af1c146c2c6c4 100644 --- a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll @@ -28,10 +28,10 @@ define void @test1_pr58811() { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196 +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[INDUCTION_IV_LCSSA]] +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END]], [[INDUCTION_IV_LCSSA]] ; CHECK-NEXT: br i1 false, label [[LOOP_3_PREHEADER:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_PREHEADER]] ] @@ -123,28 +123,28 @@ define void @test2_pr58811() { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196 +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[INDUCTION_IV_LCSSA]] +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END]], [[INDUCTION_IV_LCSSA]] ; CHECK-NEXT: br i1 false, label [[LOOP_4_PREHEADER:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_3_PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_3_PREHEADER]] ] ; CHECK-NEXT: br label [[LOOP_3:%.*]] ; CHECK: loop.3: -; CHECK-NEXT: [[INT16_TINDARRAYSAFEVAR_186_0747_1:%.*]] = phi i16 [ [[INC_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[UINT32_TVAR_177_2745_1:%.*]] = phi i32 [ [[SUB93_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SUB93_1]] = sub i32 [[UINT32_TVAR_177_2745_1]], [[IV_2_LCSSA]] -; CHECK-NEXT: [[INC_1]] = add i16 [[INT16_TINDARRAYSAFEVAR_186_0747_1]], 1 -; CHECK-NEXT: [[CMP88_1:%.*]] = icmp ult i16 [[INT16_TINDARRAYSAFEVAR_186_0747_1]], 198 +; CHECK-NEXT: [[IV_4:%.*]] = phi i16 [ [[INC_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[IV_5:%.*]] = phi i32 [ [[SUB93_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[SUB93_1]] = sub i32 [[IV_5]], [[IV_2_LCSSA]] +; CHECK-NEXT: [[INC_1]] = add i16 [[IV_4]], 1 +; CHECK-NEXT: [[CMP88_1:%.*]] = icmp ult i16 [[IV_4]], 198 ; CHECK-NEXT: br i1 [[CMP88_1]], label [[LOOP_3]], label [[LOOP_4_PREHEADER]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: loop.4.preheader: -; CHECK-NEXT: [[UINT32_TVAR_177_2745_1_LCSSA:%.*]] = phi i32 [ [[UINT32_TVAR_177_2745_1]], [[LOOP_3]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[IV_5_LCSSA:%.*]] = phi i32 [ [[IV_5]], [[LOOP_3]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP_4]] ; CHECK: loop.4: -; CHECK-NEXT: [[UINT32_TVAR_177_2745_2:%.*]] = phi i32 [ [[SUB93_2]], [[LOOP_4]] ], [ 0, [[LOOP_4_PREHEADER]] ] -; CHECK-NEXT: [[SUB93_2]] = sub i32 [[UINT32_TVAR_177_2745_2]], [[UINT32_TVAR_177_2745_1_LCSSA]] +; CHECK-NEXT: [[IV_6:%.*]] = phi i32 [ [[SUB93_2]], [[LOOP_4]] ], [ 0, [[LOOP_4_PREHEADER]] ] +; CHECK-NEXT: [[SUB93_2]] = sub i32 [[IV_6]], [[IV_5_LCSSA]] ; CHECK-NEXT: br i1 false, label [[LOOP_4]], label [[LOOP_1_HEADER_LOOPEXIT]] ; entry: @@ -201,10 +201,10 @@ define void @test3_pr58811() { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[TMP3]] +; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END]], [[TMP3]] ; CHECK-NEXT: br i1 false, label [[LOOP_4_PREHEADER:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_3_PREHEADER]] ]