From e481816f8757406bbcd8d065bbf00bb86ec7a69e Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 20 Aug 2025 17:37:19 +0800 Subject: [PATCH 1/7] [VPlan] Allow folding not (cmp eq) -> icmp ne with other select users Currently we only allow folding not (cmp eq) -> icmp ne if the not is the only user of the compare. However a common scenario is that some select might also use the compare. We can still fold the not if we also swizzle the arms of the selects. This helps avoid regressions in #150368 --- .../Transforms/Vectorize/VPlanTransforms.cpp | 26 +++- .../X86/drop-poison-generating-flags.ll | 65 ++++---- ...6-sunk-instruction-used-outside-of-loop.ll | 7 +- ...able-info-from-assumption-constant-size.ll | 77 ++++------ ...able-info-from-assumption-variable-size.ll | 31 ++-- .../Transforms/LoopVectorize/if-reduction.ll | 10 +- .../Transforms/LoopVectorize/induction.ll | 15 +- .../load-of-struct-deref-pred.ll | 140 +++++++++--------- .../LoopVectorize/no_outside_user.ll | 67 +++++---- .../LoopVectorize/pr44488-predication.ll | 5 +- 10 files changed, 211 insertions(+), 232 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index cff43c2742a6b..1c9d171b72279 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1107,13 +1107,29 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return Def->replaceAllUsesWith(A); // Try to fold Not into compares by adjusting the predicate in-place. - if (isa(A) && A->getNumUsers() == 1) { + auto CanFold = [&A](VPUser *U) { + return match( + U, m_CombineOr(m_Not(m_Specific(A)), + m_Select(m_Specific(A), m_VPValue(), m_VPValue()))); + }; + if (isa(A) && all_of(A->users(), CanFold)) { auto *WideCmp = cast(A); if (WideCmp->getOpcode() == Instruction::ICmp || WideCmp->getOpcode() == Instruction::FCmp) { WideCmp->setPredicate( CmpInst::getInversePredicate(WideCmp->getPredicate())); - Def->replaceAllUsesWith(WideCmp); + for (VPUser *U : WideCmp->users()) { + auto *R = cast(U); + // not (icmp eq) -> icmp ne + if (match(R, m_Not(m_Specific(WideCmp)))) + R->replaceAllUsesWith(WideCmp); + // select (icmp eq), x, y -> select (icmp ne), y, x + else if (match(R, m_Select(m_Specific(WideCmp), m_VPValue(X), + m_VPValue(Y)))) { + R->setOperand(1, Y); + R->setOperand(2, X); + } + } // If WideCmp doesn't have a debug location, use the one from the // negation, to preserve the location. if (!WideCmp->getDebugLoc() && R.getDebugLoc()) @@ -1885,7 +1901,6 @@ void VPlanTransforms::truncateToMinimalBitwidths( PH->appendRecipe(NewOp); } } - } } } @@ -2654,8 +2669,9 @@ void VPlanTransforms::createInterleaveGroups( ReversePtr->insertBefore(InsertPos); Addr = ReversePtr; } - auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues, - InsertPos->getMask(), NeedsMaskForGaps, InsertPos->getDebugLoc()); + auto *VPIG = + new VPInterleaveRecipe(IG, Addr, StoredValues, InsertPos->getMask(), + NeedsMaskForGaps, InsertPos->getDebugLoc()); VPIG->insertBefore(InsertPos); unsigned J = 0; diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll index 84e36cbb33552..906e3ef67ec96 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll @@ -29,12 +29,11 @@ define void @drop_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ptr %out ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0:![0-9]+]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP0]], <4 x float> poison), !invariant.load [[META0:![0-9]+]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -79,12 +78,11 @@ define void @drop_scalar_gep_nusw(ptr noalias nocapture readonly %input, ptr %ou ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP0]], <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nusw float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -129,12 +127,11 @@ define void @drop_scalar_gep_nuw(ptr noalias nocapture readonly %input, ptr %out ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP0]], <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nuw float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -182,10 +179,9 @@ define void @drop_nonpred_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], splat (i1 true) -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP1]], i32 4, <4 x i1> [[TMP3]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP1]], i32 4, <4 x i1> [[TMP2]], <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -230,13 +226,12 @@ define void @preserve_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <4 x i64> [[TMP2]], splat (i64 2) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[INPUT]], <4 x i64> [[TMP3]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> [[TMP0]], <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[WIDE_MASKED_GATHER]], <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -282,15 +277,14 @@ define void @drop_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr %out ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], <4 x i64> [[TMP2]] ; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP1]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[TMP5]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[TMP0]], <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -343,7 +337,7 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP1]], 1 @@ -358,9 +352,8 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input, ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> [[TMP16]], ptr [[TMP13]], i32 3 ; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true) -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP10]], i32 4, <4 x i1> [[TMP19]], <4 x float> poison), !invariant.load [[META0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP10]], i32 4, <4 x i1> [[TMP4]], <4 x float> poison), !invariant.load [[META0]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[TMP0]] ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP21]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -720,8 +713,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -765,8 +757,8 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) { ; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP27]], i32 3 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] ; CHECK: [[PRED_LOAD_CONTINUE6]]: -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP31:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TMP31]], <4 x i8> zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP15]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -816,8 +808,7 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -861,9 +852,9 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst ; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP27]], i32 3 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] ; CHECK: [[PRED_LOAD_CONTINUE6]]: -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]] -; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> poison +; CHECK-NEXT: [[TMP32:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TMP32]], <4 x i8> zeroinitializer +; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> poison, <4 x i64> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[PREDPHI7]], i32 3 ; CHECK-NEXT: store i64 [[TMP12]], ptr [[AUX]], align 8 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll index 2c97863c182b9..0fdcf89deb902 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll @@ -17,8 +17,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x ptr> [[TMP16]], ptr [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -34,8 +33,8 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP11]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> zeroinitializer, <2 x i32> [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP15]], <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll index d24e7e871239a..0316e8e39a6a4 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll @@ -20,8 +20,7 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias noundef ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP5]], i64 4), "dereferenceable"(ptr [[TMP5]], i64 4) ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -39,8 +38,8 @@ define void @deref_assumption_in_header_constant_trip_count(ptr noalias noundef ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] +; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP10]], <2 x i32> [[TMP14]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -190,8 +189,7 @@ define void @deref_assumption_too_small_in_header_constant_trip_count(ptr noalia ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP5]], i64 4), "dereferenceable"(ptr [[TMP5]], i64 2) ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -209,8 +207,8 @@ define void @deref_assumption_too_small_in_header_constant_trip_count(ptr noalia ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] +; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP10]], <2 x i32> [[TMP14]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -287,8 +285,7 @@ define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[TMP3]], i64 4) ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -307,7 +304,7 @@ define void @deref_assumption_in_header_constant_trip_count_align_1(ptr noalias ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP17]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -384,8 +381,7 @@ define void @deref_assumption_in_header_constant_trip_count_align_via_arg_attrib ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[TMP3]], i64 4) ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -404,7 +400,7 @@ define void @deref_assumption_in_header_constant_trip_count_align_via_arg_attrib ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP17]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -481,8 +477,7 @@ define void @deref_assumption_in_header_constant_trip_count_align_not_known(ptr ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[TMP3]], i64 4) ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -501,7 +496,7 @@ define void @deref_assumption_in_header_constant_trip_count_align_not_known(ptr ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP17]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -573,8 +568,7 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias noundef %a ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] @@ -593,8 +587,8 @@ define void @deref_assumption_in_then_constant_trip_count(ptr noalias noundef %a ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] +; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP12]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP26]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -667,8 +661,7 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias noundef % ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <2 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -686,8 +679,8 @@ define void @deref_assumption_in_latch_constant_trip_count(ptr noalias noundef % ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP23]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] +; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP12]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP28]], i64 4), "dereferenceable"(ptr [[TMP20]], i64 4) ] @@ -773,8 +766,7 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias noundef ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP5]], i64 4), "dereferenceable"(ptr [[TMP5]], i64 4) ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = xor <2 x i1> [[TMP9]], splat (i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -792,8 +784,8 @@ define void @deref_assumption_in_header_variable_trip_count(ptr noalias noundef ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP27]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP17]] +; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP10]], <2 x i32> [[TMP14]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -940,8 +932,7 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1 ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -962,7 +953,7 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP15]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -1179,8 +1170,7 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -1201,7 +1191,7 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP15]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -1271,8 +1261,7 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4 ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -1293,7 +1282,7 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP15]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -1365,8 +1354,7 @@ define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_ ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -1382,8 +1370,8 @@ define void @may_free_align_deref_assumption_in_header_constant_trip_count_loop_ ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP14]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ [[TMP12]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP9]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -1455,8 +1443,7 @@ define void @may_free_local_ptr_align_deref_assumption_in_header_constant_trip_c ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -1473,7 +1460,7 @@ define void @may_free_local_ptr_align_deref_assumption_in_header_constant_trip_c ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP12:%.*]] = phi <2 x i32> [ [[TMP8]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP11]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP12]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP12]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll index 9852f538c6f74..8417e22fe0d8a 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll @@ -180,8 +180,7 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP15:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -201,8 +200,8 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP20]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -278,8 +277,7 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac ; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true) +; CHECK-NEXT: [[TMP15:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -299,8 +297,8 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP20]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP6]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP0]], 2 @@ -458,8 +456,7 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32_al ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -480,7 +477,7 @@ define void @deref_assumption_in_preheader_non_constant_trip_count_access_i32_al ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP15]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP15]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP16]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -566,8 +563,7 @@ define void @deref_assumption_loop_access_start_variable(i8 %v, ptr noundef %P, ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[IV_START]], [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP8]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -587,8 +583,8 @@ define void @deref_assumption_loop_access_start_variable(i8 %v, ptr noundef %P, ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP14]], i32 1 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP20]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 @@ -678,8 +674,7 @@ define void @deref_assumption_loop_access_start_variable_unknown_range(i8 %v, pt ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[IV_START]], [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] ; CHECK: [[PRED_LOAD_IF]]: @@ -700,7 +695,7 @@ define void @deref_assumption_loop_access_start_variable_unknown_range(i8 %v, pt ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_CONTINUE2]]: ; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ [[TMP10]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], %[[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP16]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP16]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP17]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll index 65330aabf3c9a..ad6a1ec684e32 100644 --- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -1192,14 +1192,13 @@ define float @fcmp_multi(ptr nocapture readonly %a, i32 %n) nounwind readonly { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x float> [[TMP9]], <4 x float> [[TMP8]] -; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[WIDE_LOAD]], <4 x float> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[PREDPHI]], <4 x float> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP10]] = fadd fast <4 x float> [[PREDPHI1]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -1315,14 +1314,13 @@ define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonl ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI1:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP8:%.*]] = fcmp uge <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fsub fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP6]] -; CHECK-NEXT: [[PREDPHI1]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI1]] = select <4 x i1> [[TMP4]], <4 x float> [[PREDPHI]], <4 x float> [[TMP7]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 343facb2ef698..cbf9cc11d9009 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -4978,8 +4978,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE2]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i16> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; CHECK: pred.urem.if: @@ -4998,7 +4997,7 @@ define i32 @PR32419(i32 %a, i16 %b) { ; CHECK-NEXT: br label [[PRED_UREM_CONTINUE2]] ; CHECK: pred.urem.continue2: ; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_UREM_CONTINUE]] ], [ [[TMP12]], [[PRED_UREM_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP13]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> [[TMP13]], <2 x i16> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> ; CHECK-NEXT: [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 @@ -5172,10 +5171,8 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i16> [[VEC_IND]], splat (i16 2) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq <2 x i16> [[STEP_ADD]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true) -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp ne <2 x i16> [[VEC_IND]], zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = icmp ne <2 x i16> [[STEP_ADD]], zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; UNROLL-NO-IC-NEXT: br i1 [[TMP6]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] ; UNROLL-NO-IC: pred.urem.if: @@ -5212,8 +5209,8 @@ define i32 @PR32419(i32 %a, i16 %b) { ; UNROLL-NO-IC-NEXT: br label [[PRED_UREM_CONTINUE7]] ; UNROLL-NO-IC: pred.urem.continue7: ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = phi <2 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE5]] ], [ [[TMP24]], [[PRED_UREM_IF6]] ] -; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP15]] -; UNROLL-NO-IC-NEXT: [[PREDPHI8:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> zeroinitializer, <2 x i16> [[TMP25]] +; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i16> [[TMP15]], <2 x i16> zeroinitializer +; UNROLL-NO-IC-NEXT: [[PREDPHI8:%.*]] = select <2 x i1> [[TMP5]], <2 x i16> [[TMP25]], <2 x i16> zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = sext <2 x i16> [[PREDPHI8]] to <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]] diff --git a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll index fbe57c81053fa..c822ded111c62 100644 --- a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll @@ -97,55 +97,54 @@ define void @accesses_to_struct_may_not_be_dereferenceable_due_to_loop_bound(ptr ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 -; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 -; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP18]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP19]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 -; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP24]], i32 3 +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP26:%.*]] = phi <4 x i32> [ [[TMP20]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP25]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP27]], align 4 -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> [[TMP26]] +; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP26]], align 4 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP25]], <4 x i32> [[WIDE_LOAD7]] ; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 -; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 +; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -216,56 +215,55 @@ define void @accesses_to_struct_may_not_be_dereferenceable_access_size(ptr noali ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 -; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> poison, i64 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i64> poison, i64 [[TMP5]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i64> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 -; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i64> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i64> [[TMP8]], i64 [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i64> [[TMP7]], i64 [[TMP11]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i64> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 -; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i64> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP18]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP13]], i64 [[TMP17]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i64> [ [[TMP14]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP19]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 -; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i64> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP23]], align 4 -; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP24]], i32 3 +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP23]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP26:%.*]] = phi <4 x i64> [ [[TMP20]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP25]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP27:%.*]] = trunc <4 x i64> [[TMP26]] to <4 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4 -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> [[TMP27]] +; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i64> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP26:%.*]] = trunc <4 x i64> [[TMP25]] to <4 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP27]], align 4 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP26]], <4 x i32> [[WIDE_LOAD7]] ; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP0]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 -; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 +; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll index 02f324281bf4d..11a190c897f38 100644 --- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll @@ -23,7 +23,7 @@ define i32 @test1() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -44,12 +44,12 @@ define i32 @test1() { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -96,7 +96,7 @@ define i32 @test2() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -117,12 +117,12 @@ define i32 @test2() { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -169,7 +169,7 @@ define i32 @test3(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -183,12 +183,11 @@ define i32 @test3(i32 %N) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[VEC_IND]], splat (i32 10) -; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = icmp sle <2 x i32> [[VEC_IND]], splat (i32 10) ; CHECK-NEXT: [[TMP5:%.*]] = icmp sle <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP5]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 2) -; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> splat (i32 1), <2 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[PREDPHI]], <2 x i32> splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -196,12 +195,12 @@ define i32 @test3(i32 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[PREDPHI1]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -258,7 +257,7 @@ define i32 @test4(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -279,12 +278,12 @@ define i32 @test4(i32 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[DOTLR_PH_I_PREHEADER]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -520,7 +519,7 @@ define i8 @outside_user_non_phi() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -542,12 +541,12 @@ define i8 @outside_user_non_phi() { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP4]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -651,14 +650,14 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[C1]], [[B2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[C1]], [[A3]] ; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP3]], 8 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[_LR_PH_I]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -681,12 +680,12 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] +; CHECK: [[_LR_PH_I]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ], [ [[B_PROMOTED]], %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I:.*:]] -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK: [[_LR_PH_I1:.*:]] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = sext i32 [[IV]] to i64 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[BLOAD:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll index c5dc81b28db2b..9824879546c5a 100644 --- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -19,8 +19,7 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @v_38, align 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i16> [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i1> [[TMP1]], splat (i1 true) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i16> [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]] ; CHECK: pred.srem.if: @@ -37,7 +36,7 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK-NEXT: br label [[PRED_SREM_CONTINUE2]] ; CHECK: pred.srem.continue2: ; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ [[TMP6]], [[PRED_SREM_CONTINUE]] ], [ [[TMP9]], [[PRED_SREM_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> splat (i16 5786), <2 x i16> [[TMP10]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> [[TMP10]], <2 x i16> splat (i16 5786) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1 ; CHECK-NEXT: store i16 [[TMP11]], ptr @v_39, align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 From 9734853936f2a34cc0e11d3692a7877dcbcdac21 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 20 Aug 2025 17:56:52 +0800 Subject: [PATCH 2/7] Add llvm_unreachable --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 1c9d171b72279..3d16fd1437d01 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1128,7 +1128,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { m_VPValue(Y)))) { R->setOperand(1, Y); R->setOperand(2, X); - } + } else + llvm_unreachable("Unexpected user"); } // If WideCmp doesn't have a debug location, use the one from the // negation, to preserve the location. From 576f634770e9374080f509ca1e93116d744e7edd Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 20 Aug 2025 17:57:51 +0800 Subject: [PATCH 3/7] Undo some stray formatting changes --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 3d16fd1437d01..4086a783c5d43 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1902,6 +1902,7 @@ void VPlanTransforms::truncateToMinimalBitwidths( PH->appendRecipe(NewOp); } } + } } } @@ -2670,9 +2671,8 @@ void VPlanTransforms::createInterleaveGroups( ReversePtr->insertBefore(InsertPos); Addr = ReversePtr; } - auto *VPIG = - new VPInterleaveRecipe(IG, Addr, StoredValues, InsertPos->getMask(), - NeedsMaskForGaps, InsertPos->getDebugLoc()); + auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues, + InsertPos->getMask(), NeedsMaskForGaps, InsertPos->getDebugLoc()); VPIG->insertBefore(InsertPos); unsigned J = 0; From 0fbd1bdf81155aede2c0d4fe9e0cea964631f36d Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 20 Aug 2025 21:05:59 +0800 Subject: [PATCH 4/7] Fix modifying users during iteration --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 4086a783c5d43..5adba17a3c72b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1118,7 +1118,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { WideCmp->getOpcode() == Instruction::FCmp) { WideCmp->setPredicate( CmpInst::getInversePredicate(WideCmp->getPredicate())); - for (VPUser *U : WideCmp->users()) { + for (VPUser *U : to_vector(WideCmp->users())) { auto *R = cast(U); // not (icmp eq) -> icmp ne if (match(R, m_Not(m_Specific(WideCmp)))) From 8073742b0e0b92665e11e1d8470593af3f1a2a15 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 21 Aug 2025 10:47:57 +0800 Subject: [PATCH 5/7] Address review comments --- .../lib/Transforms/Vectorize/VPlanTransforms.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 5adba17a3c72b..455037994aa78 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1120,16 +1120,16 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { CmpInst::getInversePredicate(WideCmp->getPredicate())); for (VPUser *U : to_vector(WideCmp->users())) { auto *R = cast(U); - // not (icmp eq) -> icmp ne - if (match(R, m_Not(m_Specific(WideCmp)))) - R->replaceAllUsesWith(WideCmp); - // select (icmp eq), x, y -> select (icmp ne), y, x - else if (match(R, m_Select(m_Specific(WideCmp), m_VPValue(X), - m_VPValue(Y)))) { + if (match(R, m_Select(m_Specific(WideCmp), m_VPValue(X), + m_VPValue(Y)))) { + // select (icmp pred), x, y -> select (icmp inv_pred), y, x R->setOperand(1, Y); R->setOperand(2, X); - } else - llvm_unreachable("Unexpected user"); + } else { + // not (icmp pred) -> icmp inv_pred + assert(match(R, m_Not(m_Specific(WideCmp))) && "Unexpected user"); + R->replaceAllUsesWith(WideCmp); + } } // If WideCmp doesn't have a debug location, use the one from the // negation, to preserve the location. From a7df17eaa0809ae113aacdd7c9eac9d77141840d Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 21 Aug 2025 19:04:45 +0800 Subject: [PATCH 6/7] Undo some renames by hand --- .../LoopVectorize/no_outside_user.ll | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll index 11a190c897f38..1eae53559d179 100644 --- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll @@ -23,7 +23,7 @@ define i32 @test1() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -44,12 +44,12 @@ define i32 @test1() { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -96,7 +96,7 @@ define i32 @test2() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -117,12 +117,12 @@ define i32 @test2() { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -257,7 +257,7 @@ define i32 @test4(i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -278,12 +278,12 @@ define i32 @test4(i32 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[PREDPHI]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[DOTLR_PH_I_PREHEADER]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -519,7 +519,7 @@ define i8 @outside_user_non_phi() { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 4) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -541,12 +541,12 @@ define i8 @outside_user_non_phi() { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP4]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[UNNAMEDTMP8:%.*]] = phi i32 [ [[UNNAMEDTMP18:%.*]], %[[BB16:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[UNNAMEDTMP2:%.*]] = icmp sgt i32 [[UNNAMEDTMP8]], 10 ; CHECK-NEXT: br i1 [[UNNAMEDTMP2]], label %[[BB16]], label %[[UNNAMEDBB10:.*]] ; CHECK: [[UNNAMEDBB10]]: @@ -650,14 +650,14 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[SMAX]], [[B_PROMOTED]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_MEMCHECK:.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[C1]], [[B2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[C1]], [[A3]] ; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP3]], 8 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[_LR_PH_I]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] @@ -680,12 +680,12 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[_LR_PH_I]] -; CHECK: [[_LR_PH_I]]: +; CHECK-NEXT: br i1 [[CMP_N]], label %[[F1_EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[B_PROMOTED]], %[[BB]] ], [ [[B_PROMOTED]], %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[DOTLR_PH_I:.*]] -; CHECK: [[_LR_PH_I1:.*:]] -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[_LR_PH_I]] ] +; CHECK: [[_LR_PH_I:.*:]] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IVNEXT:%.*]], %[[DOTLR_PH_I]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = sext i32 [[IV]] to i64 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[BLOAD:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 From 7503131f05d39d2d164d0bb1cbc58b62a157b169 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 21 Aug 2025 19:08:41 +0800 Subject: [PATCH 7/7] Sink all_of, use dyn_cast, icmp -> cmp --- .../Transforms/Vectorize/VPlanTransforms.cpp | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index de3d7406ebd18..03e7bef817cf7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1107,26 +1107,25 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return Def->replaceAllUsesWith(A); // Try to fold Not into compares by adjusting the predicate in-place. - auto CanFold = [&A](VPUser *U) { - return match( - U, m_CombineOr(m_Not(m_Specific(A)), - m_Select(m_Specific(A), m_VPValue(), m_VPValue()))); - }; - if (isa(A) && all_of(A->users(), CanFold)) { - auto *WideCmp = cast(A); - if (WideCmp->getOpcode() == Instruction::ICmp || - WideCmp->getOpcode() == Instruction::FCmp) { + if (auto *WideCmp = dyn_cast(A)) { + if ((WideCmp->getOpcode() == Instruction::ICmp || + WideCmp->getOpcode() == Instruction::FCmp) && + all_of(WideCmp->users(), [&WideCmp](VPUser *U) { + return match(U, m_CombineOr(m_Not(m_Specific(WideCmp)), + m_Select(m_Specific(WideCmp), + m_VPValue(), m_VPValue()))); + })) { WideCmp->setPredicate( CmpInst::getInversePredicate(WideCmp->getPredicate())); for (VPUser *U : to_vector(WideCmp->users())) { auto *R = cast(U); if (match(R, m_Select(m_Specific(WideCmp), m_VPValue(X), m_VPValue(Y)))) { - // select (icmp pred), x, y -> select (icmp inv_pred), y, x + // select (cmp pred), x, y -> select (cmp inv_pred), y, x R->setOperand(1, Y); R->setOperand(2, X); } else { - // not (icmp pred) -> icmp inv_pred + // not (cmp pred) -> cmp inv_pred assert(match(R, m_Not(m_Specific(WideCmp))) && "Unexpected user"); R->replaceAllUsesWith(WideCmp); }