diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e5c3f17860103..6bcd4242f9fd0 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7752,6 +7752,12 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I, if (CM.isPredicatedInst(I)) { SmallVector Ops(Operands); VPValue *Mask = getBlockInMask(Builder.getInsertBlock()); + if (auto *Inst = dyn_cast(Mask)) { + if (Inst->getOpcode() == VPInstruction::LogicalAnd) { + VPValue *Zero = Plan.getConstantInt(I->getType(), 0); + Mask = Builder.createICmp(CmpInst::ICMP_NE, Ops[1], Zero); + } + } VPValue *One = Plan.getConstantInt(I->getType(), 1u); auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, I->getDebugLoc()); Ops[1] = SafeRHS; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index f16351720b20f..8a1a7237cd235 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -660,16 +660,16 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1 ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]] ; COMMON: [[PRED_STORE_CONTINUE12]]: -; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT:.*]] +; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]] ; COMMON: [[PRED_STORE_IF13]]: ; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7 ; COMMON-NEXT: store i8 7, ptr [[TMP7]], align 1 -; COMMON-NEXT: br label %[[EXIT]] +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE14]] +; COMMON: [[PRED_STORE_CONTINUE14]]: +; COMMON-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; COMMON: [[MIDDLE_BLOCK]]: +; COMMON-NEXT: br label %[[EXIT:.*]] ; COMMON: [[EXIT]]: -; COMMON-NEXT: br label %[[SCALAR_PH:.*]] -; COMMON: [[SCALAR_PH]]: -; COMMON-NEXT: br label %[[EXIT1:.*]] -; COMMON: [[EXIT1]]: ; COMMON-NEXT: ret void ; entry: @@ -1303,7 +1303,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; PRED-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; PRED: [[VECTOR_MEMCHECK]]: ; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 16 +; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 ; PRED-NEXT: [[TMP3:%.*]] = sub i64 [[C1]], [[A2]] ; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] ; PRED-NEXT: [[TMP4:%.*]] = sub i64 [[C1]], [[B3]] @@ -1312,42 +1312,42 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; PRED-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; PRED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 +; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[Y]], i64 0 +; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4 +; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2 ; PRED-NEXT: [[TMP9:%.*]] = sub i64 [[TMP0]], [[TMP8]] ; PRED-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP0]], [[TMP8]] ; PRED-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0 -; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP0]]) -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[Y]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP0]]) +; PRED-NEXT: [[TMP16:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer +; PRED-NEXT: [[TMP13:%.*]] = select [[TMP16]], [[BROADCAST_SPLAT]], splat (i8 1) ; PRED-NEXT: br label %[[VECTOR_BODY:.*]] ; PRED: [[VECTOR_BODY]]: ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] ; PRED-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; PRED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP12]], [[ACTIVE_LANE_MASK]], poison) -; PRED-NEXT: [[TMP13:%.*]] = uitofp [[WIDE_MASKED_LOAD]] to +; PRED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i8.p0(ptr align 1 [[TMP12]], [[ACTIVE_LANE_MASK]], poison) +; PRED-NEXT: [[TMP15:%.*]] = uitofp [[WIDE_MASKED_LOAD]] to ; PRED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; PRED-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP14]], [[ACTIVE_LANE_MASK]], poison) -; PRED-NEXT: [[TMP15:%.*]] = icmp ne [[WIDE_MASKED_LOAD5]], zeroinitializer -; PRED-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP15]], zeroinitializer -; PRED-NEXT: [[TMP17:%.*]] = xor [[WIDE_MASKED_LOAD]], splat (i8 1) -; PRED-NEXT: [[TMP18:%.*]] = select [[TMP16]], [[BROADCAST_SPLAT]], splat (i8 1) -; PRED-NEXT: [[TMP19:%.*]] = udiv [[TMP17]], [[TMP18]] -; PRED-NEXT: [[TMP20:%.*]] = icmp ugt [[TMP19]], splat (i8 1) -; PRED-NEXT: [[TMP21:%.*]] = select [[TMP20]], zeroinitializer, splat (i32 255) -; PRED-NEXT: [[PREDPHI:%.*]] = select [[TMP15]], [[TMP21]], zeroinitializer -; PRED-NEXT: [[TMP22:%.*]] = zext [[WIDE_MASKED_LOAD]] to -; PRED-NEXT: [[TMP23:%.*]] = sub [[PREDPHI]], [[TMP22]] -; PRED-NEXT: [[TMP24:%.*]] = sitofp [[TMP23]] to -; PRED-NEXT: [[TMP25:%.*]] = call @llvm.fmuladd.nxv16f32( [[TMP24]], splat (float 3.000000e+00), [[TMP13]]) -; PRED-NEXT: [[TMP26:%.*]] = fptoui [[TMP25]] to +; PRED-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call @llvm.masked.load.nxv4i8.p0(ptr align 1 [[TMP14]], [[ACTIVE_LANE_MASK]], poison) +; PRED-NEXT: [[TMP17:%.*]] = icmp eq [[WIDE_MASKED_LOAD5]], zeroinitializer +; PRED-NEXT: [[TMP18:%.*]] = xor [[WIDE_MASKED_LOAD]], splat (i8 1) +; PRED-NEXT: [[TMP19:%.*]] = udiv [[TMP18]], [[TMP13]] +; PRED-NEXT: [[TMP20:%.*]] = icmp ugt [[TMP19]], splat (i8 1) +; PRED-NEXT: [[TMP21:%.*]] = select [[TMP20]], zeroinitializer, splat (i32 255) +; PRED-NEXT: [[PREDPHI:%.*]] = select [[TMP17]], zeroinitializer, [[TMP21]] +; PRED-NEXT: [[TMP22:%.*]] = zext [[WIDE_MASKED_LOAD]] to +; PRED-NEXT: [[TMP23:%.*]] = sub [[PREDPHI]], [[TMP22]] +; PRED-NEXT: [[TMP24:%.*]] = sitofp [[TMP23]] to +; PRED-NEXT: [[TMP25:%.*]] = call @llvm.fmuladd.nxv4f32( [[TMP24]], splat (float 3.000000e+00), [[TMP15]]) +; PRED-NEXT: [[TMP26:%.*]] = fptoui [[TMP25]] to ; PRED-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[C]], i64 [[INDEX]] -; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP26]], ptr align 1 [[TMP27]], [[ACTIVE_LANE_MASK]]) +; PRED-NEXT: call void @llvm.masked.store.nxv4i8.p0( [[TMP26]], ptr align 1 [[TMP27]], [[ACTIVE_LANE_MASK]]) ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]]) -; PRED-NEXT: [[TMP28:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP11]]) +; PRED-NEXT: [[TMP28:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 ; PRED-NEXT: [[TMP29:%.*]] = xor i1 [[TMP28]], true ; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; PRED: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 72e813b62025f..35bc986356816 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -114,6 +114,12 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]]) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[CONV6]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP16]], i64 [[CONV6]], i64 1 +; CHECK-NEXT: [[TMP19:%.*]] = sdiv i64 [[M]], [[TMP18]] +; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 +; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP19]], [[CONV61]] +; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[X]], [[TMP20]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[M]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = call @llvm.stepvector.nxv2i64() @@ -128,14 +134,8 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP22:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP23:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP22]], zeroinitializer -; CHECK-NEXT: [[TMP24:%.*]] = extractelement [[TMP23]], i32 0 -; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[CONV6]], i64 1 -; CHECK-NEXT: [[TMP26:%.*]] = sdiv i64 [[M]], [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[TMP26]] to i32 -; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP26]], [[CONV61]] ; CHECK-NEXT: [[TMP29:%.*]] = sub i64 [[INDEX]], [[TMP28]] ; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 -; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[X]], [[TMP27]] ; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], [[TMP30]] ; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP33]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index 01b4502308c95..0371f2a04078b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -271,19 +271,15 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP1]], [[BROADCAST_SPLAT]], splat (i64 1) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP12]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32() -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult [[TMP7]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) -; CHECK-NEXT: [[TMP16:%.*]] = select [[TMP15]], [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP16]], [[BROADCAST_SPLAT]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = udiv [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP6]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[PREDPHI]], ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) @@ -303,15 +299,15 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED: vector.ph: ; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 ; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; FIXED-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) +; FIXED-NEXT: [[TMP1:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer +; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP5]] -; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] +; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP2]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -352,19 +348,15 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP1]], [[BROADCAST_SPLAT]], splat (i64 1) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP12]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32() -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult [[TMP7]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) -; CHECK-NEXT: [[TMP16:%.*]] = select [[TMP15]], [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP16]], [[BROADCAST_SPLAT]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP6]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[PREDPHI]], ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) @@ -384,15 +376,15 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED: vector.ph: ; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 ; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; FIXED-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) +; FIXED-NEXT: [[TMP1:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer +; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP5]] -; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] +; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP2]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -575,16 +567,10 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP12]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult [[TMP6]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP7]], splat (i1 true), i32 [[TMP12]]) ; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], splat (i8 -128) -; CHECK-NEXT: [[TMP16:%.*]] = select [[TMP15]], [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP16]], splat (i8 -1), splat (i8 1) -; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], splat (i8 -1) ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[PREDPHI]], ptr align 1 [[TMP7]], splat (i1 true), i32 [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 @@ -607,8 +593,8 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; FIXED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1 ; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], splat (i8 -128) -; FIXED-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1) -; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP7]] +; FIXED-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1) +; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP2]] ; FIXED-NEXT: [[PREDPHI2:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP9]], <32 x i8> [[WIDE_LOAD1]] ; FIXED-NEXT: store <32 x i8> [[PREDPHI2]], ptr [[TMP1]], align 1 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32