diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index bb9eed0e0ddb9..c89d9fae639a7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2635,6 +2635,12 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask, Intrinsic::vp_merge, {Plan->getTrue(), LHS, RHS, &EVL}, TypeInfo.inferScalarType(LHS), CurRecipe.getDebugLoc()); + if (match(&CurRecipe, m_Select(m_RemoveMask(HeaderMask, Mask), m_VPValue(LHS), + m_VPValue(RHS)))) + return new VPWidenIntrinsicRecipe( + Intrinsic::vp_merge, {Mask, LHS, RHS, &EVL}, + TypeInfo.inferScalarType(LHS), CurRecipe.getDebugLoc()); + return nullptr; } diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index f88778b991b0b..8e71718061c9b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -276,14 +276,9 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP12]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32() -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult [[TMP7]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) -; CHECK-NEXT: [[TMP16:%.*]] = select [[TMP15]], [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP16]], [[BROADCAST_SPLAT]], splat (i64 1) +; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.vp.merge.nxv2i64( [[TMP6]], [[BROADCAST_SPLAT]], splat (i64 1), i32 [[TMP12]]) ; CHECK-NEXT: [[TMP11:%.*]] = udiv [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement [[TMP6]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], [[TMP11]], [[WIDE_LOAD]] @@ -360,14 +355,9 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP12]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32() -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult [[TMP7]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) -; CHECK-NEXT: [[TMP16:%.*]] = select [[TMP15]], [[TMP6]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP16]], [[BROADCAST_SPLAT]], splat (i64 1) +; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.vp.merge.nxv2i64( [[TMP6]], [[BROADCAST_SPLAT]], splat (i64 1), i32 [[TMP12]]) ; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement [[TMP6]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], [[TMP11]], [[WIDE_LOAD]] @@ -581,15 +571,10 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP12]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = call @llvm.stepvector.nxv16i32() -; CHECK-NEXT: [[TMP15:%.*]] = icmp ult [[TMP6]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP7]], splat (i1 true), i32 [[TMP12]]) ; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], splat (i8 -128) -; CHECK-NEXT: [[TMP16:%.*]] = select [[TMP15]], [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP16]], splat (i8 -1), splat (i8 1) +; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.vp.merge.nxv16i8( [[TMP9]], splat (i8 -1), splat (i8 1), i32 [[TMP12]]) ; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[PREDPHI]], ptr align 1 [[TMP7]], splat (i1 true), i32 [[TMP12]])