diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7b77954e3a4ff..e860f83921857 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5809,48 +5809,40 @@ static InstructionCost getExtractWithExtendCost( return TTI.getExtractWithExtendCost(Opcode, Dst, VecTy, Index, CostKind); } -/// Correctly creates insert_subvector, checking that the index is multiple of -/// the subvectors length. Otherwise, generates shuffle using \p Generator or +/// Creates subvector insert. Generates shuffle using \p Generator or /// using default shuffle. static Value *createInsertVector( IRBuilderBase &Builder, Value *Vec, Value *V, unsigned Index, function_ref)> Generator = {}) { + if (isa(Vec) && isa(V)) + return Vec; const unsigned SubVecVF = getNumElements(V->getType()); - if (Index % SubVecVF == 0) { - Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V, Index); - } else { - // Create shuffle, insertvector requires that index is multiple of - // the subvector length. - const unsigned VecVF = getNumElements(Vec->getType()); - SmallVector Mask(VecVF, PoisonMaskElem); - std::iota(Mask.begin(), Mask.end(), 0); - for (unsigned I : seq(SubVecVF)) - Mask[I + Index] = I + VecVF; - if (Generator) { - Vec = Generator(Vec, V, Mask); - } else { - // 1. Resize V to the size of Vec. - SmallVector ResizeMask(VecVF, PoisonMaskElem); - std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0); - V = Builder.CreateShuffleVector(V, ResizeMask); - Vec = Builder.CreateShuffleVector(Vec, V, Mask); - } + // Create shuffle, insertvector requires that index is multiple of + // the subvector length. + const unsigned VecVF = getNumElements(Vec->getType()); + SmallVector Mask(VecVF, PoisonMaskElem); + if (isa(Vec)) { + auto *Begin = std::next(Mask.begin(), Index); + std::iota(Begin, std::next(Begin, SubVecVF), 0); + Vec = Builder.CreateShuffleVector(V, Mask); + return Vec; } - return Vec; + std::iota(Mask.begin(), Mask.end(), 0); + std::iota(std::next(Mask.begin(), Index), + std::next(Mask.begin(), Index + SubVecVF), VecVF); + if (Generator) + return Generator(Vec, V, Mask); + // 1. Resize V to the size of Vec. + SmallVector ResizeMask(VecVF, PoisonMaskElem); + std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0); + V = Builder.CreateShuffleVector(V, ResizeMask); + // 2. Insert V into Vec. + return Builder.CreateShuffleVector(Vec, V, Mask); } -/// Correctly creates extract_subvector, checking that the index is multiple of -/// the subvectors length. Otherwise, generates shuffle using \p Generator or -/// using default shuffle. +/// Generates subvector extract using \p Generator or using default shuffle. static Value *createExtractVector(IRBuilderBase &Builder, Value *Vec, unsigned SubVecVF, unsigned Index) { - if (Index % SubVecVF == 0) { - VectorType *SubVecTy = - getWidenedType(Vec->getType()->getScalarType(), SubVecVF); - return Builder.CreateExtractVector(SubVecTy, Vec, Index); - } - // Create shuffle, extract_subvector requires that index is multiple of - // the subvector length. SmallVector Mask(SubVecVF, PoisonMaskElem); std::iota(Mask.begin(), Mask.end(), Index); return Builder.CreateShuffleVector(Vec, Mask); @@ -16275,8 +16267,8 @@ Value *BoUpSLP::gather( assert(SLPReVec && "FixedVectorType is not expected."); Vec = createInsertVector(Builder, Vec, Scalar, Pos * getNumElements(VecTy)); - auto *II = dyn_cast(Vec); - if (!II || II->getIntrinsicID() != Intrinsic::vector_insert) + auto *II = dyn_cast(Vec); + if (!II) return Vec; InsElt = II; } else { @@ -16296,6 +16288,28 @@ Value *BoUpSLP::gather( if (auto *SI = dyn_cast(Scalar)) UserOp = SI; } else { + if (V->getType()->isVectorTy()) { + if (auto *SV = dyn_cast(InsElt); + SV && SV->getOperand(0) != V && SV->getOperand(1) != V) { + // Find shufflevector, caused by resize. + auto FindOperand = [](Value *Vec, Value *V) -> Instruction * { + if (auto *SV = dyn_cast(Vec)) { + if (SV->getOperand(0) == V) + return SV; + if (SV->getOperand(1) == V) + return SV; + } + return nullptr; + }; + InsElt = nullptr; + if (Instruction *User = FindOperand(SV->getOperand(0), V)) + InsElt = User; + else if (Instruction *User = FindOperand(SV->getOperand(1), V)) + InsElt = User; + assert(InsElt && + "Failed to find shufflevector, caused by resize."); + } + } UserOp = InsElt; } if (UserOp) { @@ -16864,10 +16878,18 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { V, SimplifyQuery(*R.DL)); })); unsigned InsertionIndex = Idx * getNumElements(ScalarTy); + // Use scalar version of the SCalarType to correctly handle shuffles + // for revectorization. The revectorization mode operates by the + // vectors, but here we need to operate on the scalars, because the + // masks were already transformed for the vector elements and we don't + // need doing this transformation again. + Type *OrigScalarTy = ScalarTy; + ScalarTy = ScalarTy->getScalarType(); Vec = createInsertVector( Builder, Vec, V, InsertionIndex, std::bind(&ShuffleInstructionBuilder::createShuffle, this, _1, _2, _3)); + ScalarTy = OrigScalarTy; if (!CommonMask.empty()) { std::iota(std::next(CommonMask.begin(), Idx), std::next(CommonMask.begin(), Idx + E->getVectorFactor()), diff --git a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll index 5cb2c4530aa57..8e25c9c5547d6 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll @@ -567,22 +567,19 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D, ; ; SSE4-LABEL: @buildvector_mul_subadd_ps256( ; SSE4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] -; SSE4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]] -; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> -; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] +; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]] ; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> -; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> -; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> -; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> +; SSE4-NEXT: [[TMP5:%.*]] = fsub <8 x float> [[A]], [[B]] +; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> +; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> ; SSE4-NEXT: ret <8 x float> [[TMP6]] ; ; AVX_FMA4-LABEL: @buildvector_mul_subadd_ps256( ; AVX_FMA4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] -; AVX_FMA4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]] -; AVX_FMA4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> -; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] +; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]] ; AVX_FMA4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> -; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> +; AVX_FMA4-NEXT: [[TMP7:%.*]] = fsub <8 x float> [[A]], [[B]] +; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> ; AVX_FMA4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> ; AVX_FMA4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> ; AVX_FMA4-NEXT: ret <8 x float> [[TMP6]] @@ -677,13 +674,11 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float> ; ; AVX_FMA-LABEL: @buildvector_mul_subadd_ps512( ; AVX_FMA-NEXT: [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]] -; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A]], [[B:%.*]] -; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <8 x i32> -; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B]] +; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B:%.*]] ; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> -; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <16 x i32> -; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP5]], <16 x i32> -; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP6]], <16 x float> poison, <16 x i32> +; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <16 x float> [[A]], [[B]] +; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <16 x i32> +; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP6]], <16 x i32> ; AVX_FMA-NEXT: ret <16 x float> [[TMP7]] ; ; AVX512-LABEL: @buildvector_mul_subadd_ps512( @@ -880,13 +875,11 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double> ; ; AVX_FMA-LABEL: @buildvector_mul_subadd_pd512( ; AVX_FMA-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]] -; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A]], [[B:%.*]] -; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <4 x i32> -; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B]] +; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B:%.*]] ; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> poison, <8 x i32> -; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <8 x i32> -; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP5]], <8 x i32> -; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <8 x i32> +; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <8 x double> [[A]], [[B]] +; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <8 x i32> +; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP6]], <8 x i32> ; AVX_FMA-NEXT: ret <8 x double> [[TMP7]] ; ; AVX512-LABEL: @buildvector_mul_subadd_pd512( diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll index 9f9e9d84108e6..9c615bb4757fa 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll @@ -12,9 +12,10 @@ define void @foo(ptr %0) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x ptr> [[TMP2]], <2 x ptr> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> poison, <4 x ptr> [[TMP3]], i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> [[TMP11]], <4 x ptr> [[TMP5]], i64 4) -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x ptr> [[TMP7]], <8 x ptr> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> [[TMP5]], <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x ptr> [[TMP12]], <8 x ptr> poison, <8 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <8 x ptr> [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i1> [[TMP9]], zeroinitializer ; CHECK-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP10]]) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll index 9327fe8995d45..8d44d03e0e5cc 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll @@ -11,7 +11,7 @@ define i32 @test(ptr %c) { ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <6 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = lshr <6 x i64> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v6i64(<8 x i64> poison, <6 x i64> [[TMP2]], i64 0) +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <6 x i64> [[TMP2]], <6 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> [[TMP3]], <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[TMP5]] to <8 x i8> ; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[INCDEC_PTR_3_1]], align 1 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll index 7ae44c274ff6d..fcbe2d631ba8b 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll @@ -164,7 +164,8 @@ define i32 @getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y ; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[T10]], i32 2 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T12]], i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP8]], <2 x i32> [[TMP6]], i64 0) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]]) ; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP14]], [[SUM_032]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index 6c5220d13b7a2..bb05440910130 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -420,27 +420,26 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]] -; TODO: Dead code must be removed below. ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4 ; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] -; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP32]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP0]] to i32 ; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] -; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1 -; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP33]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1 +; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 4 -; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1 -; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP34]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1 +; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 4 -; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1 -; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP35]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1 +; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP3]] to i32 ; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 1 -; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 -; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP36]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 +; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP4]] to i32 ; CHECK-NEXT: [[ARRAYIDX10_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 1 -; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1 -; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP37]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1 +; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 5 ; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX13_2]], align 1 ; CHECK-NEXT: [[CONV14_2:%.*]] = zext i8 [[TMP6]] to i32 @@ -454,17 +453,17 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound ; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX22_2]], align 1 ; CHECK-NEXT: [[CONV23_2:%.*]] = zext i8 [[TMP9]] to i32 ; CHECK-NEXT: [[ARRAYIDX25_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 6 -; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX25_2]], align 1 -; CHECK-NEXT: [[CONV26_2:%.*]] = zext i8 [[TMP41]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX25_2]], align 1 +; CHECK-NEXT: [[CONV26_2:%.*]] = zext i8 [[TMP10]] to i32 ; CHECK-NEXT: [[ARRAYIDX27_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 6 -; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX27_2]], align 1 -; CHECK-NEXT: [[CONV28_2:%.*]] = zext i8 [[TMP42]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX27_2]], align 1 +; CHECK-NEXT: [[CONV28_2:%.*]] = zext i8 [[TMP11]] to i32 ; CHECK-NEXT: [[ARRAYIDX32_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 3 -; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[ARRAYIDX32_2]], align 1 -; CHECK-NEXT: [[CONV33_2:%.*]] = zext i8 [[TMP43]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX32_2]], align 1 +; CHECK-NEXT: [[CONV33_2:%.*]] = zext i8 [[TMP12]] to i32 ; CHECK-NEXT: [[ARRAYIDX34_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 3 -; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[ARRAYIDX34_2]], align 1 -; CHECK-NEXT: [[CONV35_2:%.*]] = zext i8 [[TMP44]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX34_2]], align 1 +; CHECK-NEXT: [[CONV35_2:%.*]] = zext i8 [[TMP13]] to i32 ; CHECK-NEXT: [[ARRAYIDX37_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 7 ; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX37_2]], align 1 ; CHECK-NEXT: [[CONV38_2:%.*]] = zext i8 [[TMP14]] to i32 @@ -478,17 +477,17 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound ; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ADD_PTR64_2]], align 1 ; CHECK-NEXT: [[CONV2_3:%.*]] = zext i8 [[TMP17]] to i32 ; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 4 -; CHECK-NEXT: [[TMP48:%.*]] = load i8, ptr [[ARRAYIDX3_3]], align 1 -; CHECK-NEXT: [[CONV4_3:%.*]] = zext i8 [[TMP48]] to i32 +; CHECK-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX3_3]], align 1 +; CHECK-NEXT: [[CONV4_3:%.*]] = zext i8 [[TMP18]] to i32 ; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_2]], i64 4 -; CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[CONV6_3:%.*]] = zext i8 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = load i8, ptr [[ARRAYIDX5_3]], align 1 +; CHECK-NEXT: [[CONV6_3:%.*]] = zext i8 [[TMP19]] to i32 ; CHECK-NEXT: [[ARRAYIDX8_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 1 -; CHECK-NEXT: [[TMP50:%.*]] = load i8, ptr [[ARRAYIDX8_3]], align 1 -; CHECK-NEXT: [[CONV9_3:%.*]] = zext i8 [[TMP50]] to i32 +; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX8_3]], align 1 +; CHECK-NEXT: [[CONV9_3:%.*]] = zext i8 [[TMP20]] to i32 ; CHECK-NEXT: [[ARRAYIDX10_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_2]], i64 1 -; CHECK-NEXT: [[TMP51:%.*]] = load i8, ptr [[ARRAYIDX10_3]], align 1 -; CHECK-NEXT: [[CONV11_3:%.*]] = zext i8 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP21:%.*]] = load i8, ptr [[ARRAYIDX10_3]], align 1 +; CHECK-NEXT: [[CONV11_3:%.*]] = zext i8 [[TMP21]] to i32 ; CHECK-NEXT: [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 5 ; CHECK-NEXT: [[TMP22:%.*]] = load i8, ptr [[ARRAYIDX13_3]], align 1 ; CHECK-NEXT: [[CONV14_3:%.*]] = zext i8 [[TMP22]] to i32 @@ -519,28 +518,35 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound ; CHECK-NEXT: [[ARRAYIDX39_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_2]], i64 7 ; CHECK-NEXT: [[TMP31:%.*]] = load i8, ptr [[ARRAYIDX39_3]], align 1 ; CHECK-NEXT: [[CONV40_3:%.*]] = zext i8 [[TMP31]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P2]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1 -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1 -; CHECK-NEXT: [[TMP38:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP39:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP38]], <4 x i8> [[TMP4]], i64 4) -; CHECK-NEXT: [[TMP40:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP39]], <4 x i8> [[TMP1]], i64 8) -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP40]], <4 x i8> [[TMP5]], i64 12) -; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; CHECK-NEXT: [[TMP45:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP46:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP45]], <4 x i8> [[TMP12]], i64 4) -; CHECK-NEXT: [[TMP47:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP46]], <4 x i8> [[TMP3]], i64 8) -; CHECK-NEXT: [[TMP18:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP47]], <4 x i8> [[TMP13]], i64 12) -; CHECK-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i32> -; CHECK-NEXT: [[TMP20:%.*]] = mul <16 x i32> [[TMP11]], [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP20]]) -; CHECK-NEXT: ret i32 [[TMP21]] +; CHECK-NEXT: [[TMP32:%.*]] = load <4 x i8>, ptr [[P1]], align 1 +; CHECK-NEXT: [[TMP33:%.*]] = load <4 x i8>, ptr [[P2]], align 1 +; CHECK-NEXT: [[TMP34:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 +; CHECK-NEXT: [[TMP35:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 +; CHECK-NEXT: [[TMP36:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP32]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <4 x i8> [[TMP36]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <4 x i8> [[TMP32]], <4 x i8> [[TMP36]], <16 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <4 x i8> [[TMP33]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP42:%.*]] = shufflevector <16 x i8> [[TMP40]], <16 x i8> [[TMP41]], <16 x i32> +; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i8> [[TMP37]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i8> [[TMP42]], <16 x i8> [[TMP43]], <16 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = zext <16 x i8> [[TMP44]] to <16 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 +; CHECK-NEXT: [[TMP47:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 +; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <4 x i8> [[TMP34]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = shufflevector <4 x i8> [[TMP46]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <4 x i8> [[TMP34]], <4 x i8> [[TMP46]], <16 x i32> +; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x i8> [[TMP35]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i8> [[TMP50]], <16 x i8> [[TMP51]], <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <4 x i8> [[TMP47]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <16 x i8> [[TMP52]], <16 x i8> [[TMP53]], <16 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = zext <16 x i8> [[TMP54]] to <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = mul <16 x i32> [[TMP45]], [[TMP55]] +; CHECK-NEXT: [[TMP57:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP56]]) +; CHECK-NEXT: ret i32 [[TMP57]] ; +; TODO: Dead code must be removed below. entry: %idx.ext = sext i32 %off1 to i64 %idx.ext63 = sext i32 %off2 to i64 @@ -1016,69 +1022,68 @@ define void @store_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]] -; TODO: Dead code must be removed below. ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4 ; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4 ; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]] -; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP32]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP0]] to i32 ; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]] -; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1 -; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP33]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1 +; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP1]] to i32 ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 4 -; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1 -; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP34]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1 +; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 4 -; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1 -; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP35]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1 +; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP3]] to i32 ; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 1 -; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 -; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP36]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1 +; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP4]] to i32 ; CHECK-NEXT: [[ARRAYIDX10_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 1 -; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1 -; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP37]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1 +; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP5]] to i32 ; CHECK-NEXT: [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 5 -; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX13_2]], align 1 -; CHECK-NEXT: [[CONV14_2:%.*]] = zext i8 [[TMP38]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX13_2]], align 1 +; CHECK-NEXT: [[CONV14_2:%.*]] = zext i8 [[TMP6]] to i32 ; CHECK-NEXT: [[ARRAYIDX15_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 5 -; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX15_2]], align 1 -; CHECK-NEXT: [[CONV16_2:%.*]] = zext i8 [[TMP39]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX15_2]], align 1 +; CHECK-NEXT: [[CONV16_2:%.*]] = zext i8 [[TMP7]] to i32 ; CHECK-NEXT: [[ARRAYIDX20_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 2 -; CHECK-NEXT: [[TMP40:%.*]] = load i8, ptr [[ARRAYIDX20_2]], align 1 -; CHECK-NEXT: [[CONV21_2:%.*]] = zext i8 [[TMP40]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[ARRAYIDX20_2]], align 1 +; CHECK-NEXT: [[CONV21_2:%.*]] = zext i8 [[TMP8]] to i32 ; CHECK-NEXT: [[ARRAYIDX22_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 2 -; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX22_2]], align 1 -; CHECK-NEXT: [[CONV23_2:%.*]] = zext i8 [[TMP41]] to i32 +; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX22_2]], align 1 +; CHECK-NEXT: [[CONV23_2:%.*]] = zext i8 [[TMP9]] to i32 ; CHECK-NEXT: [[ARRAYIDX25_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 6 -; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX25_2]], align 1 -; CHECK-NEXT: [[CONV26_2:%.*]] = zext i8 [[TMP42]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX25_2]], align 1 +; CHECK-NEXT: [[CONV26_2:%.*]] = zext i8 [[TMP10]] to i32 ; CHECK-NEXT: [[ARRAYIDX27_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 6 -; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[ARRAYIDX27_2]], align 1 -; CHECK-NEXT: [[CONV28_2:%.*]] = zext i8 [[TMP43]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX27_2]], align 1 +; CHECK-NEXT: [[CONV28_2:%.*]] = zext i8 [[TMP11]] to i32 ; CHECK-NEXT: [[ARRAYIDX32_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 3 -; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[ARRAYIDX32_2]], align 1 -; CHECK-NEXT: [[CONV33_2:%.*]] = zext i8 [[TMP44]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX32_2]], align 1 +; CHECK-NEXT: [[CONV33_2:%.*]] = zext i8 [[TMP12]] to i32 ; CHECK-NEXT: [[ARRAYIDX34_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 3 -; CHECK-NEXT: [[TMP45:%.*]] = load i8, ptr [[ARRAYIDX34_2]], align 1 -; CHECK-NEXT: [[CONV35_2:%.*]] = zext i8 [[TMP45]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX34_2]], align 1 +; CHECK-NEXT: [[CONV35_2:%.*]] = zext i8 [[TMP13]] to i32 ; CHECK-NEXT: [[ARRAYIDX37_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 7 -; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[ARRAYIDX37_2]], align 1 -; CHECK-NEXT: [[CONV38_2:%.*]] = zext i8 [[TMP46]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX37_2]], align 1 +; CHECK-NEXT: [[CONV38_2:%.*]] = zext i8 [[TMP14]] to i32 ; CHECK-NEXT: [[ARRAYIDX39_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 7 -; CHECK-NEXT: [[TMP47:%.*]] = load i8, ptr [[ARRAYIDX39_2]], align 1 -; CHECK-NEXT: [[CONV40_2:%.*]] = zext i8 [[TMP47]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX39_2]], align 1 +; CHECK-NEXT: [[CONV40_2:%.*]] = zext i8 [[TMP15]] to i32 ; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 [[IDX_EXT]] -; CHECK-NEXT: [[TMP48:%.*]] = load i8, ptr [[ADD_PTR_2]], align 1 -; CHECK-NEXT: [[CONV_3:%.*]] = zext i8 [[TMP48]] to i32 +; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ADD_PTR_2]], align 1 +; CHECK-NEXT: [[CONV_3:%.*]] = zext i8 [[TMP16]] to i32 ; CHECK-NEXT: [[ADD_PTR64_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 [[IDX_EXT63]] -; CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[ADD_PTR64_2]], align 1 -; CHECK-NEXT: [[CONV2_3:%.*]] = zext i8 [[TMP49]] to i32 +; CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ADD_PTR64_2]], align 1 +; CHECK-NEXT: [[CONV2_3:%.*]] = zext i8 [[TMP17]] to i32 ; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 4 -; CHECK-NEXT: [[TMP50:%.*]] = load i8, ptr [[ARRAYIDX3_3]], align 1 -; CHECK-NEXT: [[CONV4_3:%.*]] = zext i8 [[TMP50]] to i32 +; CHECK-NEXT: [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX3_3]], align 1 +; CHECK-NEXT: [[CONV4_3:%.*]] = zext i8 [[TMP18]] to i32 ; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_2]], i64 4 -; CHECK-NEXT: [[TMP51:%.*]] = load i8, ptr [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[CONV6_3:%.*]] = zext i8 [[TMP51]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = load i8, ptr [[ARRAYIDX5_3]], align 1 +; CHECK-NEXT: [[CONV6_3:%.*]] = zext i8 [[TMP19]] to i32 ; CHECK-NEXT: [[ARRAYIDX8_3:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_2]], i64 1 ; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX8_3]], align 1 ; CHECK-NEXT: [[CONV9_3:%.*]] = zext i8 [[TMP20]] to i32 @@ -1118,32 +1123,33 @@ define void @store_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound ; CHECK-NEXT: [[DST4:%.*]] = getelementptr inbounds i32, ptr [[DST0:%.*]], i64 4 ; CHECK-NEXT: [[DST8:%.*]] = getelementptr inbounds i32, ptr [[DST0]], i64 8 ; CHECK-NEXT: [[DST12:%.*]] = getelementptr inbounds i32, ptr [[DST0]], i64 12 -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1 -; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 -; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[TMP1]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[P2]], align 1 -; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[TMP5]] to <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 -; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[TMP7]] to <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP6]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1 -; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[TMP10]] to <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = zext <4 x i8> [[TMP12]] to <4 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = mul <4 x i32> [[TMP11]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1 -; CHECK-NEXT: [[TMP16:%.*]] = zext <4 x i8> [[TMP15]] to <4 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 -; CHECK-NEXT: [[TMP18:%.*]] = zext <4 x i8> [[TMP17]] to <4 x i32> -; CHECK-NEXT: [[TMP19:%.*]] = mul <4 x i32> [[TMP16]], [[TMP18]] -; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[DST0]], align 4 -; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[DST4]], align 4 -; CHECK-NEXT: store <4 x i32> [[TMP14]], ptr [[DST8]], align 4 -; CHECK-NEXT: store <4 x i32> [[TMP19]], ptr [[DST12]], align 4 +; CHECK-NEXT: [[TMP32:%.*]] = load <4 x i8>, ptr [[P1]], align 1 +; CHECK-NEXT: [[TMP33:%.*]] = zext <4 x i8> [[TMP32]] to <4 x i32> +; CHECK-NEXT: [[TMP34:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1 +; CHECK-NEXT: [[TMP35:%.*]] = zext <4 x i8> [[TMP34]] to <4 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = mul <4 x i32> [[TMP33]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = load <4 x i8>, ptr [[P2]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = zext <4 x i8> [[TMP37]] to <4 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1 +; CHECK-NEXT: [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = mul <4 x i32> [[TMP38]], [[TMP40]] +; CHECK-NEXT: [[TMP42:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1 +; CHECK-NEXT: [[TMP43:%.*]] = zext <4 x i8> [[TMP42]] to <4 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1 +; CHECK-NEXT: [[TMP45:%.*]] = zext <4 x i8> [[TMP44]] to <4 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = mul <4 x i32> [[TMP43]], [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1 +; CHECK-NEXT: [[TMP48:%.*]] = zext <4 x i8> [[TMP47]] to <4 x i32> +; CHECK-NEXT: [[TMP49:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1 +; CHECK-NEXT: [[TMP50:%.*]] = zext <4 x i8> [[TMP49]] to <4 x i32> +; CHECK-NEXT: [[TMP51:%.*]] = mul <4 x i32> [[TMP48]], [[TMP50]] +; CHECK-NEXT: store <4 x i32> [[TMP36]], ptr [[DST0]], align 4 +; CHECK-NEXT: store <4 x i32> [[TMP41]], ptr [[DST4]], align 4 +; CHECK-NEXT: store <4 x i32> [[TMP46]], ptr [[DST8]], align 4 +; CHECK-NEXT: store <4 x i32> [[TMP51]], ptr [[DST12]], align 4 ; CHECK-NEXT: ret void ; +; TODO: Dead code must be removed below. entry: %idx.ext = sext i32 %off1 to i64 %idx.ext63 = sext i32 %off2 to i64 @@ -1422,29 +1428,41 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_2]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_2]], align 1 ; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ADD_PTR_2]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP13]], <4 x i8> [[TMP4]], i64 4) -; CHECK-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP14]], <4 x i8> [[TMP8]], i64 8) -; CHECK-NEXT: [[TMP17:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP15]], <4 x i8> [[TMP12]], i64 12) +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP16]], <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i8> [[TMP12]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x i8> [[TMP20]], <16 x i8> [[TMP21]], <16 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = zext <16 x i8> [[TMP17]] to <16 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_2]], align 1 -; CHECK-NEXT: [[TMP22:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP1]], i64 0) -; CHECK-NEXT: [[TMP20:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP22]], <4 x i8> [[TMP5]], i64 4) -; CHECK-NEXT: [[TMP21:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP20]], <4 x i8> [[TMP9]], i64 8) -; CHECK-NEXT: [[TMP24:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP21]], <4 x i8> [[TMP19]], i64 12) +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP5]], <16 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> +; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP19]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP30]], <16 x i8> [[TMP31]], <16 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = sub nsw <16 x i32> [[TMP18]], [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_3]], align 1 -; CHECK-NEXT: [[TMP29:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP30:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP29]], <4 x i8> [[TMP6]], i64 4) -; CHECK-NEXT: [[TMP28:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP30]], <4 x i8> [[TMP10]], i64 8) -; CHECK-NEXT: [[TMP32:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP28]], <4 x i8> [[TMP27]], i64 12) +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP66:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP6]], <16 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP10]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i8> [[TMP67]], <16 x i8> [[TMP35]], <16 x i32> +; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP27]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <16 x i8> [[TMP36]], <16 x i8> [[TMP37]], <16 x i32> ; CHECK-NEXT: [[TMP33:%.*]] = zext <16 x i8> [[TMP32]] to <16 x i32> ; CHECK-NEXT: [[TMP34:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[TMP35:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> poison, <4 x i8> [[TMP3]], i64 0) -; CHECK-NEXT: [[TMP36:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP35]], <4 x i8> [[TMP7]], i64 4) -; CHECK-NEXT: [[TMP37:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP36]], <4 x i8> [[TMP11]], i64 8) -; CHECK-NEXT: [[TMP39:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP37]], <4 x i8> [[TMP34]], i64 12) +; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP70:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> [[TMP7]], <16 x i32> +; CHECK-NEXT: [[TMP71:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i8> [[TMP70]], <16 x i8> [[TMP71]], <16 x i32> +; CHECK-NEXT: [[TMP73:%.*]] = shufflevector <4 x i8> [[TMP34]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP72]], <16 x i8> [[TMP73]], <16 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32> ; CHECK-NEXT: [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]] ; CHECK-NEXT: [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], splat (i32 16) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll index 07411cacb3626..9562e6d41f7cd 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll @@ -29,14 +29,21 @@ define i64 @straight(ptr nocapture noundef readonly %p, i32 noundef %st) { ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr [[ADD_PTR_4]], align 2 ; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr [[ADD_PTR_5]], align 2 ; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr [[ADD_PTR_6]], align 2 -; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> poison, <8 x i16> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP8]], <8 x i16> [[TMP1]], i64 8) -; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP9]], <8 x i16> [[TMP2]], i64 16) -; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP10]], <8 x i16> [[TMP3]], i64 24) -; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP11]], <8 x i16> [[TMP4]], i64 32) -; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP12]], <8 x i16> [[TMP5]], i64 40) -; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP13]], <8 x i16> [[TMP6]], i64 48) -; CHECK-NEXT: [[TMP15:%.*]] = call <64 x i16> @llvm.vector.insert.v64i16.v8i16(<64 x i16> [[TMP14]], <8 x i16> [[TMP7]], i64 56) +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <64 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <64 x i16> [[TMP10]], <64 x i16> [[TMP11]], <64 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <64 x i16> [[TMP12]], <64 x i16> [[TMP13]], <64 x i32> +; CHECK-NEXT: [[TMP83:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP84:%.*]] = shufflevector <64 x i16> [[TMP14]], <64 x i16> [[TMP83]], <64 x i32> +; CHECK-NEXT: [[TMP85:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <64 x i16> [[TMP84]], <64 x i16> [[TMP85]], <64 x i32> +; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <8 x i16> [[TMP6]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP88:%.*]] = shufflevector <64 x i16> [[TMP86]], <64 x i16> [[TMP87]], <64 x i32> +; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <64 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <64 x i16> [[TMP88]], <64 x i16> [[TMP89]], <64 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = zext <64 x i16> [[TMP15]] to <64 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <64 x i32> [[TMP16]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <64 x i32> [[TMP16]], i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll index 8d4a1152fe4da..0e3d79900d435 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll @@ -554,8 +554,9 @@ define float @reduce_fast_float_case2(ptr %a, ptr %b) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[TMP1]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP2]], <4 x float> [[TMP0]], i64 4) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP0]], <8 x i32> ; CHECK-NEXT: [[RED3:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP3]]) ; CHECK-NEXT: ret float [[RED3]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll index a504f3ed02014..64bdcf28af550 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll @@ -15,7 +15,8 @@ define fastcc i64 @zot(float %arg, float %arg1, float %arg2, float %arg3, float ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> , [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> , float [[ARG3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <2 x float> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP2]], <2 x float> [[TMP5]], i64 0) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP9]], <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[TMP6]], ; CHECK-NEXT: br i1 [[ARG6:%.*]], label [[BB18:%.*]], label [[BB57:%.*]] ; CHECK: bb18: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll index 4f88182374622..0783a28f56d85 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll @@ -13,7 +13,8 @@ define void @p(double %0) { ; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> zeroinitializer, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> , <2 x double> [[TMP7]], i64 2) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> , <4 x double> [[TMP9]], <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x double> [[TMP11]], zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = fptosi <4 x double> [[TMP12]] to <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll index 2191d04cd797d..833bc56c4ec6b 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll @@ -7,7 +7,8 @@ define void @vec3_vectorize_call(ptr %Colour, float %0) { ; NON-POWER-OF-2-NEXT: entry: ; NON-POWER-OF-2-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[COLOUR:%.*]], align 4 ; NON-POWER-OF-2-NEXT: [[TMP2:%.*]] = insertelement <3 x float> poison, float [[TMP0:%.*]], i32 2 -; NON-POWER-OF-2-NEXT: [[TMP4:%.*]] = call <3 x float> @llvm.vector.insert.v3f32.v2f32(<3 x float> [[TMP2]], <2 x float> [[TMP1]], i64 0) +; NON-POWER-OF-2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <3 x i32> +; NON-POWER-OF-2-NEXT: [[TMP4:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> [[TMP3]], <3 x i32> ; NON-POWER-OF-2-NEXT: [[TMP5:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP4]], <3 x float> zeroinitializer, <3 x float> zeroinitializer) ; NON-POWER-OF-2-NEXT: store <3 x float> [[TMP5]], ptr [[COLOUR]], align 4 ; NON-POWER-OF-2-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll index 61a944101586b..c728572313d77 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll @@ -253,13 +253,14 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) { ; CHECK-NEXT: [[S_8:%.*]] = select i1 [[CMP_8]], i8 [[TMP1]], i8 [[X:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[GEP_9]], align 1 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[GEP_12]], align 1 -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i8> [[TMP0]], <8 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[TMP0]], <8 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i8> [[TMP0]], <8 x i8> [[TMP4]], <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[L_11]], i32 11 -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> [[TMP3]], i64 12) +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[TMP6]], <16 x i8> [[TMP7]], <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP15]], <16 x i8> [[TMP10]], <16 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], splat (i8 -1) ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i8> poison, i8 [[X]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> poison, <16 x i32> zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/combined-loads-stored.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/combined-loads-stored.ll index cd79250e8fb6b..b772e4be3b0aa 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/combined-loads-stored.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/combined-loads-stored.ll @@ -7,8 +7,9 @@ define void @test(ptr noalias %p, ptr %p1) { ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[P]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i16>, ptr [[GEP2]], align 2 -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.vector.insert.v4i16.v2i16(<4 x i16> poison, <2 x i16> [[TMP1]], i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i16> @llvm.vector.insert.v4i16.v2i16(<4 x i16> [[TMP3]], <2 x i16> [[TMP2]], i64 2) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> [[TMP2]], <4 x i32> ; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr [[P1]], align 2 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll index 15425c38bbb04..5ee9f3ca46ca8 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll @@ -88,7 +88,8 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP69:%.*]] = insertelement <4 x i8> poison, i8 [[TMP115]], i32 0 ; CHECK-NEXT: [[TMP70:%.*]] = insertelement <4 x i8> [[TMP69]], i8 [[TMP0]], i32 1 -; CHECK-NEXT: [[TMP71:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> [[TMP70]], <2 x i8> [[TMP62]], i64 2) +; CHECK-NEXT: [[TMP117:%.*]] = shufflevector <2 x i8> [[TMP62]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP71:%.*]] = shufflevector <4 x i8> [[TMP70]], <4 x i8> [[TMP117]], <4 x i32> ; CHECK-NEXT: [[TMP72:%.*]] = zext <4 x i8> [[TMP71]] to <4 x i32> ; CHECK-NEXT: [[TMP73:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 ; CHECK-NEXT: [[TMP74:%.*]] = zext <4 x i8> [[TMP73]] to <4 x i32> @@ -112,7 +113,8 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP91:%.*]] = add <4 x i32> [[TMP86]], [[TMP61]] ; CHECK-NEXT: [[TMP92:%.*]] = sub <4 x i32> [[TMP61]], [[TMP86]] ; CHECK-NEXT: [[TMP93:%.*]] = shufflevector <4 x i32> [[TMP92]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP94:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP93]], <4 x i32> [[TMP91]], i64 4) +; CHECK-NEXT: [[TMP118:%.*]] = shufflevector <4 x i32> [[TMP91]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP94:%.*]] = shufflevector <8 x i32> [[TMP93]], <8 x i32> [[TMP118]], <8 x i32> ; CHECK-NEXT: [[TMP95:%.*]] = add <8 x i32> [[TMP94]], [[TMP90]] ; CHECK-NEXT: [[TMP96:%.*]] = sub <8 x i32> [[TMP90]], [[TMP94]] ; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <8 x i32> [[TMP95]], <8 x i32> [[TMP96]], <16 x i32> @@ -220,7 +222,8 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP68:%.*]] = shufflevector <4 x i32> [[TMP67]], <4 x i32> poison, <4 x i32> ; THR15-NEXT: [[TMP69:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i32 0 ; THR15-NEXT: [[TMP70:%.*]] = insertelement <4 x i8> [[TMP69]], i8 [[TMP0]], i32 1 -; THR15-NEXT: [[TMP71:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> [[TMP70]], <2 x i8> [[TMP62]], i64 2) +; THR15-NEXT: [[TMP116:%.*]] = shufflevector <2 x i8> [[TMP62]], <2 x i8> poison, <4 x i32> +; THR15-NEXT: [[TMP71:%.*]] = shufflevector <4 x i8> [[TMP70]], <4 x i8> [[TMP116]], <4 x i32> ; THR15-NEXT: [[TMP72:%.*]] = zext <4 x i8> [[TMP71]] to <4 x i32> ; THR15-NEXT: [[TMP73:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_3]], align 1 ; THR15-NEXT: [[TMP74:%.*]] = zext <4 x i8> [[TMP73]] to <4 x i32> @@ -244,7 +247,8 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP91:%.*]] = add <4 x i32> [[TMP86]], [[TMP61]] ; THR15-NEXT: [[TMP92:%.*]] = sub <4 x i32> [[TMP61]], [[TMP86]] ; THR15-NEXT: [[TMP93:%.*]] = shufflevector <4 x i32> [[TMP92]], <4 x i32> poison, <8 x i32> -; THR15-NEXT: [[TMP94:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP93]], <4 x i32> [[TMP91]], i64 4) +; THR15-NEXT: [[TMP117:%.*]] = shufflevector <4 x i32> [[TMP91]], <4 x i32> poison, <8 x i32> +; THR15-NEXT: [[TMP94:%.*]] = shufflevector <8 x i32> [[TMP93]], <8 x i32> [[TMP117]], <8 x i32> ; THR15-NEXT: [[TMP95:%.*]] = add <8 x i32> [[TMP94]], [[TMP90]] ; THR15-NEXT: [[TMP96:%.*]] = sub <8 x i32> [[TMP90]], [[TMP94]] ; THR15-NEXT: [[TMP97:%.*]] = shufflevector <8 x i32> [[TMP95]], <8 x i32> [[TMP96]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-insert-point-restore.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-insert-point-restore.ll index cc88718484172..82c940353ba5a 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-insert-point-restore.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-insert-point-restore.ll @@ -14,8 +14,9 @@ define i16 @test(ptr %i) { ; CHECK: [[FOR_COND5_US]]: ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i16> @llvm.experimental.vp.strided.load.v4i16.p0.i64(ptr align 2 [[GEP_US154_2]], i64 4914, <4 x i1> splat (i1 true), i32 4) ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> [[TMP3]], i32 2, <4 x i1> splat (i1 true), <4 x i16> poison) -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP6]], <4 x i16> [[TMP5]], i64 4) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i16> [[TMP5]], <4 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <8 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP8]], i16 0) ; CHECK-NEXT: ret i16 [[TMP9]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll index 9269a710c61d3..8e80aee7070a9 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll @@ -11,11 +11,12 @@ define void @test(ptr %c) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <8 x ptr> [[TMP1]], <8 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP2]], i32 1, <8 x i1> splat (i1 true), <8 x i8> poison) ; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> [[TMP3]], i32 1, <8 x i1> splat (i1 true), <8 x i8> poison) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i8> [[TMP5]], <8 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <16 x i32> ; CHECK-NEXT: br label %[[FOR_COND:.*]] ; CHECK: [[FOR_COND]]: ; CHECK-NEXT: [[A_PROMOTED2226:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[TMP8:%.*]], %[[FOR_COND]] ] -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP5]], i64 8) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP8]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP7]]) ; CHECK-NEXT: br label %[[FOR_COND]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/horizontal-list.ll index 78b5acad0df9a..457f2600b539f 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/horizontal-list.ll @@ -45,12 +45,14 @@ define float @test(ptr %x) { ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4 ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30 ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = call fast <8 x float> @llvm.vector.extract.v8f32.v16f32(<16 x float> [[TMP0]], i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> poison, <8 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = fadd fast <8 x float> [[TMP5]], [[TMP1]] -; CHECK-NEXT: [[TMP6:%.*]] = call fast <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> [[RDX_OP]], i64 0) -; CHECK-NEXT: [[RDX_OP4:%.*]] = call fast <4 x float> @llvm.vector.extract.v4f32.v16f32(<16 x float> [[TMP6]], i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[RDX_OP]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> [[TMP6]], <16 x i32> +; CHECK-NEXT: [[RDX_OP4:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> poison, <4 x i32> ; CHECK-NEXT: [[RDX_OP5:%.*]] = fadd fast <4 x float> [[RDX_OP4]], [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = call fast <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP6]], <4 x float> [[RDX_OP5]], i64 0) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[RDX_OP5]], <4 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> [[TMP9]], <16 x i32> ; CHECK-NEXT: [[OP_RDX1:%.*]] = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[TMP8]]) ; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]] ; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll index 1e7cc9c268cfa..b6a40f0162bbd 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll @@ -12,7 +12,8 @@ define fastcc void @rephase(ptr %phases_in, ptr %157, i64 %158) { ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IMAG_1_251]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> , <2 x double> [[TMP3]], i64 2) +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> , <4 x double> [[TMP8]], <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP5]], [[TMP6]] ; CHECK-NEXT: store <4 x double> [[TMP7]], ptr [[IMAG_247]], align 8 ; CHECK-NEXT: store double [[TMP2]], ptr [[PHASES_IN]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index db09843a6ef72..5bc2e94485432 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -1027,8 +1027,9 @@ define i32 @reduce_sum_2arrays_a(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[Q:%.*]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP2]], <4 x i8> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) ; CHECK-NEXT: ret i32 [[TMP5]] @@ -1075,8 +1076,9 @@ define i32 @reduce_sum_2arrays_b(ptr noalias noundef %x, ptr noalias %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[X:%.*]], align 1 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[Y:%.*]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP2]], <4 x i8> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) ; CHECK-NEXT: ret i32 [[TMP5]] diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll index 258b0ec0bcfc7..f6e4643006816 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll @@ -17,12 +17,13 @@ define void @test1(<4 x float> %load6, <4 x float> %load7, <4 x float> %load8, < ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VEXT165_I:%.*]] = shufflevector <4 x float> [[LOAD6:%.*]], <4 x float> [[LOAD7:%.*]], <4 x i32> ; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <4 x float> [[LOAD7]], <4 x float> [[LOAD8:%.*]], <4 x i32> -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[VEXT165_I]], i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP0]], <4 x float> [[VEXT309_I]], i64 4) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[LOAD17:%.*]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[FMULADD7:%.*]], i64 0) -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP5]], <4 x float> [[FMULADD16:%.*]], i64 4) +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> [[VEXT165_I]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[VEXT309_I]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[LOAD17:%.*]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x float> [[FMULADD7:%.*]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[FMULADD16:%.*]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> [[TMP5]], <8 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP4]], <8 x float> [[TMP6]]) ; CHECK-NEXT: store <8 x float> [[TMP7]], ptr [[OUT_PTR:%.*]], align 4 ; CHECK-NEXT: ret void @@ -55,12 +56,13 @@ define void @test2(<8 x float> %load6, <8 x float> %load7, <8 x float> %load8, < ; CHECK-NEXT: entry: ; CHECK-NEXT: [[VEXT165_I:%.*]] = shufflevector <8 x float> [[LOAD6:%.*]], <8 x float> [[LOAD7:%.*]], <8 x i32> ; CHECK-NEXT: [[VEXT309_I:%.*]] = shufflevector <8 x float> [[LOAD7]], <8 x float> [[LOAD8:%.*]], <8 x i32> -; CHECK-NEXT: [[TMP0:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[VEXT165_I]], i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> [[VEXT309_I]], i64 8) -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[LOAD17:%.*]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[FMULADD7:%.*]], i64 0) -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP5]], <8 x float> [[FMULADD16:%.*]], i64 8) +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x float> [[VEXT165_I]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[VEXT309_I]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[TMP0]], <16 x float> [[TMP2]], <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[LOAD17:%.*]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[FMULADD7:%.*]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[FMULADD16:%.*]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP8]], <16 x float> [[TMP5]], <16 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> [[TMP1]], <16 x float> [[TMP4]], <16 x float> [[TMP6]]) ; CHECK-NEXT: store <16 x float> [[TMP7]], ptr [[OUT_PTR:%.*]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll index c8517be755f21..da08718d5c248 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll @@ -8,8 +8,7 @@ define i32 @test() { ; CHECK-NEXT: br label [[IF_END_I87:%.*]] ; CHECK: if.end.i87: ; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> , <4 x i64> ), i32 4, <4 x i1> splat (i1 true), <4 x i32> poison) -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> , <4 x i32> ; CHECK-NEXT: switch i32 0, label [[SW_BB509_I:%.*]] [ ; CHECK-NEXT: i32 1, label [[SW_BB509_I]] ; CHECK-NEXT: i32 0, label [[IF_THEN458_I:%.*]] @@ -51,21 +50,15 @@ define void @test2() { ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP6]], <8 x float> [[TMP3]], i64 8) -; CHECK-NEXT: [[TMP8:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v16f32(<32 x float> [[TMP7]], <16 x float> [[TMP5]], i64 16) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <32 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <32 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <32 x float> [[TMP6]], <32 x float> [[TMP7]], <32 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <32 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <32 x float> [[TMP10]], <32 x float> [[TMP11]], <32 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = fpext <32 x float> [[TMP8]] to <32 x double> -; CHECK-NEXT: [[TMP10:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> poison, <8 x double> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP11:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP10]], <8 x double> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP12:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP11]], <8 x double> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP13:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP12]], <8 x double> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP14:%.*]] = fadd <32 x double> [[TMP13]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = fadd <32 x double> zeroinitializer, [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = fptrunc <32 x double> [[TMP14]] to <32 x float> -; CHECK-NEXT: [[TMP16:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP17:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP16]], <8 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP18:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP17]], <8 x float> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP19:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP18]], <8 x float> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP20:%.*]] = fcmp ogt <32 x float> [[TMP19]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = fcmp ogt <32 x float> zeroinitializer, [[TMP15]] ; CHECK-NEXT: ret void ; entry: @@ -101,20 +94,17 @@ define void @test3(float %0) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY_LR_PH:%.*]] ; CHECK: for.body.lr.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP1]], <2 x float> zeroinitializer, i64 2) ; CHECK-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <4 x float> [ zeroinitializer, [[FOR_BODY_LR_PH]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr null, align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x float> zeroinitializer, [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> poison, <2 x i1> splat (i1 true), i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP6]], <2 x i1> [[TMP5]], i64 2) -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP9]], <4 x float> [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i1> [[TMP5]], <2 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i1> , <4 x i1> [[TMP8]], <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP7]] = select <4 x i1> [[TMP9]], <4 x float> [[TMP6]], <4 x float> zeroinitializer ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; entry: @@ -142,19 +132,20 @@ define ptr @test4() { ; POWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> ; POWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> ; POWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> -; POWEROF2-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP2]], i64 0) -; POWEROF2-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP3]], i64 2) +; POWEROF2-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; POWEROF2-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> +; POWEROF2-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP16]], <4 x i32> ; POWEROF2-NEXT: br label [[TMP8:%.*]] -; POWEROF2: 7: -; POWEROF2-NEXT: br label [[TMP8]] ; POWEROF2: 8: +; POWEROF2-NEXT: br label [[TMP8]] +; POWEROF2: 9: ; POWEROF2-NEXT: [[TMP9:%.*]] = phi <2 x float> [ poison, [[TMP7:%.*]] ], [ [[TMP4]], [[TMP0:%.*]] ] ; POWEROF2-NEXT: [[TMP10:%.*]] = phi <4 x float> [ poison, [[TMP7]] ], [ [[TMP6]], [[TMP0]] ] ; POWEROF2-NEXT: br label [[TMP11:%.*]] -; POWEROF2: 11: -; POWEROF2-NEXT: [[TMP12:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 0) +; POWEROF2: 12: +; POWEROF2-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <2 x i32> ; POWEROF2-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer -; POWEROF2-NEXT: [[TMP14:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 2) +; POWEROF2-NEXT: [[TMP14:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <2 x i32> ; POWEROF2-NEXT: [[TMP15:%.*]] = fmul <2 x float> zeroinitializer, [[TMP14]] ; POWEROF2-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP9]], i32 0 ; POWEROF2-NEXT: [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP16]] @@ -176,18 +167,19 @@ define ptr @test4() { ; NONPOWEROF2-NEXT: [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer ; NONPOWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> ; NONPOWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <3 x i32> -; NONPOWEROF2-NEXT: [[TMP4:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> poison, <3 x float> [[TMP2]], i64 0) -; NONPOWEROF2-NEXT: [[TMP5:%.*]] = call <6 x float> @llvm.vector.insert.v6f32.v3f32(<6 x float> [[TMP4]], <3 x float> [[TMP3]], i64 3) +; NONPOWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> poison, <6 x i32> +; NONPOWEROF2-NEXT: [[TMP18:%.*]] = shufflevector <3 x float> [[TMP3]], <3 x float> poison, <6 x i32> +; NONPOWEROF2-NEXT: [[TMP5:%.*]] = shufflevector <6 x float> [[TMP4]], <6 x float> [[TMP18]], <6 x i32> ; NONPOWEROF2-NEXT: br label [[TMP7:%.*]] -; NONPOWEROF2: 6: -; NONPOWEROF2-NEXT: br label [[TMP7]] ; NONPOWEROF2: 7: +; NONPOWEROF2-NEXT: br label [[TMP7]] +; NONPOWEROF2: 8: ; NONPOWEROF2-NEXT: [[TMP8:%.*]] = phi <6 x float> [ poison, [[TMP6:%.*]] ], [ [[TMP5]], [[TMP0:%.*]] ] ; NONPOWEROF2-NEXT: br label [[TMP9:%.*]] -; NONPOWEROF2: 9: -; NONPOWEROF2-NEXT: [[TMP10:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 0) +; NONPOWEROF2: 10: +; NONPOWEROF2-NEXT: [[TMP10:%.*]] = shufflevector <6 x float> [[TMP8]], <6 x float> poison, <3 x i32> ; NONPOWEROF2-NEXT: [[TMP11:%.*]] = fmul <3 x float> zeroinitializer, [[TMP10]] -; NONPOWEROF2-NEXT: [[TMP12:%.*]] = call <3 x float> @llvm.vector.extract.v3f32.v6f32(<6 x float> [[TMP8]], i64 3) +; NONPOWEROF2-NEXT: [[TMP12:%.*]] = shufflevector <6 x float> [[TMP8]], <6 x float> poison, <3 x i32> ; NONPOWEROF2-NEXT: [[TMP13:%.*]] = fmul <3 x float> zeroinitializer, [[TMP12]] ; NONPOWEROF2-NEXT: [[TMP14:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP11]]) ; NONPOWEROF2-NEXT: [[TMP15:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP13]]) @@ -235,21 +227,9 @@ define ptr @test4() { define i32 @test5() { ; CHECK-LABEL: @test5( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> [[TMP0]], <2 x double> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> poison, <2 x double> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP3]], <2 x double> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP4]], <2 x double> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP5]], <2 x double> zeroinitializer, i64 6) -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> poison, <2 x double> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v2f64(<8 x double> [[TMP7]], <2 x double> zeroinitializer, i64 6) -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> poison, <4 x double> [[TMP2]], i64 0) -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> [[TMP8]], <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = fadd <8 x double> [[TMP6]], [[TMP10]] ; CHECK-NEXT: br label [[FOR_END47:%.*]] ; CHECK: for.end47: -; CHECK-NEXT: [[TMP12:%.*]] = phi <8 x double> [ [[TMP11]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <8 x double> [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/unordered-loads-operands.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/unordered-loads-operands.ll index 4dd659a7ae802..510cf45edbb52 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/unordered-loads-operands.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/unordered-loads-operands.ll @@ -20,10 +20,10 @@ define void @test(ptr %mdct_forward_x) { ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <3 x float> [[TMP6]], <3 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> splat (i1 true), <4 x float> poison) ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <3 x float> [[TMP6]], <3 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <3 x float> [[TMP5]], <3 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> , <4 x float> [[TMP22]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP11]], <2 x float> [[TMP4]], i64 0) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> [[TMP10]], <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = fsub <4 x float> [[TMP9]], [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = fadd <4 x float> [[TMP9]], [[TMP12]] ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll index 9e6270376ddd4..0d1de729bf18c 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll @@ -13,7 +13,8 @@ define void @foo() { ; CHECK-NEXT: [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], ptr @dct_luma, i64 0, i64 3, i64 0 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([4 x [4 x i32]], ptr @bar, i64 0, i64 3, i64 2), align 4 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> , i32 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP2]], <2 x i32> [[TMP1]], i64 2) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP7]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> , i32 [[ADD277]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = ashr <4 x i32> [[TMP5]], splat (i32 6) diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/reuse-non-power-of-2-reorder.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/reuse-non-power-of-2-reorder.ll index 5681fb7346124..dbeff25954085 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/reuse-non-power-of-2-reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/reuse-non-power-of-2-reorder.ll @@ -16,8 +16,10 @@ define void @test(i32 %0, i64 %1, i32 %2, i32 %3, ptr %4) { ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <60 x i32> [[TMP14]], i32 [[TMP98]], i32 0 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <60 x i32> [[TMP15]], i32 [[TMP73]], i32 6 ; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <60 x i32> [[TMP16]], <60 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP17]], <2 x i32> [[TMP8]], i64 2) -; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP18]], <2 x i32> [[TMP8]], i64 4) +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <60 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <60 x i32> [[TMP16]], <60 x i32> [[TMP22]], <8 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP23]], <8 x i32> [[TMP18]], <8 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP19]], <8 x i32> poison, <60 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = xor <60 x i32> [[TMP12]], [[TMP20]] ; CHECK-NEXT: [[TMP130:%.*]] = call i32 @llvm.vector.reduce.or.v60i32(<60 x i32> [[TMP21]]) diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll index 1dd6c7b81fb73..3f4436f33fad6 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll @@ -25,23 +25,19 @@ define void @e(<4 x i16> %0) { ; ; THRESH-LABEL: @e( ; THRESH-NEXT: entry: -; THRESH-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 0) -; THRESH-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP1]], <4 x i16> zeroinitializer, i64 4) -; THRESH-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 4) -; THRESH-NEXT: [[TMP4:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> zeroinitializer, i64 0) -; THRESH-NEXT: [[TMP5:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP4]], <4 x i16> zeroinitializer, i64 4) -; THRESH-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP5]], <4 x i16> zeroinitializer, i64 8) -; THRESH-NEXT: [[TMP7:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP6]], <4 x i16> zeroinitializer, i64 12) ; THRESH-NEXT: br label [[VECTOR_BODY:%.*]] ; THRESH: vector.body: ; THRESH-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[VECTOR_BODY]] ] ; THRESH-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[ENTRY]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] -; THRESH-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP3]], <4 x i16> [[VEC_IND]], i64 0) -; THRESH-NEXT: [[TMP9:%.*]] = add <8 x i16> [[TMP2]], [[TMP8]] +; THRESH-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[VEC_IND]], <4 x i16> poison, <8 x i32> +; THRESH-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> , <8 x i16> [[TMP1]], <8 x i32> +; THRESH-NEXT: [[TMP3:%.*]] = add <8 x i16> zeroinitializer, [[TMP8]] ; THRESH-NEXT: [[TMP10:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> poison, <16 x i32> -; THRESH-NEXT: [[TMP11:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP10]], <4 x i16> [[TMP0:%.*]], i64 4) -; THRESH-NEXT: [[TMP12:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v8i16(<16 x i16> [[TMP11]], <8 x i16> [[TMP9]], i64 8) -; THRESH-NEXT: [[TMP13:%.*]] = icmp sgt <16 x i16> [[TMP12]], [[TMP7]] +; THRESH-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP0:%.*]], <4 x i16> poison, <16 x i32> +; THRESH-NEXT: [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> [[TMP5]], <16 x i32> +; THRESH-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <16 x i32> +; THRESH-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP6]], <16 x i16> [[TMP7]], <16 x i32> +; THRESH-NEXT: [[TMP13:%.*]] = icmp sgt <16 x i16> [[TMP9]], zeroinitializer ; THRESH-NEXT: [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP13]], <16 x i1> poison, <4 x i32> ; THRESH-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]]) ; THRESH-NEXT: [[TMP23:%.*]] = insertelement <4 x i1> poison, i1 [[TMP15]], i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll index 65e5458b25d2f..6be51062f6fa1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll @@ -13,7 +13,8 @@ define <8 x float> @sitofp_uitofp(<8 x i32> %a) { ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float> ; SSE2-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP6]], <4 x float> [[TMP4]], i64 4) +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <8 x i32> +; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> [[TMP7]], <8 x i32> ; SSE2-NEXT: ret <8 x float> [[TMP5]] ; ; SLM-LABEL: @sitofp_uitofp( @@ -22,7 +23,8 @@ define <8 x float> @sitofp_uitofp(<8 x i32> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float> ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP6]], <4 x float> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[TMP5]] ; ; AVX-LABEL: @sitofp_uitofp( @@ -77,7 +79,8 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) { ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32> ; SSE2-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SSE2-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @fptosi_fptoui( @@ -86,7 +89,8 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX-LABEL: @fptosi_fptoui( @@ -143,7 +147,8 @@ define <8 x float> @fneg_fabs(<8 x float> %a) { ; SSE2-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -2147483648) ; SSE2-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP4]], splat (i32 2147483647) ; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> -; SSE2-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SSE2-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP9]], <8 x i32> ; SSE2-NEXT: [[DOTUNCASTED:%.*]] = bitcast <8 x i32> [[TMP8]] to <8 x float> ; SSE2-NEXT: ret <8 x float> [[DOTUNCASTED]] ; @@ -155,7 +160,8 @@ define <8 x float> @fneg_fabs(<8 x float> %a) { ; SLM-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -2147483648) ; SLM-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP4]], splat (i32 2147483647) ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP9]], <8 x i32> ; SLM-NEXT: [[DOTUNCASTED:%.*]] = bitcast <8 x i32> [[TMP8]] to <8 x float> ; SLM-NEXT: ret <8 x float> [[DOTUNCASTED]] ; @@ -233,7 +239,8 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> ; SSE2-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SSE2-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @sext_zext( @@ -242,7 +249,8 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX-LABEL: @sext_zext( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll index fad46870ec475..1db428706047a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll @@ -13,7 +13,8 @@ define <8 x float> @sitofp_uitofp(<8 x i32> %a) { ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float> ; SSE2-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP6]], <4 x float> [[TMP4]], i64 4) +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <8 x i32> +; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> [[TMP7]], <8 x i32> ; SSE2-NEXT: ret <8 x float> [[TMP5]] ; ; SLM-LABEL: @sitofp_uitofp( @@ -22,7 +23,8 @@ define <8 x float> @sitofp_uitofp(<8 x i32> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = uitofp <4 x i32> [[TMP3]] to <4 x float> ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP6]], <4 x float> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x float> [[TMP5]] ; ; AVX-LABEL: @sitofp_uitofp( @@ -77,7 +79,8 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) { ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32> ; SSE2-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SSE2-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @fptosi_fptoui( @@ -86,7 +89,8 @@ define <8 x i32> @fptosi_fptoui(<8 x float> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = fptoui <4 x float> [[TMP3]] to <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX-LABEL: @fptosi_fptoui( @@ -143,7 +147,8 @@ define <8 x float> @fneg_fabs(<8 x float> %a) { ; SSE2-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -2147483648) ; SSE2-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP4]], splat (i32 2147483647) ; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> -; SSE2-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SSE2-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP9]], <8 x i32> ; SSE2-NEXT: [[DOTUNCASTED:%.*]] = bitcast <8 x i32> [[TMP8]] to <8 x float> ; SSE2-NEXT: ret <8 x float> [[DOTUNCASTED]] ; @@ -155,7 +160,8 @@ define <8 x float> @fneg_fabs(<8 x float> %a) { ; SLM-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -2147483648) ; SLM-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP4]], splat (i32 2147483647) ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP9]], <8 x i32> ; SLM-NEXT: [[DOTUNCASTED:%.*]] = bitcast <8 x i32> [[TMP8]] to <8 x float> ; SLM-NEXT: ret <8 x float> [[DOTUNCASTED]] ; @@ -233,7 +239,8 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> ; SSE2-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> ; SSE2-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SSE2-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SSE2-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SSE2-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @sext_zext( @@ -242,7 +249,8 @@ define <8 x i32> @sext_zext(<8 x i16> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX-LABEL: @sext_zext( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll index 99b13bdc05082..06498563a7d37 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll @@ -15,7 +15,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP11]], [[TMP4]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SSE-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SSE-NEXT: ret <8 x float> [[TMP5]] ; @@ -27,7 +28,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { ; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP11]], [[TMP4]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SLM-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SLM-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SLM-NEXT: ret <8 x float> [[TMP5]] ; @@ -39,7 +41,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { ; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; AVX-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP11]], [[TMP4]] ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; AVX-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; AVX-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; AVX-NEXT: ret <8 x float> [[TMP5]] ; @@ -99,7 +102,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SSE-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SSE-NEXT: ret <8 x float> [[TMP5]] ; @@ -111,7 +115,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SLM-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SLM-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SLM-NEXT: ret <8 x float> [[TMP5]] ; @@ -123,7 +128,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; AVX-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; AVX-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; AVX-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; AVX-NEXT: ret <8 x float> [[TMP5]] ; @@ -135,7 +141,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; AVX2-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; AVX2-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; AVX2-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; AVX2-NEXT: ret <8 x float> [[TMP5]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll index 7f9475917b566..6275d984295c0 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll @@ -15,7 +15,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP11]], [[TMP4]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SSE-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SSE-NEXT: ret <8 x float> [[TMP5]] ; @@ -27,7 +28,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { ; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP11]], [[TMP4]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SLM-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SLM-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SLM-NEXT: ret <8 x float> [[TMP5]] ; @@ -39,7 +41,8 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) { ; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; AVX-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[TMP11]], [[TMP4]] ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; AVX-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; AVX-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; AVX-NEXT: ret <8 x float> [[TMP5]] ; @@ -99,7 +102,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SSE-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SSE-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SSE-NEXT: ret <8 x float> [[TMP5]] ; @@ -111,7 +115,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; SLM-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; SLM-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; SLM-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; SLM-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; SLM-NEXT: ret <8 x float> [[TMP5]] ; @@ -123,7 +128,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; AVX-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; AVX-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; AVX-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; AVX-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; AVX-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; AVX-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; AVX-NEXT: ret <8 x float> [[TMP5]] ; @@ -135,7 +141,8 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) { ; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <4 x i32> ; AVX2-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[TMP11]], [[TMP4]] ; AVX2-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <8 x i32> -; AVX2-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP7]], <4 x float> [[TMP10]], i64 4) +; AVX2-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP10]], <4 x float> poison, <8 x i32> +; AVX2-NEXT: [[TMP8:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> [[TMP12]], <8 x i32> ; AVX2-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <8 x i32> ; AVX2-NEXT: ret <8 x float> [[TMP5]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll index 11ab7770a5383..d02df1ac92b4d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll @@ -15,7 +15,8 @@ define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP9]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @add_sub_v8i32( @@ -26,7 +27,8 @@ define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SLM-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP9]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX1-LABEL: @add_sub_v8i32( @@ -143,7 +145,8 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP3]], [[TMP9]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @ashr_shl_v8i32( @@ -154,7 +157,8 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SLM-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP3]], [[TMP9]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX1-LABEL: @ashr_shl_v8i32( @@ -217,7 +221,8 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SSE-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @ashr_shl_v8i32_const( @@ -226,7 +231,8 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX1-LABEL: @ashr_shl_v8i32_const( @@ -592,7 +598,8 @@ define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]] ; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP8]], <4 x i32> [[TMP6]], i64 4) +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP7]] ; ; SLM-LABEL: @add_sub_v8i32_splat( @@ -603,7 +610,8 @@ define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]] ; SLM-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP8]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP7]] ; ; AVX1-LABEL: @add_sub_v8i32_splat( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll index 9589ec24d49d4..d9a7586ecd23d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -15,7 +15,8 @@ define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP9]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @add_sub_v8i32( @@ -26,7 +27,8 @@ define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SLM-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP9]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX1-LABEL: @add_sub_v8i32( @@ -143,7 +145,8 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SSE-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP3]], [[TMP9]] ; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @ashr_shl_v8i32( @@ -154,7 +157,8 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { ; SLM-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP3]], [[TMP9]] ; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP10]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX1-LABEL: @ashr_shl_v8i32( @@ -217,7 +221,8 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SSE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SSE-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP5]] ; ; SLM-LABEL: @ashr_shl_v8i32_const( @@ -226,7 +231,8 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) { ; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3) ; SLM-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP6]], <4 x i32> [[TMP4]], i64 4) +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP7]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP5]] ; ; AVX1-LABEL: @ashr_shl_v8i32_const( @@ -592,7 +598,8 @@ define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SSE-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]] ; SSE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> -; SSE-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP8]], <4 x i32> [[TMP6]], i64 4) +; SSE-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; SSE-NEXT: ret <8 x i32> [[TMP7]] ; ; SLM-LABEL: @add_sub_v8i32_splat( @@ -603,7 +610,8 @@ define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) { ; SLM-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> ; SLM-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[TMP3]], [[TMP5]] ; SLM-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> -; SLM-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP8]], <4 x i32> [[TMP6]], i64 4) +; SLM-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> +; SLM-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; SLM-NEXT: ret <8 x i32> [[TMP7]] ; ; AVX1-LABEL: @add_sub_v8i32_splat( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll b/llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll index f2992cf044cd5..e1ee35217d187 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll @@ -40,9 +40,10 @@ define i64 @bitmask_16xi8(ptr nocapture noundef readonly %src) { ; SSE-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX_15]], align 1 ; SSE-NEXT: [[TOBOOL_NOT_15:%.*]] = icmp eq i8 [[TMP9]], 0 ; SSE-NEXT: [[OR_15:%.*]] = select i1 [[TOBOOL_NOT_15]], i64 0, i64 32768 -; SSE-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.v8i64(<8 x i64> [[TMP3]], i64 0) +; SSE-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> poison, <4 x i32> ; SSE-NEXT: [[RDX_OP:%.*]] = or <4 x i64> [[TMP10]], [[TMP6]] -; SSE-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP3]], <4 x i64> [[RDX_OP]], i64 0) +; SSE-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[RDX_OP]], <4 x i64> poison, <8 x i32> +; SSE-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP12]], <8 x i32> ; SSE-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP11]]) ; SSE-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP16]], [[OR_13]] ; SSE-NEXT: [[OP_RDX5:%.*]] = or i64 [[OR_14]], [[OR_15]] @@ -75,9 +76,10 @@ define i64 @bitmask_16xi8(ptr nocapture noundef readonly %src) { ; AVX-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX_15]], align 1 ; AVX-NEXT: [[TOBOOL_NOT_15:%.*]] = icmp eq i8 [[TMP9]], 0 ; AVX-NEXT: [[OR_15:%.*]] = select i1 [[TOBOOL_NOT_15]], i64 0, i64 32768 -; AVX-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.v8i64(<8 x i64> [[TMP3]], i64 0) +; AVX-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> poison, <4 x i32> ; AVX-NEXT: [[RDX_OP:%.*]] = or <4 x i64> [[TMP10]], [[TMP6]] -; AVX-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP3]], <4 x i64> [[RDX_OP]], i64 0) +; AVX-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[RDX_OP]], <4 x i64> poison, <8 x i32> +; AVX-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP13]], <8 x i32> ; AVX-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP11]]) ; AVX-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP12]], [[OR_13]] ; AVX-NEXT: [[OP_RDX2:%.*]] = or i64 [[OR_14]], [[OR_15]] @@ -110,9 +112,10 @@ define i64 @bitmask_16xi8(ptr nocapture noundef readonly %src) { ; AVX512-NEXT: [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX_15]], align 1 ; AVX512-NEXT: [[TOBOOL_NOT_15:%.*]] = icmp eq i8 [[TMP9]], 0 ; AVX512-NEXT: [[OR_15:%.*]] = select i1 [[TOBOOL_NOT_15]], i64 0, i64 32768 -; AVX512-NEXT: [[TMP10:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.v8i64(<8 x i64> [[TMP3]], i64 0) +; AVX512-NEXT: [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> poison, <4 x i32> ; AVX512-NEXT: [[RDX_OP:%.*]] = or <4 x i64> [[TMP10]], [[TMP6]] -; AVX512-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP3]], <4 x i64> [[RDX_OP]], i64 0) +; AVX512-NEXT: [[TMP13:%.*]] = shufflevector <4 x i64> [[RDX_OP]], <4 x i64> poison, <8 x i32> +; AVX512-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP13]], <8 x i32> ; AVX512-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP11]]) ; AVX512-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP12]], [[OR_13]] ; AVX512-NEXT: [[OP_RDX2:%.*]] = or i64 [[OR_14]], [[OR_15]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll index 5d2f059a8cf41..ff0887cf12447 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll @@ -16,7 +16,8 @@ define void @test(ptr %0, i64 %1, i64 %2) { ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i64> poison, i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x i64> [[TMP9]], i64 [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i64> [[TMP10]], <8 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP11]], <4 x i64> [[TMP6]], i64 4) +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP16]], <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP8]] ; CHECK-NEXT: br [[DOTPREHEADER_US_US:label %.*]] ; CHECK: [[_PREHEADER_US_US:.*:]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll index 7ed5f33c9dc6c..07fdc9d8dd2fa 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll @@ -17,7 +17,8 @@ define void @test() { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> , i32 [[CALL]], i32 3 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP4]], <4 x i1> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> [[TMP5]], <8 x i32> ; CHECK-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll b/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll index fa46bd3d83249..c8748f316f024 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll @@ -24,7 +24,8 @@ define void @test(ptr %0, i32 %add651) { ; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[ADD651]], i32 0 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP13]], <2 x i32> [[TMP10]], i64 2) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> [[TMP19]], <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = lshr <4 x i32> [[TMP14]], splat (i32 1) ; CHECK-NEXT: [[SHR685:%.*]] = lshr i32 [[TMP2]], 1 ; CHECK-NEXT: [[TMP16:%.*]] = trunc <4 x i32> [[TMP15]] to <4 x i16> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll index 9d48e7f8a787a..bfb623ac5a9b9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll @@ -34,7 +34,8 @@ define void @test(ptr %nExp, float %0, i1 %cmp, float %1) { ; CHECK-NEXT: [[TMP23:%.*]] = fmul <4 x float> [[TMP19]], zeroinitializer ; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x float> [[TMP19]], zeroinitializer ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> [[TMP24]], <4 x i32> -; CHECK-NEXT: [[TMP26:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> , <2 x float> [[TMP22]], i64 2) +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x float> [[TMP22]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x float> , <4 x float> [[TMP28]], <4 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = fadd <4 x float> [[TMP25]], [[TMP26]] ; CHECK-NEXT: store <4 x float> [[TMP27]], ptr [[CALL25]], align 4 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll index 55fe7d6ed52e5..77585965d68e9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll @@ -16,7 +16,8 @@ define i32 @test() { ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> , <2 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v2i1(<8 x i1> , <2 x i1> [[TMP8]], i64 0) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i1> , <8 x i1> [[TMP14]], <8 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i32> zeroinitializer, <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i32> [[TMP5]], [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = trunc <8 x i32> [[TMP13]] to <8 x i8> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll index 20d7ba99fd515..3bf73034a1718 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll @@ -17,7 +17,8 @@ define i32 @test(ptr %c, i16 %a, i16 %0) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i16 [[A]], -2 ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i1> [[TMP10]], i1 [[TMP9]], i32 7 -; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP11]], <4 x i1> [[TMP8]], i64 0) +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i1> [[TMP8]], <4 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i1> [[TMP11]], <8 x i1> [[TMP17]], <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = freeze <8 x i1> [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP13]]) ; CHECK-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelemets-extended-by-poison.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelemets-extended-by-poison.ll index 0e08ef4d74308..18e03df0fbcc9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractelemets-extended-by-poison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelemets-extended-by-poison.ll @@ -10,7 +10,7 @@ define i32 @test() { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> , <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP4]], <4 x i64> [[TMP0]], i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> [[TMP3]], <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[TMP5]] to <8 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> @@ -19,9 +19,10 @@ define i32 @test() { ; CHECK-NEXT: [[TMP8:%.*]] = add <16 x i32> [[TMP7]], zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0 ; CHECK-NEXT: [[INC_3_3_I_1:%.*]] = or i64 [[TMP9]], 0 -; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.v16i32(<16 x i32> [[TMP8]], i64 0) +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> poison, <8 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = or <8 x i32> [[TMP16]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP8]], <8 x i32> [[RDX_OP]], i64 0) +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[RDX_OP]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> [[TMP18]], <16 x i32> ; CHECK-NEXT: [[OP_RDX:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP17]]) ; CHECK-NEXT: ret i32 [[OP_RDX]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll b/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll index 992909fb3e87f..15ba98f90f0b8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll @@ -15,8 +15,9 @@ define i32 @test() { ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[G_228_PROMOTED166_I1105_I]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> [[TMP9]], <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v12i32(<16 x i32> poison, <12 x i32> [[TMP3]], i64 0) -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP6]], <16 x i32> [[TMP8]], <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <12 x i32> [[TMP3]], <12 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP17]], <16 x i32> [[TMP8]], <16 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt <16 x i32> [[TMP11]], zeroinitializer ; CHECK-NEXT: [[TMP13:%.*]] = icmp ult <16 x i32> [[TMP11]], zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP12]], <16 x i1> [[TMP13]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll b/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll index 2a54ae9a1e749..ce65f532e0b3b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll @@ -11,6 +11,7 @@ define i32 @test(i64 %l.549) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> [[TMP3]], i64 0, i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i64> poison, i64 [[L_549]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP8]], <4 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> ; CHECK-NEXT: br label %[[IF_THEN19:.*]] ; CHECK: [[P:.*]]: ; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i64> [ zeroinitializer, %[[IF_END29:.*]] ], [ [[TMP13:%.*]], %[[IF_END25:.*]] ] @@ -18,20 +19,21 @@ define i32 @test(i64 %l.549) { ; CHECK-NEXT: br i1 false, label %[[S:.*]], label %[[Q:.*]] ; CHECK: [[Q]]: ; CHECK-NEXT: [[XOR39:%.*]] = phi i64 [ 0, %[[P]] ], [ 0, %[[LAND_LHS_TRUE:.*]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i64> [ zeroinitializer, %[[P]] ], [ zeroinitializer, %[[LAND_LHS_TRUE]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i64> [ zeroinitializer, %[[P]] ], [ zeroinitializer, %[[LAND_LHS_TRUE]] ] ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[XOR39]], i32 2 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP4]], <2 x i64> [[TMP6]], i64 0) +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x i64> [[TMP16]], <2 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP18]], <4 x i32> ; CHECK-NEXT: br i1 false, label %[[LOR_LHS_FALSE:.*]], label %[[R:.*]] ; CHECK: [[LOR_LHS_FALSE]]: ; CHECK-NEXT: br i1 false, label %[[LAND_LHS_TRUE]], label %[[S]] ; CHECK: [[R]]: -; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i64> [ [[TMP7]], %[[Q]] ], [ [[TMP16:%.*]], %[[IF_THEN19]] ] +; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i64> [ [[TMP19]], %[[Q]] ], [ [[TMP20:%.*]], %[[IF_THEN19]] ] ; CHECK-NEXT: br i1 false, label %[[S]], label %[[LAND_LHS_TRUE]] ; CHECK: [[LAND_LHS_TRUE]]: -; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i64> [ [[TMP18]], %[[R]] ], [ zeroinitializer, %[[LOR_LHS_FALSE]] ] +; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i64> [ [[TMP21]], %[[R]] ], [ zeroinitializer, %[[LOR_LHS_FALSE]] ] ; CHECK-NEXT: br i1 false, label %[[Q]], label %[[S]] ; CHECK: [[S]]: -; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP19]], %[[LAND_LHS_TRUE]] ], [ [[TMP18]], %[[R]] ], [ [[TMP7]], %[[LOR_LHS_FALSE]] ], [ [[TMP17]], %[[P]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP22]], %[[LAND_LHS_TRUE]] ], [ [[TMP21]], %[[R]] ], [ [[TMP19]], %[[LOR_LHS_FALSE]] ], [ [[TMP17]], %[[P]] ] ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> poison, <2 x i32> ; CHECK-NEXT: br label %[[IF_THEN19]] ; CHECK: [[IF_THEN19]]: @@ -39,7 +41,7 @@ define i32 @test(i64 %l.549) { ; CHECK-NEXT: [[TMP13]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <2 x i32> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> poison, <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i64> [[TMP14]], <4 x i64> [[TMP9]], <4 x i32> -; CHECK-NEXT: [[TMP16]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP15]], <2 x i64> [[TMP2]], i64 2) +; CHECK-NEXT: [[TMP20]] = shufflevector <4 x i64> [[TMP15]], <4 x i64> [[TMP6]], <4 x i32> ; CHECK-NEXT: br i1 false, label %[[R]], label %[[IF_END25]] ; CHECK: [[IF_END25]]: ; CHECK-NEXT: br i1 false, label %[[IF_END29]], label %[[P]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll index 19c29be1ef384..4f62a8d24387f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll @@ -18,7 +18,8 @@ define void @foo(ptr %i7, i32 %0, i1 %tobool62.not) { ; CHECK-NEXT: [[TMP8:%.*]] = load <2 x float>, ptr [[RC21]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 2 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP9]], <2 x float> [[TMP8]], i64 0) +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP11]], <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = fcmp olt <4 x float> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = fcmp olt <4 x float> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP14]], <4 x float> [[TMP5]], <4 x float> zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll index 2484a2d2193fc..eaa77d74f8df1 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -605,9 +605,10 @@ define float @loadadd31(ptr nocapture readonly %x) { ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4 ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30 ; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4 -; CHECK-NEXT: [[RDX_OP2:%.*]] = call fast <4 x float> @llvm.vector.extract.v4f32.v24f32(<24 x float> [[TMP0]], i64 0) +; CHECK-NEXT: [[RDX_OP2:%.*]] = shufflevector <24 x float> [[TMP0]], <24 x float> poison, <4 x i32> ; CHECK-NEXT: [[RDX_OP3:%.*]] = fadd fast <4 x float> [[RDX_OP2]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = call fast <24 x float> @llvm.vector.insert.v24f32.v4f32(<24 x float> [[TMP0]], <4 x float> [[RDX_OP3]], i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RDX_OP3]], <4 x float> poison, <24 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <24 x float> [[TMP0]], <24 x float> [[TMP6]], <24 x i32> ; CHECK-NEXT: [[OP_RDX1:%.*]] = call fast float @llvm.vector.reduce.fadd.v24f32(float 0.000000e+00, <24 x float> [[TMP5]]) ; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]] ; CHECK-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP4]] @@ -623,9 +624,10 @@ define float @loadadd31(ptr nocapture readonly %x) { ; THRESHOLD-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX_28]], align 4 ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, ptr [[X]], i64 30 ; THRESHOLD-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX_29]], align 4 -; THRESHOLD-NEXT: [[RDX_OP2:%.*]] = call fast <4 x float> @llvm.vector.extract.v4f32.v24f32(<24 x float> [[TMP0]], i64 0) +; THRESHOLD-NEXT: [[RDX_OP2:%.*]] = shufflevector <24 x float> [[TMP0]], <24 x float> poison, <4 x i32> ; THRESHOLD-NEXT: [[RDX_OP3:%.*]] = fadd fast <4 x float> [[RDX_OP2]], [[TMP2]] -; THRESHOLD-NEXT: [[TMP5:%.*]] = call fast <24 x float> @llvm.vector.insert.v24f32.v4f32(<24 x float> [[TMP0]], <4 x float> [[RDX_OP3]], i64 0) +; THRESHOLD-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[RDX_OP3]], <4 x float> poison, <24 x i32> +; THRESHOLD-NEXT: [[TMP5:%.*]] = shufflevector <24 x float> [[TMP0]], <24 x float> [[TMP6]], <24 x i32> ; THRESHOLD-NEXT: [[OP_RDX1:%.*]] = call fast float @llvm.vector.reduce.fadd.v24f32(float 0.000000e+00, <24 x float> [[TMP5]]) ; THRESHOLD-NEXT: [[OP_RDX2:%.*]] = fadd fast float [[OP_RDX1]], [[TMP3]] ; THRESHOLD-NEXT: [[OP_RDX3:%.*]] = fadd fast float [[OP_RDX2]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index ca662b838938f..b7bd3e41b0d29 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -977,9 +977,12 @@ define i32 @maxi8_wrong_parent(i32) { ; SSE4: pp: ; SSE4-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8 ; SSE4-NEXT: [[TMP8:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8 -; SSE4-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP4]], i64 0) -; SSE4-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP5]], <2 x i32> [[TMP8]], i64 4) -; SSE4-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP6]], <2 x i32> [[TMP2]], i64 6) +; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <8 x i32> +; SSE4-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> +; SSE4-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP10]], <8 x i32> +; SSE4-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; SSE4-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP11]], <8 x i32> [[TMP9]], <8 x i32> ; SSE4-NEXT: [[OP_RDX7:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP7]]) ; SSE4-NEXT: ret i32 [[OP_RDX7]] ; @@ -989,8 +992,9 @@ define i32 @maxi8_wrong_parent(i32) { ; AVX: pp: ; AVX-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8 ; AVX-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8 -; AVX-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> [[TMP7]], i64 0) -; AVX-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP5]], <2 x i32> [[TMP2]], i64 2) +; AVX-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> +; AVX-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> +; AVX-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> [[TMP2]], <4 x i32> ; AVX-NEXT: [[RDX_OP:%.*]] = icmp sgt <4 x i32> [[TMP4]], [[TMP6]] ; AVX-NEXT: [[RDX_OP1:%.*]] = select <4 x i1> [[RDX_OP]], <4 x i32> [[TMP4]], <4 x i32> [[TMP6]] ; AVX-NEXT: [[OP_RDX7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_OP1]]) @@ -1002,9 +1006,12 @@ define i32 @maxi8_wrong_parent(i32) { ; THRESH: pp: ; THRESH-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 2), align 8 ; THRESH-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr getelementptr inbounds ([32 x i32], ptr @arr, i64 0, i64 6), align 8 -; THRESH-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP3]], i64 0) -; THRESH-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP5]], <2 x i32> [[TMP4]], i64 4) -; THRESH-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP6]], <2 x i32> [[TMP2]], i64 6) +; THRESH-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> +; THRESH-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> +; THRESH-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> +; THRESH-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP10]], <8 x i32> +; THRESH-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; THRESH-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP11]], <8 x i32> [[TMP9]], <8 x i32> ; THRESH-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP7]]) ; THRESH-NEXT: ret i32 [[TMP8]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll b/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll index d6f0b7692bdd9..f07424f0d2934 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll @@ -20,8 +20,10 @@ define i32 @test() { ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <64 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <64 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <64 x i32> [[TMP13]], <64 x i32> [[TMP15]], <64 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = call <64 x i32> @llvm.vector.insert.v64i32.v24i32(<64 x i32> [[TMP16]], <24 x i32> [[TMP6]], i64 24) -; CHECK-NEXT: [[TMP18:%.*]] = call <64 x i32> @llvm.vector.insert.v64i32.v16i32(<64 x i32> [[TMP17]], <16 x i32> [[TMP4]], i64 16) +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <24 x i32> [[TMP6]], <24 x i32> poison, <64 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <64 x i32> [[TMP16]], <64 x i32> [[TMP15]], <64 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <64 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <64 x i32> [[TMP27]], <64 x i32> [[TMP28]], <64 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq <64 x i32> zeroinitializer, [[TMP18]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp ne <64 x i32> zeroinitializer, [[TMP18]] ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <64 x i1> [[TMP19]], <64 x i1> [[TMP20]], <64 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll index 80b62c3cfffac..0fddb7322e9b3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll @@ -15,14 +15,17 @@ define <16 x double> @test(ptr %x, double %v, double %a) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> poison, double [[V]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v6f64(<16 x double> poison, <6 x double> [[TMP1]], i64 0) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <6 x double> [[TMP1]], <6 x double> poison, <16 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP3]], <4 x double> poison, <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x double> [[TMP10]], <16 x double> [[TMP11]], <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP12]], <2 x double> [[TMP6]], i64 6) -; CHECK-NEXT: [[TMP14:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP13]], <2 x double> [[TMP7]], i64 8) -; CHECK-NEXT: [[TMP15:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP14]], <2 x double> [[TMP9]], i64 10) -; CHECK-NEXT: [[TMP16:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP15]], <2 x double> [[TMP9]], i64 12) -; CHECK-NEXT: [[TMP17:%.*]] = call <16 x double> @llvm.vector.insert.v16f64.v2f64(<16 x double> [[TMP16]], <2 x double> [[TMP9]], i64 14) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x double> [[TMP10]], <16 x double> [[TMP11]], <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x double> [[TMP12]], <16 x double> [[TMP13]], <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x double> [[TMP14]], <16 x double> [[TMP15]], <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x double> [[TMP16]], <16 x double> [[TMP20]], <16 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <16 x double> [[TMP21]], <16 x double> [[TMP20]], <16 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <16 x double> [[TMP19]], <16 x double> [[TMP20]], <16 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = fadd <16 x double> [[TMP5]], [[TMP17]] ; CHECK-NEXT: ret <16 x double> [[TMP18]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll index 54c950a078502..48b657e8bf6e5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll @@ -11,7 +11,8 @@ define void @inst_size(ptr %a, <2 x i64> %b) { ; CHECK-NEXT: [[TMPL4:%.*]] = load i64, ptr [[PTR4]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[B:%.*]], <2 x i64> poison, <4 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMPL1]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> [[TMP2]], <2 x i64> [[TMP0]], i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i64> zeroinitializer, [[TMP3]] ; CHECK-NEXT: [[T45:%.*]] = icmp sgt i64 0, [[TMPL4]] ; CHECK-NEXT: br label [[BLOCK:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing-pad-for-split-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing-pad-for-split-node.ll index d6552adbd4abf..6c729d17c1a9b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/landing-pad-for-split-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/landing-pad-for-split-node.ll @@ -29,14 +29,15 @@ define void @test(i32 %arg) personality ptr null { ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[PHI6]], i32 2 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[PHI7]], i32 3 ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP8]], <4 x i32> [[TMP7]], i64 4) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; CHECK-NEXT: br label %[[BB11:.*]] ; CHECK: [[BB9:.*]]: ; CHECK-NEXT: [[LANDINGPAD10:%.*]] = landingpad { ptr, i32 } ; CHECK-NEXT: cleanup ; CHECK-NEXT: br label %[[BB11]] ; CHECK: [[BB11]]: -; CHECK-NEXT: [[TMP10:%.*]] = phi <8 x i32> [ poison, %[[BB9]] ], [ [[TMP9]], %[[BB5]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi <8 x i32> [ poison, %[[BB9]] ], [ [[TMP10]], %[[BB5]] ] ; CHECK-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll index ccb7e9b514cf1..842bd6c6bec37 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll @@ -108,9 +108,10 @@ define i64 @test_3() #0 { ; CHECK-NEXT: [[VAL4:%.*]] = extractelement <28 x i32> [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <32 x i32> poison, i32 [[VAL4]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> poison, <32 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = call <28 x i32> @llvm.vector.extract.v28i32.v32i32(<32 x i32> [[TMP1]], i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <28 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = mul <28 x i32> [[TMP5]], [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v28i32(<32 x i32> [[TMP1]], <28 x i32> [[RDX_OP]], i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <28 x i32> [[RDX_OP]], <28 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> [[TMP7]], <32 x i32> ; CHECK-NEXT: [[OP_RDX27:%.*]] = call i32 @llvm.vector.reduce.mul.v32i32(<32 x i32> [[TMP6]]) ; CHECK-NEXT: [[VAL64:%.*]] = add i32 3, [[OP_RDX27]] ; CHECK-NEXT: [[VAL65:%.*]] = sext i32 [[VAL64]] to i64 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll index 289c6002851d7..f56af934f19f5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll @@ -22,9 +22,12 @@ define i32 @test(i32 %s.0) { ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP22]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> , <8 x i32> [[TMP7]], <8 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP8]], <2 x i32> [[TMP2]], i64 2) -; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP9]], <2 x i32> [[TMP3]], i64 4) -; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP10]], <2 x i32> [[TMP5]], i64 6) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> [[TMP11]], <8 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <8 x i32> [[TMP27]], <8 x i32> [[TMP30]], <8 x i32> ; CHECK-NEXT: br i1 false, label %[[IF_END24:.*]], label %[[IF_THEN11:.*]] ; CHECK: [[IF_THEN11]]: ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> @@ -37,11 +40,11 @@ define i32 @test(i32 %s.0) { ; CHECK: [[IF_THEN18:.*]]: ; CHECK-NEXT: br label %[[T]] ; CHECK: [[T]]: -; CHECK-NEXT: [[TMP30:%.*]] = phi <8 x i32> [ [[TMP27:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ] +; CHECK-NEXT: [[TMP34:%.*]] = phi <8 x i32> [ [[TMP33:%.*]], %[[O]] ], [ poison, %[[IF_THEN18]] ] ; CHECK-NEXT: [[TMP17]] = extractelement <4 x i32> [[TMP23:%.*]], i32 0 ; CHECK-NEXT: br i1 false, label %[[IF_END24]], label %[[K]] ; CHECK: [[IF_END24]]: -; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP11]], %[[IF_END6]] ], [ [[TMP30]], %[[T]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi <8 x i32> [ [[TMP29]], %[[IF_THEN11]] ], [ [[TMP31]], %[[IF_END6]] ], [ [[TMP34]], %[[T]] ] ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i32> [[TMP18]], <8 x i32> poison, <4 x i32> @@ -52,7 +55,8 @@ define i32 @test(i32 %s.0) { ; CHECK-NEXT: [[TMP24:%.*]] = phi <4 x i32> [ zeroinitializer, %[[K]] ], [ [[TMP21]], %[[IF_END24]] ] ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i32> [[TMP25]], <8 x i32> , <8 x i32> -; CHECK-NEXT: [[TMP27]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP26]], <4 x i32> [[TMP23]], i64 4) +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP33]] = shufflevector <8 x i32> [[TMP26]], <8 x i32> [[TMP32]], <8 x i32> ; CHECK-NEXT: [[TMP28]] = extractelement <4 x i32> [[TMP24]], i32 3 ; CHECK-NEXT: br i1 false, label %[[T]], label %[[IF_END6]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll index ea497c95d4114..1abc8102dc332 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -22,7 +22,8 @@ define i32 @bar() local_unnamed_addr { ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> , <8 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <8 x i32> [[TMP7]], [[TMP9]] ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP18]], <8 x i32> [[TMP10]], i64 8) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP18]], <16 x i32> [[TMP19]], <16 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = lshr <16 x i32> [[TMP11]], splat (i32 15) ; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i32> [[TMP12]], splat (i32 65537) ; CHECK-NEXT: [[TMP14:%.*]] = mul nuw <16 x i32> [[TMP13]], splat (i32 65535) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll index 76104efc1bb78..6da0ecef5cd96 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll @@ -8,7 +8,7 @@ define void @test(i64 %d.promoted.i) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[D_PROMOTED_I]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> zeroinitializer, [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i1> -; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v2i1(<16 x i1> poison, <2 x i1> [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i1> [[TMP3]], <2 x i1> poison, <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i1> [[TMP4]], <16 x i1> , <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i1> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP6]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll index f7d78be4f13ca..a9f2ed61d9ee4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll @@ -18,8 +18,8 @@ define i64 @test() { ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <32 x i32> , i32 0, i32 6 -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP1]], <4 x i32> , i64 24) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i32> [[TMP0]], <32 x i32> , <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> , <32 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sub <32 x i32> zeroinitializer, [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> [[TMP3]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP4]], 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll index e9a65bf6d6f0d..7df97492b874b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll @@ -10,7 +10,8 @@ define i1 @foo() { ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> zeroinitializer, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> , <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> , <4 x i1> [[TMP6]], i64 4) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> , <8 x i1> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 false, i1 [[TMP5]], i1 false diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll index 4ad02d47fb385..f1bd3384f0488 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll @@ -6,7 +6,7 @@ define i64 @test() { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[OR54_I_I_6:%.*]] = or i32 0, 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[OR54_I_I_6]], i32 8 -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 0) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> , <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i32> [[TMP2]] to <16 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP3]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll index 355f5306ee4db..04359eb6fcd7c 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll @@ -16,8 +16,10 @@ define void @e(ptr %c, i64 %0) { ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x ptr> [[TMP5]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <6 x ptr> poison, ptr [[TMP2]], i32 2 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <6 x ptr> [[TMP7]], ptr [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP9:%.*]] = call <6 x ptr> @llvm.vector.insert.v6p0.v2p0(<6 x ptr> [[TMP8]], <2 x ptr> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP10:%.*]] = call <6 x ptr> @llvm.vector.insert.v6p0.v2p0(<6 x ptr> [[TMP9]], <2 x ptr> [[TMP6]], i64 4) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x ptr> [[TMP4]], <2 x ptr> poison, <6 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <6 x ptr> [[TMP8]], <6 x ptr> [[TMP19]], <6 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <2 x ptr> [[TMP6]], <2 x ptr> poison, <6 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <6 x ptr> [[TMP20]], <6 x ptr> [[TMP21]], <6 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint <6 x ptr> [[TMP10]] to <6 x i64> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <6 x i64> [[TMP11]], <6 x i64> poison, <32 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i64> poison, i64 [[TMP0]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll index 70b7f14a3a2c9..1fedde4cc9fd7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll @@ -7,7 +7,7 @@ define void @test() { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <12 x i64> , i64 [[XOR108_I_I_I]], i32 10 ; CHECK-NEXT: [[TMP2:%.*]] = lshr <12 x i64> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[XOR108_I_I_I]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v12i64(<16 x i64> poison, <12 x i64> [[TMP2]], i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <12 x i64> [[TMP2]], <12 x i64> poison, <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i64> [[TMP5]], <16 x i64> [[TMP3]], <16 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i64> [[TMP6]], <16 x i64> poison, <16 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i64> [[TMP7]] to <16 x i1> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll index 382d6ae0e0a6f..652abef14771d 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll @@ -12,11 +12,11 @@ define void @test() { ; CHECK-NEXT: [[TMP0:%.*]] = zext i1 false to i64 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> , <2 x i64> [[TMP2]], i64 2) -; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> , <2 x i64> [[TMP2]], i64 2) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> , <4 x i64> [[TMP3]], <4 x i32> ; CHECK-NEXT: br i1 false, label %[[BB5]], label %[[BB2:.*]] ; CHECK: [[BB5]]: -; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i64> [ [[TMP3]], %[[BB1]] ], [ poison, %[[IF_THEN_I_I]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i64> [ [[TMP4]], %[[BB1]] ], [ poison, %[[IF_THEN_I_I]] ] ; CHECK-NEXT: br label %[[BB2]] ; CHECK: [[BB2]]: ; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i64> [ [[TMP6]], %[[BB5]] ], [ [[TMP4]], %[[BB1]] ] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-split-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-split-node.ll index eaf7bb2c9fdce..98ea4db6f6492 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-split-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-split-node.ll @@ -17,7 +17,8 @@ define i64 @test(i256 %0, { i32, i1 } %1) { ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP7]], i32 2 ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP13]], <4 x i32> [[TMP12]], i64 4) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x i32> [[TMP13]], <8 x i32> [[TMP19]], <8 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <8 x i32> [[TMP14]], zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i1> [[TMP15]] to i8 ; CHECK-NEXT: [[TMP17:%.*]] = call i8 @llvm.ctpop.i8(i8 [[TMP16]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-reshuffled-part.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-reshuffled-part.ll index 232e458504188..7206293444d55 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-node-reshuffled-part.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-node-reshuffled-part.ll @@ -11,7 +11,8 @@ define void @test() { ; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i1> [ poison, %[[CONT221_THREAD781]] ], [ zeroinitializer, %[[ENTRY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i1> [[TMP0]], <2 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> , <4 x i1> [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> , <8 x i1> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> [[TMP5]]) ; CHECK-NEXT: [[OP_RDX:%.*]] = or i64 0, [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-incoming-same-blocks.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-incoming-same-blocks.ll index 048d2814b9abb..d62623047763f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-incoming-same-blocks.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-incoming-same-blocks.ll @@ -8,19 +8,20 @@ define void @test(ptr %0, i1 %1, i1 %2) { ; CHECK: [[BB4]]: ; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ [[TMP12:%.*]], %[[TMP7:.*]] ], [ zeroinitializer, [[TMP3:%.*]] ] ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: br i1 [[TMP1]], label %[[TMP7]], label %[[BB14:.*]] +; CHECK-NEXT: br i1 [[TMP1]], label %[[TMP7]], label %[[BB15:.*]] ; CHECK: [[TMP7]]: ; CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 16 ; CHECK-NEXT: [[TMP10:%.*]] = load <2 x i32>, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = or <2 x i32> [[TMP10]], splat (i32 1) ; CHECK-NEXT: [[TMP12]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> , <2 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> , <2 x i32> [[TMP11]], i64 2) -; CHECK-NEXT: br i1 [[TMP2]], label %[[BB15:.*]], label %[[BB4]] -; CHECK: [[BB14]]: -; CHECK-NEXT: br label %[[BB15]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP13]], <4 x i32> +; CHECK-NEXT: br i1 [[TMP2]], label %[[BB16:.*]], label %[[BB4]] ; CHECK: [[BB15]]: -; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ [[TMP6]], %[[BB14]] ], [ [[TMP13]], %[[TMP7]] ] +; CHECK-NEXT: br label %[[BB16]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ [[TMP6]], %[[BB15]] ], [ [[TMP14]], %[[TMP7]] ] ; CHECK-NEXT: [[TMP17:%.*]] = load volatile ptr, ptr null, align 8 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i64 176 ; CHECK-NEXT: store <4 x i32> [[TMP16]], ptr [[TMP18]], align 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll index 5baa5f3cdcdae..e35491823cc55 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll @@ -4,10 +4,7 @@ define i16 @test() { ; CHECK-LABEL: define i16 @test() { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.vector.extract.v4i16.v8i16(<8 x i16> zeroinitializer, i64 0) -; CHECK-NEXT: [[RDX_OP:%.*]] = or <4 x i16> [[TMP0]], zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> zeroinitializer, <4 x i16> [[RDX_OP]], i64 0) -; CHECK-NEXT: [[OP_RDX:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP1]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> zeroinitializer) ; CHECK-NEXT: [[OP_RDX1:%.*]] = or i16 [[OP_RDX]], 0 ; CHECK-NEXT: ret i16 [[OP_RDX1]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll index 81da11dc42e88..1904540c23146 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -210,7 +210,8 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) { ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 42) ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP3]], <4 x i1> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP3]], <8 x i1> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]]) ; CHECK-NEXT: ret i1 [[TMP6]] @@ -244,7 +245,8 @@ define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) { ; CHECK-NEXT: call void @use1(i1 [[TMP5]]) ; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17) ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> poison, <8 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP8]], <4 x i1> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP8]], <8 x i1> [[TMP9]], <8 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = freeze <8 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP6]]) ; CHECK-NEXT: ret i1 [[TMP7]] @@ -316,7 +318,8 @@ define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) { ; CHECK-LABEL: @logical_and_icmp_clamp_v8i32( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> , <4 x i32> [[TMP2]], i64 4) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> , <8 x i32> [[TMP7]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]]) @@ -392,7 +395,7 @@ define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) { ; CHECK-LABEL: @logical_and_icmp_clamp_pred_diff( ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP2]], <4 x i32> [[X]], i64 0) +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <8 x i32> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <8 x i32> [[TMP3]], [[TMP4]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-with-removed-extracts.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-with-removed-extracts.ll index 799533824c5aa..fe5f4deecb8b3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-with-removed-extracts.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-with-removed-extracts.ll @@ -9,19 +9,16 @@ define i32 @test(i32 %arg) { ; CHECK-NEXT: br label %[[BB1:.*]] ; CHECK: [[BB1]]: ; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[OP_RDX:%.*]], %[[BB1]] ] -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.vector.insert.v4i64.v2i64(<4 x i64> , <2 x i64> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i64> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i64> [[TMP2]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> zeroinitializer, [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP0]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = mul <2 x i32> zeroinitializer, [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> [[TMP7]], i64 0) +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = mul <4 x i32> [[TMP11]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP7]], <4 x i32> [[RDX_OP]], i64 0) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[RDX_OP]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP7]], <8 x i32> [[TMP14]], <8 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP12]]) ; CHECK-NEXT: [[OP_RDX]] = mul i32 0, [[TMP13]] ; CHECK-NEXT: br label %[[BB1]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark-partial-loads-vectorize.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark-partial-loads-vectorize.ll index 8aaa71ef47a8c..c258c7d54df82 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark-partial-loads-vectorize.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark-partial-loads-vectorize.ll @@ -20,7 +20,8 @@ define <4 x float> @test(ptr %x, float %v, float %a) { ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[V]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP1]], i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP3]], [[TMP7]] ; CHECK-NEXT: ret <4 x float> [[TMP8]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll index 02058b1fe8578..19ce11c457f63 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll @@ -15,7 +15,8 @@ define void @test() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 2 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP6]], <2 x i32> [[TMP10]], i64 0) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[TMP7]] ; CHECK-NEXT: [[TMP4:%.*]] = ashr <4 x i32> [[TMP3]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> @@ -75,7 +76,8 @@ define void @test1() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 2 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP9]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP12]], <2 x i32> [[TMP10]], i64 0) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> [[TMP14]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[TMP7]] ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i32> [[TMP3]] to <4 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], zeroinitializer @@ -137,7 +139,8 @@ define void @test_div() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 2 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP4]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP11]], <2 x i32> [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP1]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = udiv <4 x i32> [[TMP9]], ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <4 x i32> @@ -197,7 +200,8 @@ define void @test_rem() { ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 2 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP4]], i32 3 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP11]], <2 x i32> [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP1]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = urem <4 x i32> [[TMP9]], ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-subvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-subvector.ll index af9d808f45fa1..3f6ec8ccad4ee 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-subvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-subvector.ll @@ -56,7 +56,8 @@ define void @test(i32 %j.6, i32 %m.4, i8 %v.5, ptr %a, i1 %tobool14.not) { ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP24]], i32 [[SUB13]], i32 1 ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i32> [[TMP25]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i32> [[TMP26]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP27]], <4 x i32> [[TMP23]], i64 4) +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <8 x i32> [[TMP27]], <8 x i32> [[TMP34]], <8 x i32> ; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <8 x i32> [[TMP28]], <8 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x i32> [[TMP29]], <2 x i32> , <2 x i32> ; CHECK-NEXT: br i1 [[TOBOOL14_NOT]], label %[[IF_END18]], label %[[Q]] @@ -68,11 +69,13 @@ define void @test(i32 %j.6, i32 %m.4, i8 %v.5, ptr %a, i1 %tobool14.not) { ; CHECK-NEXT: [[CONV17:%.*]] = sext i8 [[V_44]] to i32 ; CHECK-NEXT: [[REM:%.*]] = mul i32 [[U_4]], [[CONV17]] ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x i32> , i32 [[REM]], i32 5 -; CHECK-NEXT: [[TMP34:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP33]], <4 x i32> [[TMP32]], i64 0) +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i32> [[TMP32]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <2 x i32> [[TMP31]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = shufflevector <8 x i32> [[TMP33]], <8 x i32> [[TMP39]], <8 x i32> ; CHECK-NEXT: br label %[[IF_END18]] ; CHECK: [[IF_END18]]: ; CHECK-NEXT: [[L_4]] = phi i8 [ 0, %[[Q]] ], [ [[L_3_PH]], %[[O]] ] -; CHECK-NEXT: [[TMP35]] = phi <8 x i32> [ [[TMP34]], %[[Q]] ], [ [[TMP28]], %[[O]] ] +; CHECK-NEXT: [[TMP35]] = phi <8 x i32> [ [[TMP40]], %[[Q]] ], [ [[TMP28]], %[[O]] ] ; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <8 x i32> [[TMP35]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP37]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP36]], <4 x i32> ; CHECK-NEXT: br i1 [[TOBOOL14_NOT]], label %[[N]], label %[[P]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/replaced-external-in-reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/replaced-external-in-reduction.ll index 52e13de8118d7..61294089fd4cb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/replaced-external-in-reduction.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/replaced-external-in-reduction.ll @@ -16,9 +16,10 @@ define void @test(i32 %0, ptr %p) { ; CHECK: exit: ; CHECK-NEXT: [[TMP9:%.*]] = phi <8 x i32> [ [[TMP8]], [[ENTRY:%.*]] ], [ [[TMP6]], [[PH]] ] ; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ [[TMP5]], [[ENTRY]] ], [ zeroinitializer, [[PH]] ] -; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> [[TMP9]], i64 0) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = or <4 x i32> [[TMP10]], [[TMP7]] -; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP9]], <4 x i32> [[RDX_OP]], i64 0) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[RDX_OP]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> [[TMP12]], <8 x i32> ; CHECK-NEXT: [[OP_RDX5:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP11]]) ; CHECK-NEXT: [[OP_RDX2:%.*]] = or i32 [[OP_RDX5]], [[OP_RDX]] ; CHECK-NEXT: store i32 [[OP_RDX2]], ptr [[P]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll index ef1149a108e29..20a42777cf8e4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll @@ -7,7 +7,7 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv(i1 %arg) unnamed_addr #0 align 2 { ; CHECK-LABEL: @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv( ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 %arg, label [[IF_END50_I:%.*]], label [[IF_THEN22_I:%.*]] +; CHECK-NEXT: br i1 [[ARG:%.*]], label [[IF_END50_I:%.*]], label [[IF_THEN22_I:%.*]] ; CHECK: if.then22.i: ; CHECK-NEXT: [[SUB_I:%.*]] = add nsw i32 undef, -1 ; CHECK-NEXT: [[CONV31_I:%.*]] = and i32 undef, [[SUB_I]] @@ -24,11 +24,14 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv(i ; CHECK-NEXT: [[TMP9:%.*]] = trunc i32 [[SHR_I_I]] to i8 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i8> [[TMP8]], i8 [[TMP9]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = trunc <8 x i32> [[TMP6]] to <8 x i8> -; CHECK-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP10]], <8 x i8> [[TMP11]], i64 8) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i8> [[TMP11]], <8 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP10]], <16 x i8> [[TMP12]], <16 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8> -; CHECK-NEXT: [[TMP14:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP12]], <4 x i8> [[TMP13]], i64 4) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP19]], <16 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8> -; CHECK-NEXT: [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v2i8(<16 x i8> [[TMP14]], <2 x i8> [[TMP15]], i64 2) +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <2 x i8> [[TMP15]], <2 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP20]], <16 x i8> [[TMP18]], <16 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP16]], splat (i8 1) ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr undef, align 1 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resized-bv-values-non-power-of2-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/resized-bv-values-non-power-of2-node.ll index e56131b4681e3..92a1e289044d7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/resized-bv-values-non-power-of2-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/resized-bv-values-non-power-of2-node.ll @@ -32,14 +32,12 @@ define <16 x half> @test(i32 %0, float %1, i32 %2) { ; CHECK-NEXT: [[TMP29:%.*]] = sitofp <16 x i32> [[TMP28]] to <16 x float> ; CHECK-NEXT: [[TMP30:%.*]] = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> [[TMP29]], <16 x float> zeroinitializer, <16 x float> zeroinitializer) ; CHECK-NEXT: [[TMP31:%.*]] = fadd <16 x float> [[TMP30]], zeroinitializer -; CHECK-NEXT: [[TMP32:%.*]] = call <12 x i1> @llvm.vector.insert.v12i1.v2i1(<12 x i1> poison, <2 x i1> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <12 x i1> [[TMP32]], <12 x i1> , <12 x i32> -; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <12 x i1> [[TMP33]], <12 x i1> poison, <16 x i32> -; CHECK-NEXT: [[TMP35:%.*]] = select <16 x i1> [[TMP34]], <16 x float> zeroinitializer, <16 x float> [[TMP31]] +; CHECK-NEXT: [[TMP35:%.*]] = select <16 x i1> zeroinitializer, <16 x float> zeroinitializer, <16 x float> [[TMP31]] ; CHECK-NEXT: [[TMP36:%.*]] = bitcast <16 x float> [[TMP35]] to <16 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = and <16 x i32> [[TMP36]], zeroinitializer ; CHECK-NEXT: [[TMP38:%.*]] = bitcast <16 x i32> [[TMP37]] to <16 x float> -; CHECK-NEXT: [[TMP39:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> , <2 x float> [[TMP6]], i64 14) +; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x float> , <16 x float> [[TMP53]], <16 x i32> ; CHECK-NEXT: [[TMP40:%.*]] = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> zeroinitializer, <16 x float> [[TMP38]], <16 x float> [[TMP39]]) ; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x float> [[TMP29]], i32 0 ; CHECK-NEXT: [[TMP42:%.*]] = fcmp olt float [[TMP41]], 0.000000e+00 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll index 976de7cc8c21f..f98ed81b087b5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll @@ -4,30 +4,25 @@ define void @test() { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> poison, <4 x i32> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP0]], <4 x i32> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP1]], <4 x i32> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP3:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP2]], <4 x i32> zeroinitializer, i64 12) -; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP3]], <4 x i32> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP4]], <4 x i32> zeroinitializer, i64 20) -; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP5]], <4 x i32> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP6]], <4 x i32> zeroinitializer, i64 28) -; CHECK-NEXT: [[TMP8:%.*]] = trunc <32 x i32> [[TMP7]] to <32 x i1> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[TMP13:%.*]] = phi <32 x i1> [ [[TMP8]], [[ENTRY:%.*]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi <32 x i1> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[NARROW:%.*]] = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer ; CHECK-NEXT: [[NARROW66:%.*]] = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer ; CHECK-NEXT: [[NARROW67:%.*]] = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer ; CHECK-NEXT: [[NARROW68:%.*]] = select <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> poison, <4 x i1> [[NARROW]], i64 0) -; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP10]], <4 x i1> [[NARROW66]], i64 4) -; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP11]], <4 x i1> [[NARROW67]], i64 8) -; CHECK-NEXT: [[TMP19:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP12]], <4 x i1> [[NARROW68]], i64 12) -; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP19]], <4 x i1> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP15:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP14]], <4 x i1> zeroinitializer, i64 20) -; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP15]], <4 x i1> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i1> @llvm.vector.insert.v32i1.v4i1(<32 x i1> [[TMP16]], <4 x i1> zeroinitializer, i64 28) +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[NARROW]], <4 x i1> poison, <32 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i1> [[NARROW66]], <4 x i1> poison, <32 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i1> [[TMP1]], <32 x i1> [[TMP2]], <32 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i1> [[NARROW67]], <4 x i1> poison, <32 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <32 x i1> [[TMP3]], <32 x i1> [[TMP4]], <32 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i1> [[NARROW68]], <4 x i1> poison, <32 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <32 x i1> [[TMP5]], <32 x i1> [[TMP6]], <32 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> zeroinitializer, <4 x i1> poison, <32 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i1> [[TMP7]], <32 x i1> [[TMP8]], <32 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <32 x i1> [[TMP9]], <32 x i1> [[TMP8]], <32 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <32 x i1> [[TMP10]], <32 x i1> [[TMP8]], <32 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <32 x i1> [[TMP11]], <32 x i1> [[TMP8]], <32 x i32> ; CHECK-NEXT: [[TMP18]] = or <32 x i1> [[TMP13]], [[TMP17]] ; CHECK-NEXT: br label [[VECTOR_BODY]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-getStoreMinimumVF.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-getStoreMinimumVF.ll index 3aea112e9edfe..14bdcd062edf8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/revec-getStoreMinimumVF.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-getStoreMinimumVF.ll @@ -4,9 +4,7 @@ define void @test() { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP0]], <4 x i8> zeroinitializer, i64 4) -; CHECK-NEXT: store <8 x i8> [[TMP1]], ptr null, align 1 +; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr null, align 1 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll index 069274df396d7..4990fe102564a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll @@ -8,10 +8,7 @@ define void @test(ptr %in) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[IN]], i64 64 ; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr [[TMP1]], i32 2, <32 x i1> , <32 x i16> poison) ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP4]], <8 x i32> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i32> [[TMP5]] to <16 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i16> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i16> zeroinitializer, [[TMP3]] ; CHECK-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP0]], align 2 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll index 3d0e6be661fd1..8f6a53c03ac68 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll @@ -4,31 +4,17 @@ define <4 x i16> @test() { ; CHECK-LABEL: define <4 x i16> @test() { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP0]], <4 x i16> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP4]], <4 x i16> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP6]], <4 x i16> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP7]], <4 x i16> zeroinitializer, i64 12) -; CHECK-NEXT: [[TMP9:%.*]] = add <16 x i16> [[TMP5]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = add <16 x i16> [[TMP8]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = add <16 x i16> [[TMP3]], [[TMP8]] -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> poison, <4 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP24]]) +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> zeroinitializer, <4 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = add <16 x i16> [[TMP0]], zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer) ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> poison, i16 [[TMP25]], i64 0 -; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> poison, <4 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP27]]) +; CHECK-NEXT: [[TMP28:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer) ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP28]], i64 1 -; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> poison, <4 x i32> -; CHECK-NEXT: [[TMP31:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP30]]) +; CHECK-NEXT: [[TMP31:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer) ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> [[TMP29]], i16 [[TMP31]], i64 2 -; CHECK-NEXT: [[TMP33:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> poison, <4 x i32> -; CHECK-NEXT: [[TMP34:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP33]]) +; CHECK-NEXT: [[TMP34:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> zeroinitializer) ; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i16> [[TMP32]], i16 [[TMP34]], i64 3 -; CHECK-NEXT: [[RDX_OP:%.*]] = or <16 x i16> [[TMP11]], [[TMP9]] +; CHECK-NEXT: [[RDX_OP:%.*]] = or <16 x i16> zeroinitializer, [[TMP1]] ; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i16> [[RDX_OP]], <16 x i16> poison, <4 x i32> ; CHECK-NEXT: [[TMP37:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP36]]) ; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x i16> poison, i16 [[TMP37]], i64 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll b/llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll index 3b9222b7d5ed1..9c0f65ec27165 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll @@ -35,13 +35,15 @@ define i32 @test(ptr %f, i1 %tobool.i.4, i32 %retval.0.i.219) { ; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> [[TMP15]], [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP21]], <4 x i32> [[TMP10]], i64 4) +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i32> [[TMP21]], <8 x i32> [[TMP24]], <8 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i32> [[TMP23]], <8 x i32> , <8 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = add <8 x i32> [[TMP18]], [[TMP22]] -; CHECK-NEXT: [[TMP20:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> [[TMP19]], i64 0) +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i32> [[TMP19]], <8 x i32> poison, <4 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = or <4 x i32> [[TMP20]], [[TMP16]] -; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP19]], <4 x i32> [[RDX_OP]], i64 0) +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i32> [[RDX_OP]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP19]], <8 x i32> [[TMP25]], <8 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP12]]) ; CHECK-NEXT: ret i32 [[TMP17]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll index 4cf2f99e60aeb..8dc8db9b444dc 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll @@ -89,8 +89,9 @@ define dso_local void @test_unordered_splits(ptr nocapture %p) local_unnamed_add ; CHECK-NEXT: [[G20:%.*]] = getelementptr inbounds [16 x i32], ptr [[P2]], i32 0, i64 12 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[G10]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[G20]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP2]], <4 x i32> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <8 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void @@ -147,10 +148,13 @@ define dso_local void @test_cost_splits(ptr nocapture %p) local_unnamed_addr { ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[G12]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[G20]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[G22]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> poison, <2 x i32> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP4]], <2 x i32> [[TMP1]], i64 2) -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP5]], <2 x i32> [[TMP2]], i64 4) -; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> [[TMP6]], <2 x i32> [[TMP3]], i64 6) +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP10]], <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <8 x i32> ; CHECK-NEXT: store <8 x i32> [[TMP7]], ptr [[P:%.*]], align 4 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-full-match.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-full-match.ll index 10e73b042f19b..f6bf138944749 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-node-full-match.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-full-match.ll @@ -18,20 +18,21 @@ define void @test(double %0) { ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> poison, <6 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = call <6 x double> @llvm.vector.insert.v6f64.v2f64(<6 x double> [[TMP11]], <2 x double> [[TMP10]], i64 4) +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x double> [[TMP10]], <2 x double> poison, <6 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <6 x double> [[TMP11]], <6 x double> [[TMP13]], <6 x i32> ; CHECK-NEXT: br i1 false, label %[[DOTLR_PH272_PREHEADER:.*]], [[DOT_CRIT_EDGE:label %.*]] ; CHECK: [[_LR_PH272_PREHEADER:.*:]] -; CHECK-NEXT: br i1 false, [[DOT_CRIT_EDGE]], label %[[BB13:.*]] -; CHECK: [[BB13]]: +; CHECK-NEXT: br i1 false, [[DOT_CRIT_EDGE]], label %[[BB14:.*]] +; CHECK: [[BB14]]: ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <6 x double> [[TMP12]], <6 x double> poison, <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x double> [[TMP14]], <4 x double> poison, <6 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = call <6 x double> @llvm.vector.insert.v6f64.v2f64(<6 x double> [[TMP15]], <2 x double> splat (double 0x7FF8000000000000), i64 4) -; CHECK-NEXT: br i1 false, label %[[BB17:.*]], [[DOT_CRIT_EDGE]] -; CHECK: [[BB17]]: +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <6 x double> [[TMP15]], <6 x double> , <6 x i32> +; CHECK-NEXT: br i1 false, label %[[BB18:.*]], [[DOT_CRIT_EDGE]] +; CHECK: [[BB18]]: ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <6 x double> , double [[TMP0]], i32 3 ; CHECK-NEXT: br [[DOT_CRIT_EDGE]] ; CHECK: [[__CRIT_EDGE:.*:]] -; CHECK-NEXT: [[TMP19:%.*]] = phi <6 x double> [ [[TMP12]], %[[BB7]] ], [ [[TMP18]], %[[BB17]] ], [ [[TMP16]], %[[BB13]] ], [ [[TMP12]], %[[DOTLR_PH272_PREHEADER]] ] +; CHECK-NEXT: [[TMP20:%.*]] = phi <6 x double> [ [[TMP12]], %[[BB7]] ], [ [[TMP18]], %[[BB18]] ], [ [[TMP17]], %[[BB14]] ], [ [[TMP12]], %[[DOTLR_PH272_PREHEADER]] ] ; CHECK-NEXT: ret void ; .thread: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll index 9abb994db1e73..680f950fae975 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll @@ -15,7 +15,8 @@ define i1 @test(ptr %0, ptr %1, <2 x float> %2, <2 x float> %3, <2 x float> %4) ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <8 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[TMP9]], i32 7 ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x float> [[TMP13]], <8 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP18:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP16]], <8 x float> [[TMP15]], i64 8) +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x float> [[TMP16]], <16 x float> [[TMP23]], <16 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP14]], <8 x float> [[TMP12]], <16 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x float> [[TMP19]], float [[TMP9]], i32 15 ; CHECK-NEXT: [[TMP20:%.*]] = fmul <16 x float> [[TMP18]], [[TMP17]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-parent-operands-in-spill.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-parent-operands-in-spill.ll index 5491e8ea7e0f8..cd3663e28eb75 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-node-parent-operands-in-spill.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-parent-operands-in-spill.ll @@ -28,10 +28,11 @@ define void @test(i32 %arg) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[LOAD3]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[LOAD2]], i32 3 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP5]], <4 x i32> [[TMP4]], i64 4) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> ; CHECK-NEXT: br label %[[BB12]] ; CHECK: [[BB12]]: -; CHECK-NEXT: [[TMP7:%.*]] = phi <8 x i32> [ [[TMP6]], %[[BB8]] ], [ poison, %[[BB6]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi <8 x i32> [ [[TMP7]], %[[BB8]] ], [ poison, %[[BB6]] ] ; CHECK-NEXT: ret void ; CHECK: [[BB21]]: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-node-reorder-node-with-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-node-reorder-node-with-ops.ll index 5bfbd69330564..8e09847e9264e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-node-reorder-node-with-ops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-node-reorder-node-with-ops.ll @@ -27,7 +27,8 @@ define void @test(i32 %0, i8 %1, i64 %2, float %3) { ; CHECK-NEXT: [[TMP25:%.*]] = sitofp <2 x i32> [[TMP24]] to <2 x float> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <2 x float> [[TMP25]], <2 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x float> [[TMP26]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP28:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP27]], <4 x float> [[TMP16]], i64 4) +; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x float> [[TMP16]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <8 x float> [[TMP27]], <8 x float> [[TMP51]], <8 x i32> ; CHECK-NEXT: [[TMP29:%.*]] = fdiv <8 x float> zeroinitializer, [[TMP28]] ; CHECK-NEXT: [[TMP30:%.*]] = call <8 x float> @llvm.fabs.v8f32(<8 x float> [[TMP29]]) ; CHECK-NEXT: [[TMP31:%.*]] = bitcast <8 x float> [[TMP30]] to <8 x i32> @@ -50,19 +51,21 @@ define void @test(i32 %0, i8 %1, i64 %2, float %3) { ; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i64 0, i64 8388608 ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <8 x i1> [[TMP32]], i32 1 ; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i64 0, i64 32768 -; CHECK-NEXT: br label %[[BB52:.*]] -; CHECK: [[BB51:.*]]: -; CHECK-NEXT: unreachable -; CHECK: [[BB52]]: ; CHECK-NEXT: br label %[[BB53:.*]] +; CHECK: [[BB52:.*]]: +; CHECK-NEXT: unreachable ; CHECK: [[BB53]]: +; CHECK-NEXT: br label %[[BB54:.*]] +; CHECK: [[BB54]]: ; CHECK-NEXT: [[TMP54:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP17]]) ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 0, ptr null) ; CHECK-NEXT: [[TMP55:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[TMP21]]) ; CHECK-NEXT: [[TMP56:%.*]] = insertelement <8 x float> poison, float [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <8 x float> [[TMP56]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP58:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP57]], <2 x float> [[TMP55]], i64 0) -; CHECK-NEXT: [[TMP59:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP58]], <2 x float> [[TMP54]], i64 6) +; CHECK-NEXT: [[TMP87:%.*]] = shufflevector <2 x float> [[TMP55]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP88:%.*]] = shufflevector <8 x float> [[TMP57]], <8 x float> [[TMP87]], <8 x i32> +; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <2 x float> [[TMP54]], <2 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <8 x float> [[TMP88]], <8 x float> [[TMP89]], <8 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = bitcast <8 x float> [[TMP59]] to <8 x i32> ; CHECK-NEXT: [[TMP61:%.*]] = icmp ult <8 x i32> [[TMP60]], splat (i32 1325400064) ; CHECK-NEXT: [[TMP62:%.*]] = extractelement <8 x i1> [[TMP61]], i32 5 @@ -94,7 +97,7 @@ define void @test(i32 %0, i8 %1, i64 %2, float %3) { ; CHECK-NEXT: [[TMP85:%.*]] = or i64 [[TMP84]], [[TMP48]] ; CHECK-NEXT: [[TMP86:%.*]] = or i64 [[TMP85]], [[TMP81]] ; CHECK-NEXT: store i64 [[TMP86]], ptr null, align 1 -; CHECK-NEXT: br label %[[BB51]] +; CHECK-NEXT: br label %[[BB52]] ; %5 = and i64 %2, 255 %6 = and i64 %2, -65536 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/split-vector-operand-with-reuses.ll b/llvm/test/Transforms/SLPVectorizer/X86/split-vector-operand-with-reuses.ll index dd804663ff121..972a58cecc822 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/split-vector-operand-with-reuses.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/split-vector-operand-with-reuses.ll @@ -10,11 +10,13 @@ define void @test(ptr %p) { ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX7_US_I_1261]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> , <4 x i32> [[TMP2]], i64 4) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> , <16 x i32> [[TMP19]], <16 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX7_US_I_841]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <12 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <12 x i32> @llvm.vector.insert.v12i32.v4i32(<12 x i32> [[TMP6]], <4 x i32> [[TMP5]], i64 8) +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <12 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <12 x i32> [[TMP6]], <12 x i32> [[TMP20]], <12 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i32> , <16 x i32> [[TMP9]], <16 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll index d07353798edc9..3bafc3c6552f2 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll @@ -15,12 +15,14 @@ define i1 @test(i64 %v1, ptr %v2, i32 %v3, i1 %v4) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i32> [[TMP9]], zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[V3]], i32 0 ; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP30]], <2 x i32> [[TMP5]], i64 0) +; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[TMP30]], <4 x i32> [[TMP31]], <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = uitofp <4 x i32> [[TMP10]] to <4 x float> ; CHECK-NEXT: [[TMP12:%.*]] = fdiv <4 x float> zeroinitializer, [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> poison, i1 [[V4]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> [[TMP14]], <2 x i1> [[TMP6]], i64 0) +; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <2 x i1> [[TMP6]], <2 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i1> [[TMP14]], <4 x i1> [[TMP32]], <4 x i32> ; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP15]], <4 x float> zeroinitializer, <4 x float> [[TMP12]] ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP16]], i32 3 ; CHECK-NEXT: [[CONV_I_I1743_3:%.*]] = fptoui float [[TMP17]] to i32 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll index 3eabed5882e58..6073a264b9b12 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll @@ -160,7 +160,8 @@ define void @tiny_tree_not_fully_vectorizable2(ptr noalias nocapture %dst, ptr n ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP4]], <2 x float> [[TMP2]], i64 2) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP6]], <4 x i32> ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[DST_ADDR_022]], align 4 ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds float, ptr [[SRC_ADDR_021]], i64 [[I_023]] ; CHECK-NEXT: [[ADD_PTR8]] = getelementptr inbounds float, ptr [[DST_ADDR_022]], i64 [[I_023]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll b/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll index 4b62ef688ca44..4c295355617e4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll @@ -4,16 +4,7 @@ define i16 @test() { ; CHECK-LABEL: define i16 @test() { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i1> @llvm.vector.insert.v4i1.v2i1(<4 x i1> , <2 x i1> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> zeroinitializer, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i1> [[TMP3]] to <4 x i64> -; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i64> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i1> zeroinitializer, [[TMP1]] -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> zeroinitializer, <4 x i1> [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i1> [[TMP7]] to <4 x i16> -; CHECK-NEXT: [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> [[TMP8]]) +; CHECK-NEXT: [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> zeroinitializer) ; CHECK-NEXT: ret i16 [[TMP9]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll index a821362a883a1..fd3c1a57aff34 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll @@ -7,7 +7,8 @@ define void @vec3_vectorize_call(ptr %Colour, float %0) { ; NON-POW2-NEXT: entry: ; NON-POW2-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[COLOUR:%.*]], align 4 ; NON-POW2-NEXT: [[TMP2:%.*]] = insertelement <3 x float> poison, float [[TMP0:%.*]], i32 2 -; NON-POW2-NEXT: [[TMP4:%.*]] = call <3 x float> @llvm.vector.insert.v3f32.v2f32(<3 x float> [[TMP2]], <2 x float> [[TMP1]], i64 0) +; NON-POW2-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <3 x i32> +; NON-POW2-NEXT: [[TMP4:%.*]] = shufflevector <3 x float> [[TMP2]], <3 x float> [[TMP3]], <3 x i32> ; NON-POW2-NEXT: [[TMP5:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP4]], <3 x float> zeroinitializer, <3 x float> zeroinitializer) ; NON-POW2-NEXT: store <3 x float> [[TMP5]], ptr [[COLOUR]], align 4 ; NON-POW2-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll index c30f94159916a..32e59697486a7 100644 --- a/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll @@ -16,17 +16,19 @@ define i1 @test(float %0, double %1) { ; X86-NEXT: [[TMP11:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> poison, <4 x i32> ; X86-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> , <4 x i32> ; X86-NEXT: [[TMP13:%.*]] = fmul <4 x double> [[TMP10]], [[TMP12]] -; X86-NEXT: [[TMP14:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> , <4 x double> [[TMP13]], i64 0) -; X86-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v6f64(<8 x double> , <6 x double> [[TMP8]], i64 0) -; X86-NEXT: [[TMP16:%.*]] = fsub <8 x double> [[TMP14]], [[TMP15]] -; X86-NEXT: [[TMP17:%.*]] = fmul <8 x double> [[TMP14]], [[TMP15]] -; X86-NEXT: [[TMP18:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> [[TMP17]], <8 x i32> -; X86-NEXT: [[TMP19:%.*]] = fptrunc <8 x double> [[TMP18]] to <8 x float> -; X86-NEXT: [[TMP20:%.*]] = fmul <8 x float> [[TMP19]], zeroinitializer -; X86-NEXT: [[TMP21:%.*]] = fcmp oeq <8 x float> [[TMP20]], zeroinitializer -; X86-NEXT: [[TMP22:%.*]] = freeze <8 x i1> [[TMP21]] -; X86-NEXT: [[TMP23:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP22]]) -; X86-NEXT: ret i1 [[TMP23]] +; X86-NEXT: [[TMP14:%.*]] = shufflevector <4 x double> [[TMP13]], <4 x double> poison, <8 x i32> +; X86-NEXT: [[TMP15:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP14]], <8 x i32> +; X86-NEXT: [[TMP16:%.*]] = shufflevector <6 x double> [[TMP8]], <6 x double> poison, <8 x i32> +; X86-NEXT: [[TMP17:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP16]], <8 x i32> +; X86-NEXT: [[TMP18:%.*]] = fsub <8 x double> [[TMP15]], [[TMP17]] +; X86-NEXT: [[TMP19:%.*]] = fmul <8 x double> [[TMP15]], [[TMP17]] +; X86-NEXT: [[TMP20:%.*]] = shufflevector <8 x double> [[TMP18]], <8 x double> [[TMP19]], <8 x i32> +; X86-NEXT: [[TMP21:%.*]] = fptrunc <8 x double> [[TMP20]] to <8 x float> +; X86-NEXT: [[TMP22:%.*]] = fmul <8 x float> [[TMP21]], zeroinitializer +; X86-NEXT: [[TMP23:%.*]] = fcmp oeq <8 x float> [[TMP22]], zeroinitializer +; X86-NEXT: [[TMP24:%.*]] = freeze <8 x i1> [[TMP23]] +; X86-NEXT: [[TMP25:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP24]]) +; X86-NEXT: ret i1 [[TMP25]] ; ; AARCH64-LABEL: define i1 @test ; AARCH64-SAME: (float [[TMP0:%.*]], double [[TMP1:%.*]]) { @@ -42,17 +44,19 @@ define i1 @test(float %0, double %1) { ; AARCH64-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> , <4 x i32> ; AARCH64-NEXT: [[TMP13:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP4]], <4 x i32> ; AARCH64-NEXT: [[TMP14:%.*]] = fmul <4 x double> [[TMP10]], [[TMP13]] -; AARCH64-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> , <4 x double> [[TMP14]], i64 0) -; AARCH64-NEXT: [[TMP16:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v6f64(<8 x double> , <6 x double> [[TMP8]], i64 0) -; AARCH64-NEXT: [[TMP17:%.*]] = fsub <8 x double> [[TMP15]], [[TMP16]] -; AARCH64-NEXT: [[TMP18:%.*]] = fmul <8 x double> [[TMP15]], [[TMP16]] -; AARCH64-NEXT: [[TMP19:%.*]] = shufflevector <8 x double> [[TMP17]], <8 x double> [[TMP18]], <8 x i32> -; AARCH64-NEXT: [[TMP20:%.*]] = fptrunc <8 x double> [[TMP19]] to <8 x float> -; AARCH64-NEXT: [[TMP21:%.*]] = fmul <8 x float> [[TMP20]], zeroinitializer -; AARCH64-NEXT: [[TMP22:%.*]] = fcmp oeq <8 x float> [[TMP21]], zeroinitializer -; AARCH64-NEXT: [[TMP23:%.*]] = freeze <8 x i1> [[TMP22]] -; AARCH64-NEXT: [[TMP24:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP23]]) -; AARCH64-NEXT: ret i1 [[TMP24]] +; AARCH64-NEXT: [[TMP15:%.*]] = shufflevector <4 x double> [[TMP14]], <4 x double> poison, <8 x i32> +; AARCH64-NEXT: [[TMP16:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP15]], <8 x i32> +; AARCH64-NEXT: [[TMP17:%.*]] = shufflevector <6 x double> [[TMP8]], <6 x double> poison, <8 x i32> +; AARCH64-NEXT: [[TMP18:%.*]] = shufflevector <8 x double> , <8 x double> [[TMP17]], <8 x i32> +; AARCH64-NEXT: [[TMP19:%.*]] = fsub <8 x double> [[TMP16]], [[TMP18]] +; AARCH64-NEXT: [[TMP20:%.*]] = fmul <8 x double> [[TMP16]], [[TMP18]] +; AARCH64-NEXT: [[TMP21:%.*]] = shufflevector <8 x double> [[TMP19]], <8 x double> [[TMP20]], <8 x i32> +; AARCH64-NEXT: [[TMP22:%.*]] = fptrunc <8 x double> [[TMP21]] to <8 x float> +; AARCH64-NEXT: [[TMP23:%.*]] = fmul <8 x float> [[TMP22]], zeroinitializer +; AARCH64-NEXT: [[TMP24:%.*]] = fcmp oeq <8 x float> [[TMP23]], zeroinitializer +; AARCH64-NEXT: [[TMP25:%.*]] = freeze <8 x i1> [[TMP24]] +; AARCH64-NEXT: [[TMP26:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP25]]) +; AARCH64-NEXT: ret i1 [[TMP26]] ; %3 = fpext float %0 to double %4 = fpext float 0.000000e+00 to double diff --git a/llvm/test/Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll b/llvm/test/Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll index a42c8f2c650ae..fff988a0a746e 100644 --- a/llvm/test/Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll +++ b/llvm/test/Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll @@ -28,10 +28,14 @@ define i32 @test(i8 %0) { ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x i8> [[TMP18]], i8 [[TMP13]], i32 1 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <8 x i8> [[TMP17]], [[TMP19]] ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <64 x i1> , i1 [[CMP13_NOT_5]], i32 0 -; CHECK-NEXT: [[TMP22:%.*]] = call <64 x i1> @llvm.vector.insert.v64i1.v8i1(<64 x i1> [[TMP21]], <8 x i1> [[TMP8]], i64 8) -; CHECK-NEXT: [[TMP23:%.*]] = call <64 x i1> @llvm.vector.insert.v64i1.v8i1(<64 x i1> [[TMP22]], <8 x i1> [[TMP20]], i64 56) -; CHECK-NEXT: [[TMP24:%.*]] = call <64 x i1> @llvm.vector.insert.v64i1.v4i1(<64 x i1> [[TMP23]], <4 x i1> [[TMP11]], i64 32) -; CHECK-NEXT: [[TMP25:%.*]] = call <64 x i1> @llvm.vector.insert.v64i1.v2i1(<64 x i1> [[TMP24]], <2 x i1> [[TMP3]], i64 6) +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i1> [[TMP8]], <8 x i1> poison, <64 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <64 x i1> [[TMP21]], <64 x i1> [[TMP22]], <64 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> poison, <64 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <64 x i1> [[TMP23]], <64 x i1> [[TMP24]], <64 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <4 x i1> [[TMP11]], <4 x i1> poison, <64 x i32> +; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <64 x i1> [[TMP29]], <64 x i1> [[TMP30]], <64 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x i1> [[TMP3]], <2 x i1> poison, <64 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <64 x i1> [[TMP31]], <64 x i1> [[TMP28]], <64 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = select <64 x i1> [[TMP25]], <64 x i32> zeroinitializer, <64 x i32> zeroinitializer ; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.or.v64i32(<64 x i32> [[TMP26]]) ; CHECK-NEXT: ret i32 [[TMP27]] diff --git a/llvm/test/Transforms/SLPVectorizer/partial-register-extract.ll b/llvm/test/Transforms/SLPVectorizer/partial-register-extract.ll index f8a6c4dab3d51..c0a0318efd19e 100644 --- a/llvm/test/Transforms/SLPVectorizer/partial-register-extract.ll +++ b/llvm/test/Transforms/SLPVectorizer/partial-register-extract.ll @@ -24,9 +24,10 @@ define i32 @test(i32 %v, ptr %p) { ; CHECK-NEXT: [[OP_RDX2:%.*]] = or i64 [[OP_RDX1]], [[I9_I_I]] ; CHECK-NEXT: [[TMP10:%.*]] = freeze <16 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP2]] -; CHECK-NEXT: [[TMP14:%.*]] = call <4 x i1> @llvm.vector.extract.v4i1.v16i1(<16 x i1> [[TMP10]], i64 0) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP10]], <16 x i1> poison, <4 x i32> ; CHECK-NEXT: [[RDX_OP:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> splat (i1 true), <4 x i1> [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v4i1(<16 x i1> [[TMP10]], <4 x i1> [[RDX_OP]], i64 0) +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i1> [[RDX_OP]], <4 x i1> poison, <16 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i1> [[TMP10]], <16 x i1> [[TMP15]], <16 x i32> ; CHECK-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP13]]) ; CHECK-NEXT: [[AND252_US_I_24_I_I:%.*]] = select i1 [[OP_RDX]], i32 0, i32 0 ; CHECK-NEXT: br label %[[INC]] diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll index 3ef0de177b478..304af88b6d134 100644 --- a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll @@ -10,9 +10,10 @@ define i64 @test(ptr %p) { ; RISCV-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4 ; RISCV-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4 ; RISCV-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_4]], align 4 -; RISCV-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> -; RISCV-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) -; RISCV-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) +; RISCV-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> +; RISCV-NEXT: [[TMP3:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> [[TMP2]], <8 x i32> +; RISCV-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <8 x i32> +; RISCV-NEXT: [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP7]], <8 x i32> ; RISCV-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], splat (i64 42) ; RISCV-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) ; RISCV-NEXT: ret i64 [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll index caca410f056c1..8e71f884b3bb4 100644 --- a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll +++ b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll @@ -14,10 +14,10 @@ define void @func(i32 %0) { ; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <32 x i32> , i32 [[TMP11]], i32 30 ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <32 x i32> [[TMP12]], <32 x i32> poison, <32 x i32> -; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP13]], <8 x i32> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP15:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP14]], <4 x i32> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v2i32(<32 x i32> [[TMP15]], <2 x i32> zeroinitializer, i64 14) -; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v2i32(<32 x i32> [[TMP16]], <2 x i32> zeroinitializer, i64 28) +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <32 x i32> [[TMP13]], <32 x i32> , <32 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <32 x i32> [[TMP15]], <32 x i32> , <32 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i32> [[TMP16]], <32 x i32> , <32 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <32 x i32> [[TMP14]], <32 x i32> , <32 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i32> [[TMP8]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = sext <32 x i32> [[TMP18]] to <32 x i64> ; CHECK-NEXT: [[TMP20:%.*]] = icmp slt <32 x i64> [[TMP19]], zeroinitializer diff --git a/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll b/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll index 9dbaadeca1f41..1572b6ba3307d 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll @@ -42,7 +42,7 @@ define void @test_missing_lanes_1_3(ptr %ptr, i32 %val0, i32 %val1) { ; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 0 ; CHECK-NEXT: store <4 x i32> [[TMP0]], ptr [[GETELEMENTPTR0]], align 4 ; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr i32, ptr [[PTR]], i64 4 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[GETELEMENTPTR1]], align 4 ; CHECK-NEXT: [[GETELEMENTPTR3:%.*]] = getelementptr i32, ptr [[PTR]], i64 12 ; CHECK-NEXT: store <4 x i32> poison, ptr [[GETELEMENTPTR3]], align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll index 250c60a61fea1..5611fda2c0223 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll @@ -32,7 +32,8 @@ define i1 @logical_and_icmp_clamp(<4 x i32> %x) { ; X86-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 42) ; X86-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17) ; X86-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> poison, <8 x i32> -; X86-NEXT: [[TMP4:%.*]] = call <8 x i1> @llvm.vector.insert.v8i1.v4i1(<8 x i1> [[TMP3]], <4 x i1> [[TMP1]], i64 4) +; X86-NEXT: [[TMP7:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <8 x i32> +; X86-NEXT: [[TMP4:%.*]] = shufflevector <8 x i1> [[TMP3]], <8 x i1> [[TMP7]], <8 x i32> ; X86-NEXT: [[TMP5:%.*]] = freeze <8 x i1> [[TMP4]] ; X86-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP5]]) ; X86-NEXT: ret i1 [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll index cec99c694391b..b738d25b39be1 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll @@ -7,9 +7,8 @@ define void @test1(ptr %in, ptr %out) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1 ; CHECK-NEXT: [[OUT:%.*]] = getelementptr inbounds i64, ptr [[OUT1:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = zext <8 x i32> [[TMP4]] to <8 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <16 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <8 x i32> @@ -20,9 +19,8 @@ define void @test1(ptr %in, ptr %out) { ; COMBINE-NEXT: entry: ; COMBINE-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1 ; COMBINE-NEXT: [[OUT:%.*]] = getelementptr inbounds i64, ptr [[OUT1:%.*]], i64 0 -; COMBINE-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0) -; COMBINE-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> -; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> +; COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> +; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; COMBINE-NEXT: [[TMP5:%.*]] = zext <8 x i32> [[TMP4]] to <8 x i64> ; COMBINE-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <16 x i32> ; COMBINE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[TMP5]], <8 x i64> poison, <8 x i32> @@ -55,9 +53,8 @@ define void @test2(ptr %in, ptr %out) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1 ; CHECK-NEXT: [[OUT:%.*]] = getelementptr inbounds i64, ptr [[OUT1:%.*]], i64 0 -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i32> [[TMP4]] to <8 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <16 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <8 x i32> @@ -68,9 +65,8 @@ define void @test2(ptr %in, ptr %out) { ; COMBINE-NEXT: entry: ; COMBINE-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 1 ; COMBINE-NEXT: [[OUT:%.*]] = getelementptr inbounds i64, ptr [[OUT1:%.*]], i64 0 -; COMBINE-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0) -; COMBINE-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> -; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <8 x i32> +; COMBINE-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> +; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; COMBINE-NEXT: [[TMP1:%.*]] = zext <8 x i32> [[TMP4]] to <8 x i64> ; COMBINE-NEXT: [[TMP6:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <16 x i32> ; COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> poison, <8 x i32> @@ -101,18 +97,16 @@ entry: define void @test3(<16 x i32> %0, ptr %out) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP3:%.*]] = call <64 x i32> @llvm.vector.insert.v64i32.v16i32(<64 x i32> poison, <16 x i32> [[TMP0:%.*]], i64 0) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <64 x i32> [[TMP3]], <64 x i32> poison, <64 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i32> [[TMP3]], <64 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP0:%.*]], <16 x i32> poison, <64 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[OUT:%.*]] = getelementptr inbounds i32, ptr [[OUT1:%.*]], i64 0 ; CHECK-NEXT: store <16 x i32> [[TMP1]], ptr [[OUT]], align 4 ; CHECK-NEXT: ret void ; ; COMBINE-LABEL: @test3( ; COMBINE-NEXT: entry: -; COMBINE-NEXT: [[TMP3:%.*]] = call <64 x i32> @llvm.vector.insert.v64i32.v16i32(<64 x i32> poison, <16 x i32> [[TMP0:%.*]], i64 0) -; COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <64 x i32> [[TMP3]], <64 x i32> poison, <64 x i32> -; COMBINE-NEXT: [[TMP1:%.*]] = shufflevector <64 x i32> [[TMP3]], <64 x i32> poison, <16 x i32> +; COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP0:%.*]], <16 x i32> poison, <64 x i32> +; COMBINE-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <16 x i32> ; COMBINE-NEXT: [[OUT:%.*]] = getelementptr inbounds i32, ptr [[OUT1:%.*]], i64 0 ; COMBINE-NEXT: store <16 x i32> [[TMP1]], ptr [[OUT]], align 4 ; COMBINE-NEXT: ret void @@ -138,9 +132,8 @@ define void @test4(ptr %in, ptr %out) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 4 ; CHECK-NEXT: [[OUT:%.*]] = getelementptr inbounds i32, ptr [[OUT1:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> ; CHECK-NEXT: store <16 x i32> [[TMP1]], ptr [[OUT]], align 4 ; CHECK-NEXT: ret void @@ -149,9 +142,8 @@ define void @test4(ptr %in, ptr %out) { ; COMBINE-NEXT: entry: ; COMBINE-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[IN:%.*]], align 4 ; COMBINE-NEXT: [[OUT:%.*]] = getelementptr inbounds i32, ptr [[OUT1:%.*]], i64 0 -; COMBINE-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> [[TMP0]], i64 0) -; COMBINE-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <16 x i32> -; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> +; COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> +; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> ; COMBINE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <16 x i32> ; COMBINE-NEXT: store <16 x i32> [[TMP1]], ptr [[OUT]], align 4 ; COMBINE-NEXT: ret void @@ -174,20 +166,14 @@ entry: define void @test5(ptr %out) { ; CHECK-LABEL: @test5( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 0 -; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[TMP3]], align 4 +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr [[TMP3]], align 4 ; CHECK-NEXT: ret void ; ; COMBINE-LABEL: @test5( ; COMBINE-NEXT: entry: -; COMBINE-NEXT: [[TMP0:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> poison, <8 x i32> zeroinitializer, i64 0) -; COMBINE-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 8) -; COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <8 x i32> ; COMBINE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 0 -; COMBINE-NEXT: store <8 x i32> [[TMP2]], ptr [[TMP3]], align 4 +; COMBINE-NEXT: store <8 x i32> zeroinitializer, ptr [[TMP3]], align 4 ; COMBINE-NEXT: ret void ; entry: @@ -214,7 +200,8 @@ define void @test6(ptr %in0, ptr %in1, ptr %in2) { ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <32 x i16> [[TMP9]], <32 x i16> poison, <32 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = uitofp <32 x i16> [[TMP10]] to <32 x float> ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP16:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP14]], <4 x float> [[LOAD2]], i64 8) +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x float> [[TMP14]], <16 x float> [[TMP19]], <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP16]], <16 x float> poison, <32 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = fmul <32 x float> [[TMP3]], [[TMP2]] ; CHECK-NEXT: [[GEP10:%.*]] = getelementptr inbounds i8, ptr [[IN1]], i64 32 @@ -222,18 +209,18 @@ define void @test6(ptr %in0, ptr %in1, ptr %in2) { ; CHECK-NEXT: [[TMP17:%.*]] = load <8 x float>, ptr [[IN0]], align 16 ; CHECK-NEXT: store <32 x float> [[TMP4]], ptr [[IN2]], align 16 ; CHECK-NEXT: [[LOAD5:%.*]] = load <16 x i8>, ptr [[GEP10]], align 1 -; CHECK-NEXT: [[TMP19:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> [[LOAD5]], i64 0) -; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <32 x i8> [[TMP19]], <32 x i8> poison, <32 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <32 x i8> [[TMP19]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <16 x i8> [[LOAD5]], <16 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <16 x i8> [[LOAD5]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP15]] to <16 x i16> ; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <16 x i16> [[TMP25]], <16 x i16> poison, <32 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i16> [[TMP25]], <16 x i16> poison, <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = uitofp <16 x i16> [[TMP18]] to <16 x float> -; CHECK-NEXT: [[TMP20:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> [[LOAD2]], i64 0) -; CHECK-NEXT: [[TMP21:%.*]] = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> [[TMP17]], i64 0) -; CHECK-NEXT: [[TMP22:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP20]], <4 x float> [[TMP21]], i64 4) -; CHECK-NEXT: [[TMP23:%.*]] = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> [[TMP17]], i64 4) -; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP22]], <4 x float> [[TMP23]], i64 8) +; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP20]], <4 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP21]], <16 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x float> [[TMP17]], <8 x float> poison, <4 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP22]], <16 x float> [[TMP24]], <16 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <16 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = fmul <16 x float> [[TMP12]], [[TMP6]] ; CHECK-NEXT: store <16 x float> [[TMP13]], ptr [[GEP11]], align 16 @@ -252,7 +239,8 @@ define void @test6(ptr %in0, ptr %in1, ptr %in2) { ; COMBINE-NEXT: [[TMP19:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> poison, <32 x i32> ; COMBINE-NEXT: [[TMP2:%.*]] = uitofp <32 x i16> [[TMP19]] to <32 x float> ; COMBINE-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> -; COMBINE-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP3]], <4 x float> [[LOAD2]], i64 8) +; COMBINE-NEXT: [[TMP13:%.*]] = shufflevector <4 x float> [[LOAD2]], <4 x float> poison, <16 x i32> +; COMBINE-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> [[TMP13]], <16 x i32> ; COMBINE-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <32 x i32> ; COMBINE-NEXT: [[TMP7:%.*]] = fmul <32 x float> [[TMP6]], [[TMP2]] ; COMBINE-NEXT: [[GEP10:%.*]] = getelementptr inbounds i8, ptr [[IN1]], i64 32 @@ -260,18 +248,18 @@ define void @test6(ptr %in0, ptr %in1, ptr %in2) { ; COMBINE-NEXT: [[TMP8:%.*]] = load <8 x float>, ptr [[IN0]], align 16 ; COMBINE-NEXT: store <32 x float> [[TMP7]], ptr [[IN2]], align 16 ; COMBINE-NEXT: [[LOAD5:%.*]] = load <16 x i8>, ptr [[GEP10]], align 1 -; COMBINE-NEXT: [[TMP13:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> [[LOAD5]], i64 0) -; COMBINE-NEXT: [[TMP14:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> poison, <32 x i32> -; COMBINE-NEXT: [[TMP24:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> poison, <16 x i32> +; COMBINE-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[LOAD5]], <16 x i8> poison, <32 x i32> +; COMBINE-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[LOAD5]], <16 x i8> poison, <16 x i32> ; COMBINE-NEXT: [[TMP25:%.*]] = zext <16 x i8> [[TMP24]] to <16 x i16> ; COMBINE-NEXT: [[TMP26:%.*]] = shufflevector <16 x i16> [[TMP25]], <16 x i16> poison, <32 x i32> ; COMBINE-NEXT: [[TMP18:%.*]] = shufflevector <16 x i16> [[TMP25]], <16 x i16> poison, <16 x i32> ; COMBINE-NEXT: [[TMP9:%.*]] = uitofp <16 x i16> [[TMP18]] to <16 x float> -; COMBINE-NEXT: [[TMP20:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> [[LOAD2]], i64 0) -; COMBINE-NEXT: [[TMP21:%.*]] = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> [[TMP8]], i64 0) -; COMBINE-NEXT: [[TMP22:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP20]], <4 x float> [[TMP21]], i64 4) -; COMBINE-NEXT: [[TMP23:%.*]] = call <4 x float> @llvm.vector.extract.v4f32.v8f32(<8 x float> [[TMP8]], i64 4) -; COMBINE-NEXT: [[TMP15:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP22]], <4 x float> [[TMP23]], i64 8) +; COMBINE-NEXT: [[TMP20:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <4 x i32> +; COMBINE-NEXT: [[TMP21:%.*]] = shufflevector <4 x float> [[TMP20]], <4 x float> poison, <16 x i32> +; COMBINE-NEXT: [[TMP22:%.*]] = shufflevector <16 x float> [[TMP13]], <16 x float> [[TMP21]], <16 x i32> +; COMBINE-NEXT: [[TMP23:%.*]] = shufflevector <8 x float> [[TMP8]], <8 x float> poison, <4 x i32> +; COMBINE-NEXT: [[TMP27:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> poison, <16 x i32> +; COMBINE-NEXT: [[TMP15:%.*]] = shufflevector <16 x float> [[TMP22]], <16 x float> [[TMP27]], <16 x i32> ; COMBINE-NEXT: [[TMP16:%.*]] = shufflevector <16 x float> [[TMP15]], <16 x float> poison, <16 x i32> ; COMBINE-NEXT: [[TMP17:%.*]] = fmul <16 x float> [[TMP16]], [[TMP9]] ; COMBINE-NEXT: store <16 x float> [[TMP17]], ptr [[GEP11]], align 16 @@ -365,40 +353,12 @@ entry: define i32 @test7() { ; CHECK-LABEL: @test7( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP2:%.*]] = fsub <16 x float> [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[TMP1]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP3]], <32 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP3]], <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP6]], <4 x float> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP7]], <4 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP8]], <4 x float> zeroinitializer, i64 12) -; CHECK-NEXT: [[TMP10:%.*]] = fadd <16 x float> [[TMP9]], [[TMP5]] -; CHECK-NEXT: [[TMP11:%.*]] = fsub <16 x float> [[TMP9]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> [[TMP11]], <16 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = fadd <16 x float> [[TMP9]], [[TMP12]] -; CHECK-NEXT: store <16 x float> [[TMP13]], ptr null, align 16 +; CHECK-NEXT: store <16 x float> zeroinitializer, ptr null, align 16 ; CHECK-NEXT: ret i32 0 ; ; COMBINE-LABEL: @test7( ; COMBINE-NEXT: entry: -; COMBINE-NEXT: [[TMP0:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 0) -; COMBINE-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP0]], <8 x float> zeroinitializer, i64 8) -; COMBINE-NEXT: [[TMP2:%.*]] = fsub <16 x float> [[TMP1]], [[TMP1]] -; COMBINE-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[TMP1]], [[TMP1]] -; COMBINE-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP3]], <32 x i32> -; COMBINE-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP3]], <16 x i32> -; COMBINE-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> zeroinitializer, i64 0) -; COMBINE-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP6]], <4 x float> zeroinitializer, i64 4) -; COMBINE-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP7]], <4 x float> zeroinitializer, i64 8) -; COMBINE-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP8]], <4 x float> zeroinitializer, i64 12) -; COMBINE-NEXT: [[TMP10:%.*]] = fadd <16 x float> [[TMP9]], [[TMP5]] -; COMBINE-NEXT: [[TMP11:%.*]] = fsub <16 x float> [[TMP9]], [[TMP5]] -; COMBINE-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> [[TMP11]], <16 x i32> -; COMBINE-NEXT: [[TMP13:%.*]] = fadd <16 x float> [[TMP9]], [[TMP12]] -; COMBINE-NEXT: store <16 x float> [[TMP13]], ptr null, align 16 +; COMBINE-NEXT: store <16 x float> zeroinitializer, ptr null, align 16 ; COMBINE-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll index afe92f89ac0d1..ac8b10a0087d0 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec.ll @@ -93,19 +93,15 @@ define void @test4(ptr %in, ptr %out) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x float>, ptr [[IN:%.*]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> [[TMP4]], <8 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP6:%.*]] = fmul <16 x float> [[TMP3]], [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> [[TMP8]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = fmul <16 x float> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> , <16 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = fadd <16 x float> [[TMP9]], [[TMP6]] -; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <16 x float> [[TMP10]], [[TMP5]] +; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <16 x float> [[TMP10]], zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i1, ptr [[OUT:%.*]], i64 8 -; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i1> @llvm.vector.extract.v8i1.v16i1(<16 x i1> [[TMP11]], i64 8) +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i1> [[TMP5]], <16 x i1> poison, <8 x i32> ; CHECK-NEXT: store <8 x i1> [[TMP13]], ptr [[OUT]], align 1 -; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i1> @llvm.vector.extract.v8i1.v16i1(<16 x i1> [[TMP11]], i64 0) +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i1> [[TMP5]], <16 x i1> poison, <8 x i32> ; CHECK-NEXT: store <8 x i1> [[TMP14]], ptr [[TMP12]], align 1 ; CHECK-NEXT: ret void ; @@ -151,22 +147,14 @@ define <4 x i1> @test6(ptr %in1, ptr %in2) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[IN1:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[IN2:%.*]], align 2 -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP7]], <4 x i32> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP8]], <4 x i32> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> [[TMP9]], <4 x i32> zeroinitializer, i64 12) -; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt <16 x i32> [[TMP6]], [[TMP10]] -; CHECK-NEXT: [[TMP15:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> [[TMP1]], i64 0) -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i16> [[TMP15]], <16 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> poison, <4 x i16> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP18:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP17]], <4 x i16> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP18]], <4 x i16> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP20:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v4i16(<16 x i16> [[TMP19]], <4 x i16> zeroinitializer, i64 12) -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq <16 x i16> [[TMP16]], [[TMP20]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <16 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq <16 x i16> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ugt <32 x i32> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <32 x i1> [[TMP6]], <32 x i1> poison, <16 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = and <16 x i1> [[TMP11]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = icmp ugt <16 x i32> [[TMP6]], [[TMP10]] +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <32 x i1> [[TMP6]], <32 x i1> poison, <16 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = and <16 x i1> [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <16 x i1> [[TMP24]], <16 x i1> poison, <4 x i32> ; CHECK-NEXT: [[TMP26:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP25]]) @@ -217,10 +205,7 @@ entry: define void @test7() { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> poison, <8 x i64> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> [[TMP1]], <8 x i64> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP3:%.*]] = trunc <16 x i64> [[TMP2]] to <16 x i16> -; CHECK-NEXT: store <16 x i16> [[TMP3]], ptr null, align 2 +; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr null, align 2 ; CHECK-NEXT: ret void ; %1 = getelementptr i8, ptr null, i64 16 @@ -234,18 +219,12 @@ define void @test7() { define void @test8() { ; CHECK-LABEL: @test8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> poison, <2 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP0]], <2 x float> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP1]], <2 x float> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v2f32(<8 x float> [[TMP2]], <2 x float> zeroinitializer, i64 6) -; CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP4]], <2 x float> zeroinitializer, i64 2) ; CHECK-NEXT: br i1 false, label [[FOR0:%.*]], label [[FOR_BODY:%.*]] ; CHECK: for0: -; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x float> [ [[TMP3]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = phi <8 x float> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x float> [ [[TMP7]], [[FOR_BODY]] ], [ [[TMP5]], [[ENTRY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x float> [ [[TMP7]], [[FOR_BODY]] ], [ zeroinitializer, [[ENTRY]] ] ; CHECK-NEXT: [[TMP8]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <8 x i32> ; CHECK-NEXT: br i1 false, label [[FOR0]], label [[FOR_BODY]] ; @@ -268,13 +247,9 @@ for.body: define void @test9() { ; CHECK-LABEL: @test9( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> poison, <4 x i16> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.vector.insert.v8i16.v4i16(<8 x i16> [[TMP0]], <4 x i16> zeroinitializer, i64 4) ; CHECK-NEXT: br label [[FOR_BODY13:%.*]] ; CHECK: for.body13: -; CHECK-NEXT: [[TMP2:%.*]] = trunc <8 x i16> [[TMP1]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i1> [[TMP2]] to <8 x i32> -; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr null, align 4 +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr null, align 4 ; CHECK-NEXT: br label [[FOR_BODY13]] ; entry: @@ -293,9 +268,8 @@ define void @test10() { ; CHECK-LABEL: @test10( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr null, align 1 -; CHECK-NEXT: [[TMP2:%.*]] = call <32 x i8> @llvm.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> poison, <32 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <32 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i8> [[TMP4]] to <16 x i16> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> poison, <32 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i16> [[TMP5]], <16 x i16> poison, <16 x i32> @@ -334,14 +308,13 @@ define void @test11(<2 x i64> %0, i64 %1, <2 x i64> %2) { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i64> , [[TMP2:%.*]] ; CHECK-NEXT: [[TMP5:%.*]] = trunc <2 x i64> [[TMP4]] to <2 x i16> -; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i16> @llvm.vector.insert.v4i16.v2i16(<4 x i16> poison, <2 x i16> [[TMP5]], i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i16> [[TMP5]], <2 x i16> poison, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i16> -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.vector.insert.v4i16.v2i16(<4 x i16> [[TMP6]], <2 x i16> [[TMP7]], i64 2) +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i16> [[TMP7]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP10]], <4 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i16> [[TMP8]] to <4 x i8> -; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> poison, <2 x i8> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP11:%.*]] = call <4 x i8> @llvm.vector.insert.v4i8.v2i8(<4 x i8> [[TMP10]], <2 x i8> zeroinitializer, i64 2) -; CHECK-NEXT: [[TMP12:%.*]] = urem <4 x i8> [[TMP9]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <4 x i8> [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[TMP11:%.*]] = urem <4 x i8> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[TMP11]], zeroinitializer ; CHECK-NEXT: ret void ; entry: @@ -365,21 +338,15 @@ define void @test12() { ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = load <16 x float>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> [[TMP4]], i64 0) -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP6]], <8 x float> [[TMP3]], i64 8) -; CHECK-NEXT: [[TMP8:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v16f32(<32 x float> [[TMP7]], <16 x float> [[TMP5]], i64 16) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <32 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <32 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <32 x float> [[TMP6]], <32 x float> [[TMP7]], <32 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <32 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <32 x float> [[TMP10]], <32 x float> [[TMP11]], <32 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = fpext <32 x float> [[TMP8]] to <32 x double> -; CHECK-NEXT: [[TMP10:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> poison, <8 x double> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP11:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP10]], <8 x double> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP12:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP11]], <8 x double> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP13:%.*]] = call <32 x double> @llvm.vector.insert.v32f64.v8f64(<32 x double> [[TMP12]], <8 x double> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP14:%.*]] = fadd <32 x double> [[TMP13]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = fadd <32 x double> zeroinitializer, [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = fptrunc <32 x double> [[TMP14]] to <32 x float> -; CHECK-NEXT: [[TMP16:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> poison, <8 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP17:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP16]], <8 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP18:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP17]], <8 x float> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP19:%.*]] = call <32 x float> @llvm.vector.insert.v32f32.v8f32(<32 x float> [[TMP18]], <8 x float> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP20:%.*]] = fcmp ogt <32 x float> [[TMP19]], [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = fcmp ogt <32 x float> zeroinitializer, [[TMP15]] ; CHECK-NEXT: ret void ; entry: @@ -413,22 +380,17 @@ entry: define void @test13(<8 x i32> %0, ptr %out0, ptr %out1, ptr %out2) { ; CHECK-LABEL: @test13( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> poison, <8 x i32> [[TMP0:%.*]], i64 0) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <32 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> poison, <8 x i32> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP9]], <8 x i32> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP10]], <8 x i32> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP5]], <8 x i32> zeroinitializer, i64 24) -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i32> [[TMP1]], <32 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[TMP0:%.*]], <8 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <16 x i32> ; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: [[TMP4:%.*]] = phi <16 x i32> [ [[TMP3]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP4]], i64 4) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[OR0:%.*]] = or <4 x i32> [[TMP6]], zeroinitializer ; CHECK-NEXT: store <4 x i32> [[OR0]], ptr [[OUT0:%.*]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i32> [[TMP3]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[OUT1:%.*]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v16i32(<16 x i32> [[TMP3]], i64 12) +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i32> [[TMP3]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[OUT2:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -454,19 +416,14 @@ for.end.loopexit: define void @test14(<8 x i1> %0) { ; CHECK-LABEL: @test14( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.vector.insert.v16i1.v8i1(<16 x i1> poison, <8 x i1> [[TMP0:%.*]], i64 0) -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i1> [[TMP1]], <16 x i1> poison, <16 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i1> [[TMP0:%.*]], <8 x i1> poison, <16 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sext <16 x i1> [[TMP2]] to <16 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <32 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.vector.insert.v32i16.v8i16(<32 x i16> poison, <8 x i16> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.vector.insert.v32i16.v8i16(<32 x i16> [[TMP9]], <8 x i16> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.vector.insert.v32i16.v8i16(<32 x i16> [[TMP10]], <8 x i16> zeroinitializer, i64 16) -; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.vector.insert.v32i16.v8i16(<32 x i16> [[TMP7]], <8 x i16> zeroinitializer, i64 24) ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> ; CHECK-NEXT: br label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: [[TMP6:%.*]] = phi <16 x i16> [ [[TMP5]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i16> @llvm.vector.extract.v4i16.v16i16(<16 x i16> [[TMP6]], i64 4) +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[TMP6]], <16 x i16> poison, <4 x i32> ; CHECK-NEXT: [[OR0:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer ; CHECK-NEXT: ret void ; @@ -496,15 +453,9 @@ define i32 @test15() { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, ptr [[TMP1]], align 16 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[TMP1]], align 16 ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr null, align 16 -; CHECK-NEXT: [[TMP4:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> zeroinitializer, i64 0) -; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP4]], <4 x float> zeroinitializer, i64 4) -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP5]], <4 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP6]], <4 x float> zeroinitializer, i64 12) -; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> poison, <4 x float> zeroinitializer, i64 8) -; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v4f32(<16 x float> [[TMP8]], <4 x float> zeroinitializer, i64 12) ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> [[TMP9]], <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = fadd <16 x float> [[TMP7]], [[TMP11]] +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> , <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = fadd <16 x float> zeroinitializer, [[TMP5]] ; CHECK-NEXT: store <16 x float> [[TMP12]], ptr [[TMP0]], align 16 ; CHECK-NEXT: ret i32 0 ;