diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 5b398d3b75f59..5d76ee5a01dce 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -72,6 +72,7 @@ #include #include #include +#include #include #include #include @@ -3478,6 +3479,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { unsigned SubVecNumElts = SubVecTy->getNumElements(); unsigned IdxN = cast(Idx)->getZExtValue(); + if ((IdxN % SubVecNumElts != 0) || (IdxN + SubVecNumElts > DstNumElts)) + return II; + // An insert that entirely overwrites Vec with SubVec is a nop. if (VecNumElts == SubVecNumElts) return replaceInstUsesWith(CI, SubVec); @@ -3486,22 +3490,15 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // shufflevector requires the two input vectors to be the same width. // Elements beyond the bounds of SubVec within the widened vector are // undefined. 
- SmallVector WidenMask; - unsigned i; - for (i = 0; i != SubVecNumElts; ++i) - WidenMask.push_back(i); - for (; i != VecNumElts; ++i) - WidenMask.push_back(PoisonMaskElem); + SmallVector WidenMask(VecNumElts, PoisonMaskElem); + std::iota(WidenMask.begin(), WidenMask.begin() + SubVecNumElts, 0); Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask); - SmallVector Mask; - for (unsigned i = 0; i != IdxN; ++i) - Mask.push_back(i); - for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i) - Mask.push_back(i); - for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i) - Mask.push_back(i); + SmallVector Mask(DstNumElts); + std::iota(Mask.begin(), Mask.end(), 0); + std::iota(Mask.begin() + IdxN, Mask.begin() + IdxN + SubVecNumElts, + DstNumElts); Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask); return replaceInstUsesWith(CI, Shuffle); diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 19e82099e87f0..d1ecb8eebbcbb 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -112,6 +112,7 @@ class VectorCombine { bool foldExtractExtract(Instruction &I); bool foldInsExtFNeg(Instruction &I); bool foldInsExtBinop(Instruction &I); + bool foldVectorInsertToShuffle(Instruction &I); bool foldInsExtVectorToShuffle(Instruction &I); bool foldBitOpOfBitcasts(Instruction &I); bool foldBitcastShuffle(Instruction &I); @@ -804,6 +805,65 @@ bool VectorCombine::foldInsExtBinop(Instruction &I) { return true; } +/// Try to fold vector_insert intrinsics into shufflevector instructions. +bool VectorCombine::foldVectorInsertToShuffle(Instruction &I) { + auto *II = dyn_cast(&I); + // This optimization only applies to vector_insert intrinsics. 
+ if (!II || II->getIntrinsicID() != Intrinsic::vector_insert) + return false; + + Value *Vec = II->getArgOperand(0); + Value *SubVec = II->getArgOperand(1); + Value *Idx = II->getArgOperand(2); + + // Caller guarantees DstTy is a fixed vector. + auto *DstTy = cast(II->getType()); + auto *VecTy = dyn_cast(Vec->getType()); + auto *SubVecTy = dyn_cast(SubVec->getType()); + + // Only canonicalize if Vec and SubVec are both fixed vectors. + if (!VecTy || !SubVecTy) + return false; + + unsigned DstNumElts = DstTy->getNumElements(); + unsigned VecNumElts = VecTy->getNumElements(); + unsigned SubVecNumElts = SubVecTy->getNumElements(); + auto *SubVecPtr = dyn_cast(Idx); + if (!SubVecPtr) + return false; + + unsigned IdxN = SubVecPtr->getZExtValue(); + + // Only fold when IdxN is a multiple of SubVec's length and SubVec fits within Dst. + if ((IdxN % SubVecNumElts != 0) || (IdxN + SubVecNumElts > DstNumElts)) + return false; + + // An insert that entirely overwrites Vec with SubVec is a nop. + if (VecNumElts == SubVecNumElts) { + replaceValue(I, *SubVec); + return true; + } + + // Widen SubVec into a vector of the same width as Vec, since + // shufflevector requires the two input vectors to be the same width. + // Elements beyond the bounds of SubVec within the widened vector are + // poison. 
+ SmallVector WidenMask(VecNumElts, PoisonMaskElem); + std::iota(WidenMask.begin(), WidenMask.begin() + SubVecNumElts, 0); + + auto *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask); + Worklist.pushValue(WidenShuffle); + + SmallVector Mask(DstNumElts); + std::iota(Mask.begin(), Mask.end(), 0); + std::iota(Mask.begin() + IdxN, Mask.begin() + IdxN + SubVecNumElts, + DstNumElts); + + auto *InsertShuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask); + replaceValue(I, *InsertShuffle); + return true; +} + bool VectorCombine::foldBitOpOfBitcasts(Instruction &I) { // Match: bitop(bitcast(x), bitcast(y)) -> bitcast(bitop(x, y)) Value *LHSSrc, *RHSSrc; @@ -3639,6 +3699,9 @@ bool VectorCombine::run() { // dispatching to folding functions if there's no chance of matching. if (IsFixedVectorType) { switch (Opcode) { + case Instruction::Call: + MadeChange |= foldVectorInsertToShuffle(I); + break; case Instruction::InsertElement: MadeChange |= vectorizeLoadInsert(I); break; diff --git a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll index ab7a50e55db0f..ee8a97504dbe0 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll @@ -108,6 +108,17 @@ define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) { ret <8 x i32> %1 } +; Tests insertion at middle index +define <8 x i32> @valid_insertion_i(<8 x i32> %vec, <2 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_i( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[RESULT]] +; + %result = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2) + ret <8 x i32> %result +} + ; 
============================================================================ ; ; Scalable cases ; ============================================================================ ; diff --git a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll index 5cb2c4530aa57..8e25c9c5547d6 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll @@ -567,22 +567,19 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D, ; ; SSE4-LABEL: @buildvector_mul_subadd_ps256( ; SSE4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] -; SSE4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]] -; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> -; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] +; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]] ; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> -; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> -; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> -; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> +; SSE4-NEXT: [[TMP5:%.*]] = fsub <8 x float> [[A]], [[B]] +; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> +; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> ; SSE4-NEXT: ret <8 x float> [[TMP6]] ; ; AVX_FMA4-LABEL: @buildvector_mul_subadd_ps256( ; AVX_FMA4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]] -; AVX_FMA4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]] -; AVX_FMA4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> -; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]] +; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd 
<8 x float> [[A]], [[B:%.*]] ; AVX_FMA4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> -; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> +; AVX_FMA4-NEXT: [[TMP7:%.*]] = fsub <8 x float> [[A]], [[B]] +; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> ; AVX_FMA4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> ; AVX_FMA4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> ; AVX_FMA4-NEXT: ret <8 x float> [[TMP6]] @@ -677,13 +674,11 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float> ; ; AVX_FMA-LABEL: @buildvector_mul_subadd_ps512( ; AVX_FMA-NEXT: [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]] -; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A]], [[B:%.*]] -; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <8 x i32> -; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B]] +; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B:%.*]] ; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> -; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <16 x i32> -; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP5]], <16 x i32> -; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP6]], <16 x float> poison, <16 x i32> +; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <16 x float> [[A]], [[B]] +; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <16 x i32> +; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP6]], <16 x i32> ; AVX_FMA-NEXT: ret <16 x float> [[TMP7]] ; ; AVX512-LABEL: @buildvector_mul_subadd_ps512( @@ -880,13 +875,11 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x 
double> ; ; AVX_FMA-LABEL: @buildvector_mul_subadd_pd512( ; AVX_FMA-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]] -; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A]], [[B:%.*]] -; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <4 x i32> -; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B]] +; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B:%.*]] ; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> poison, <8 x i32> -; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <8 x i32> -; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP5]], <8 x i32> -; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <8 x i32> +; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <8 x double> [[A]], [[B]] +; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <8 x i32> +; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP6]], <8 x i32> ; AVX_FMA-NEXT: ret <8 x double> [[TMP7]] ; ; AVX512-LABEL: @buildvector_mul_subadd_pd512( diff --git a/llvm/test/Transforms/VectorCombine/canonicalize-vector-insert.ll b/llvm/test/Transforms/VectorCombine/canonicalize-vector-insert.ll new file mode 100644 index 0000000000000..af6fe52c07920 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/canonicalize-vector-insert.ll @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=vector-combine -S | FileCheck %s + +; llvm.vector.insert canonicalizes to shufflevector in the fixed case. In the +; scalable case, we lower to the INSERT_SUBVECTOR ISD node. 
+ +declare <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 %idx) +declare <8 x i32> @llvm.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 %idx) +declare <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 %idx) +declare <8 x i32> @llvm.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 %idx) +declare @llvm.vector.insert.nxv4i32.v4i32( %vec, <4 x i32> %subvec, i64 %idx) + +; ============================================================================ ; +; Trivial cases +; ============================================================================ ; + +; An insert that entirely overwrites an <N x TYPE> with another <N x TYPE> is a +; nop. +define <8 x i32> @trivial_nop(<8 x i32> %vec, <8 x i32> %subvec) { +; CHECK-LABEL: @trivial_nop( +; CHECK-NEXT: ret <8 x i32> [[SUBVEC:%.*]] +; + %1 = call <8 x i32> @llvm.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 0) + ret <8 x i32> %1 +} + +; ============================================================================ ; +; Valid canonicalizations +; ============================================================================ ; + +define <8 x i32> @valid_insertion_a(<8 x i32> %vec, <2 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_a( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @valid_insertion_b(<8 x i32> %vec, <2 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_b( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call 
<8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2) + ret <8 x i32> %1 +} + +define <8 x i32> @valid_insertion_c(<8 x i32> %vec, <2 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_c( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 4) + ret <8 x i32> %1 +} + +define <8 x i32> @valid_insertion_d(<8 x i32> %vec, <2 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_d( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 6) + ret <8 x i32> %1 +} + +define <8 x i32> @valid_insertion_e(<8 x i32> %vec, <4 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_e( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @valid_insertion_f(<8 x i32> %vec, <4 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_f( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 4) + ret <8 x i32> %1 +} + +define <8 x i32> 
@valid_insertion_g(<8 x i32> %vec, <3 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_g( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 0) + ret <8 x i32> %1 +} + +define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_h( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 3) + ret <8 x i32> %1 +} + +; Tests insertion at middle index +define <8 x i32> @valid_insertion_i(<8 x i32> %vec, <2 x i32> %subvec) { +; CHECK-LABEL: @valid_insertion_i( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[RESULT]] +; + %result = call <8 x i32> @llvm.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2) + ret <8 x i32> %result +} + +; ============================================================================ ; +; Scalable cases +; ============================================================================ ; + +; Scalable insertions should not be canonicalized. This will be lowered to the +; INSERT_SUBVECTOR ISD node later. 
+define @scalable_insert( %vec, <4 x i32> %subvec) { +; CHECK-LABEL: @scalable_insert( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.vector.insert.nxv4i32.v4i32( [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]], i64 0) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = call @llvm.vector.insert.nxv4i32.v4i32( %vec, <4 x i32> %subvec, i64 0) + ret %1 +}