diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index afba099f51541..53da78ee599b7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6584,6 +6584,12 @@ static bool areTwoInsertFromSameBuildVector( return false; } +/// Checks if the specified instruction \p I is an alternate operation for +/// the given \p MainOp and \p AltOp instructions. +static bool isAlternateInstruction(Instruction *I, Instruction *MainOp, + Instruction *AltOp, + const TargetLibraryInfo &TLI); + std::optional BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom, bool IgnoreReorder) { @@ -6726,6 +6732,28 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom, "BinaryOperator and CastInst."); return TE.ReorderIndices; } + if (!TopToBottom && IgnoreReorder && TE.State == TreeEntry::Vectorize && + TE.isAltShuffle()) { + assert(TE.ReuseShuffleIndices.empty() && + "ReuseShuffleIndices should be " + "empty for alternate instructions."); + SmallVector Mask; + TE.buildAltOpShuffleMask( + [&](Instruction *I) { + assert(TE.getMatchingMainOpOrAltOp(I) && + "Unexpected main/alternate opcode"); + return isAlternateInstruction(I, TE.getMainOp(), TE.getAltOp(), *TLI); + }, + Mask); + const int VF = TE.getVectorFactor(); + OrdersType ResOrder(VF, VF); + for (unsigned I : seq(VF)) { + if (Mask[I] == PoisonMaskElem) + continue; + ResOrder[Mask[I] % VF] = I; + } + return std::move(ResOrder); + } if (!TE.ReorderIndices.empty()) return TE.ReorderIndices; if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) { @@ -7796,13 +7824,18 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { } // Reorder operands of the user node and set the ordering for the user // node itself. + auto IsNotProfitableAltCodeNode = [](const TreeEntry &TE) { + return TE.isAltShuffle() && + (!TE.ReuseShuffleIndices.empty() || TE.getVectorFactor() == 2 || + TE.ReorderIndices.empty()); + }; if (Data.first->State != TreeEntry::Vectorize || !isa( Data.first->getMainOp()) || - Data.first->isAltShuffle()) + IsNotProfitableAltCodeNode(*Data.first)) Data.first->reorderOperands(Mask); if (!isa(Data.first->getMainOp()) || - Data.first->isAltShuffle() || + IsNotProfitableAltCodeNode(*Data.first) || Data.first->State == TreeEntry::StridedVectorize || Data.first->State == TreeEntry::CompressVectorize) { reorderScalars(Data.first->Scalars, Mask); @@ -7810,7 +7843,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { /*BottomOrder=*/true); if (Data.first->ReuseShuffleIndices.empty() && !Data.first->ReorderIndices.empty() && - !Data.first->isAltShuffle()) { + !IsNotProfitableAltCodeNode(*Data.first)) { // Insert user node to the list to try to sink reordering deeper in // the graph. Queue.push(Data.first); @@ -8785,12 +8818,6 @@ static std::pair generateKeySubkey( static bool isMainInstruction(Instruction *I, Instruction *MainOp, Instruction *AltOp, const TargetLibraryInfo &TLI); -/// Checks if the specified instruction \p I is an alternate operation for -/// the given \p MainOp and \p AltOp instructions. -static bool isAlternateInstruction(Instruction *I, Instruction *MainOp, - Instruction *AltOp, - const TargetLibraryInfo &TLI); - bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S, ArrayRef VL) const { Type *ScalarTy = S.getMainOp()->getType(); diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll index a2019836098e8..26573a3e613da 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll @@ -80,33 +80,29 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32 ; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP48:%.*]] = add nsw <16 x i32> [[TMP45]], [[TMP47]] ; CHECK-NEXT: [[TMP49:%.*]] = sub nsw <16 x i32> [[TMP44]], [[TMP46]] -; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> -; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> -; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> -; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> -; CHECK-NEXT: [[TMP54:%.*]] = add nsw <16 x i32> [[TMP51]], [[TMP53]] -; CHECK-NEXT: [[TMP55:%.*]] = sub nsw <16 x i32> [[TMP50]], [[TMP52]] -; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> -; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> -; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> -; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> [[TMP55]], <16 x i32> -; CHECK-NEXT: [[TMP60:%.*]] = sub nsw <16 x i32> [[TMP57]], [[TMP59]] -; CHECK-NEXT: [[TMP61:%.*]] = add nsw <16 x i32> [[TMP56]], [[TMP58]] -; CHECK-NEXT: [[TMP62:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> -; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> -; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> -; CHECK-NEXT: [[TMP65:%.*]] = shufflevector <16 x i32> [[TMP60]], <16 x i32> [[TMP61]], <16 x i32> -; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP65]] -; CHECK-NEXT: [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP62]], [[TMP64]] -; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> -; CHECK-NEXT: [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], splat (i32 15) -; CHECK-NEXT: [[TMP70:%.*]] = and <16 x i32> [[TMP69]], splat (i32 65537) -; CHECK-NEXT: [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], splat (i32 65535) -; CHECK-NEXT: [[TMP72:%.*]] = add <16 x i32> [[TMP71]], [[TMP68]] -; CHECK-NEXT: [[TMP73:%.*]] = xor <16 x i32> [[TMP72]], [[TMP71]] -; CHECK-NEXT: [[TMP74:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP73]]) -; CHECK-NEXT: [[CONV118:%.*]] = and i32 [[TMP74]], 65535 -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP74]], 16 +; CHECK-NEXT: [[TMP50:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> +; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP48]], <16 x i32> [[TMP49]], <16 x i32> +; CHECK-NEXT: [[TMP52:%.*]] = add nsw <16 x i32> [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP53:%.*]] = sub nsw <16 x i32> [[TMP50]], [[TMP51]] +; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP52]], <16 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP54]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = sub nsw <16 x i32> [[TMP54]], [[TMP55]] +; CHECK-NEXT: [[TMP57:%.*]] = add nsw <16 x i32> [[TMP54]], [[TMP55]] +; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP56]], <16 x i32> [[TMP57]], <16 x i32> +; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP59]], [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = sub nsw <16 x i32> [[TMP58]], [[TMP60]] +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP63]], <16 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = lshr <16 x i32> [[TMP64]], splat (i32 15) +; CHECK-NEXT: [[TMP66:%.*]] = and <16 x i32> [[TMP65]], splat (i32 65537) +; CHECK-NEXT: [[TMP67:%.*]] = mul nuw <16 x i32> [[TMP66]], splat (i32 65535) +; CHECK-NEXT: [[TMP68:%.*]] = add <16 x i32> [[TMP67]], [[TMP64]] +; CHECK-NEXT: [[TMP69:%.*]] = xor <16 x i32> [[TMP68]], [[TMP67]] +; CHECK-NEXT: [[TMP70:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP69]]) +; CHECK-NEXT: [[CONV118:%.*]] = and i32 [[TMP70]], 65535 +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP70]], 16 ; CHECK-NEXT: [[RDD119:%.*]] = add nuw nsw i32 [[CONV118]], [[SHR]] ; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[RDD119]], 1 ; CHECK-NEXT: ret i32 [[SHR120]] diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll index 5ad676537f9c4..f138065101c4b 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -1259,19 +1259,19 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1, ; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP45:%.*]] = add nsw <16 x i32> [[TMP43]], [[TMP44]] ; CHECK-NEXT: [[TMP46:%.*]] = sub nsw <16 x i32> [[TMP43]], [[TMP44]] -; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP45]], <16 x i32> [[TMP46]], <16 x i32> -; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i32> [[TMP46]], <16 x i32> [[TMP45]], <16 x i32> +; CHECK-NEXT: [[TMP48:%.*]] = shufflevector <16 x i32> [[TMP47]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP49:%.*]] = add nsw <16 x i32> [[TMP47]], [[TMP48]] ; CHECK-NEXT: [[TMP50:%.*]] = sub nsw <16 x i32> [[TMP47]], [[TMP48]] -; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP49]], <16 x i32> [[TMP50]], <16 x i32> -; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <16 x i32> [[TMP50]], <16 x i32> [[TMP49]], <16 x i32> +; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP53:%.*]] = sub nsw <16 x i32> [[TMP51]], [[TMP52]] ; CHECK-NEXT: [[TMP54:%.*]] = add nsw <16 x i32> [[TMP51]], [[TMP52]] -; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> -; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i32> [[TMP53]], <16 x i32> [[TMP54]], <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = shufflevector <16 x i32> [[TMP55]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP57:%.*]] = add nsw <16 x i32> [[TMP55]], [[TMP56]] ; CHECK-NEXT: [[TMP58:%.*]] = sub nsw <16 x i32> [[TMP55]], [[TMP56]] -; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> +; CHECK-NEXT: [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP58]], <16 x i32> [[TMP57]], <16 x i32> ; CHECK-NEXT: [[TMP60:%.*]] = lshr <16 x i32> [[TMP59]], splat (i32 15) ; CHECK-NEXT: [[TMP61:%.*]] = and <16 x i32> [[TMP60]], splat (i32 65537) ; CHECK-NEXT: [[TMP62:%.*]] = mul nuw <16 x i32> [[TMP61]], splat (i32 65535) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll index ab6c7443f80e8..c84c333391350 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -62,10 +62,11 @@ define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @build_vec_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: ret <4 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP6]] ; %v0.0 = extractelement <4 x i32> %v0, i32 0 %v0.1 = extractelement <4 x i32> %v0, i32 1 @@ -196,8 +197,8 @@ define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @reduction_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], splat (i32 15) ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], splat (i32 65537) diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll index 3063d85e122d8..e4fcb1ed08be9 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -62,10 +62,11 @@ define <4 x i32> @build_vec_v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @build_vec_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: ret <4 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP6]] ; %v0.0 = extractelement <4 x i32> %v0, i32 0 %v0.1 = extractelement <4 x i32> %v0, i32 1 @@ -196,8 +197,8 @@ define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: @reduction_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], splat (i32 15) ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], splat (i32 65537) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll index 0f56862446a9d..15425c38bbb04 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll @@ -34,11 +34,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP14]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = sub <4 x i32> [[TMP14]], [[TMP13]] -; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], <4 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], <4 x i32> ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP19:%.*]] = add <4 x i32> [[TMP17]], [[TMP18]] ; CHECK-NEXT: [[TMP20:%.*]] = sub <4 x i32> [[TMP17]], [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP19]], <4 x i32> [[TMP20]], <4 x i32> +; CHECK-NEXT: [[TMP88:%.*]] = shufflevector <4 x i32> [[TMP19]], <4 x i32> [[TMP20]], <4 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1 ; CHECK-NEXT: [[TMP23:%.*]] = zext <4 x i8> [[TMP22]] to <4 x i32> ; CHECK-NEXT: [[TMP24:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1 @@ -54,11 +54,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i32> [[TMP33]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP35:%.*]] = add <4 x i32> [[TMP34]], [[TMP33]] ; CHECK-NEXT: [[TMP36:%.*]] = sub <4 x i32> [[TMP34]], [[TMP33]] -; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP36]], <4 x i32> +; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP36]], <4 x i32> ; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP39:%.*]] = add <4 x i32> [[TMP37]], [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = sub <4 x i32> [[TMP37]], [[TMP38]] -; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <4 x i32> [[TMP39]], <4 x i32> [[TMP40]], <4 x i32> +; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <4 x i32> [[TMP39]], <4 x i32> [[TMP40]], <4 x i32> ; CHECK-NEXT: [[TMP42:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 ; CHECK-NEXT: [[TMP43:%.*]] = zext <4 x i8> [[TMP42]] to <4 x i32> ; CHECK-NEXT: [[TMP44:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 @@ -74,11 +74,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP55:%.*]] = add <4 x i32> [[TMP54]], [[TMP53]] ; CHECK-NEXT: [[TMP56:%.*]] = sub <4 x i32> [[TMP54]], [[TMP53]] -; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <4 x i32> [[TMP55]], <4 x i32> [[TMP56]], <4 x i32> +; CHECK-NEXT: [[TMP57:%.*]] = shufflevector <4 x i32> [[TMP55]], <4 x i32> [[TMP56]], <4 x i32> ; CHECK-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP59:%.*]] = add <4 x i32> [[TMP57]], [[TMP58]] ; CHECK-NEXT: [[TMP60:%.*]] = sub <4 x i32> [[TMP57]], [[TMP58]] -; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <4 x i32> [[TMP59]], <4 x i32> [[TMP60]], <4 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = shufflevector <4 x i32> [[TMP59]], <4 x i32> [[TMP60]], <4 x i32> ; CHECK-NEXT: [[TMP62:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) ; CHECK-NEXT: [[TMP63:%.*]] = load <4 x i8>, ptr null, align 1 ; CHECK-NEXT: [[TMP64:%.*]] = zext <4 x i8> [[TMP63]] to <4 x i32> @@ -104,10 +104,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP84:%.*]] = add <4 x i32> [[TMP82]], [[TMP83]] ; CHECK-NEXT: [[TMP85:%.*]] = sub <4 x i32> [[TMP82]], [[TMP83]] ; CHECK-NEXT: [[TMP86:%.*]] = shufflevector <4 x i32> [[TMP84]], <4 x i32> [[TMP85]], <4 x i32> +; CHECK-NEXT: [[TMP41:%.*]] = shufflevector <4 x i32> [[TMP89]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP88]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP87:%.*]] = add <4 x i32> [[TMP41]], [[TMP21]] -; CHECK-NEXT: [[TMP88:%.*]] = sub <4 x i32> [[TMP21]], [[TMP41]] -; CHECK-NEXT: [[TMP89:%.*]] = shufflevector <4 x i32> [[TMP88]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP90:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP89]], <4 x i32> [[TMP87]], i64 4) +; CHECK-NEXT: [[TMP106:%.*]] = sub <4 x i32> [[TMP88]], [[TMP89]] +; CHECK-NEXT: [[TMP90:%.*]] = shufflevector <4 x i32> [[TMP106]], <4 x i32> [[TMP87]], <8 x i32> ; CHECK-NEXT: [[TMP91:%.*]] = add <4 x i32> [[TMP86]], [[TMP61]] ; CHECK-NEXT: [[TMP92:%.*]] = sub <4 x i32> [[TMP61]], [[TMP86]] ; CHECK-NEXT: [[TMP93:%.*]] = shufflevector <4 x i32> [[TMP92]], <4 x i32> poison, <8 x i32> @@ -115,7 +116,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP95:%.*]] = add <8 x i32> [[TMP94]], [[TMP90]] ; CHECK-NEXT: [[TMP96:%.*]] = sub <8 x i32> [[TMP90]], [[TMP94]] ; CHECK-NEXT: [[TMP97:%.*]] = shufflevector <8 x i32> [[TMP95]], <8 x i32> [[TMP96]], <16 x i32> -; CHECK-NEXT: [[TMP98:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> [[TMP64]], <16 x i32> +; CHECK-NEXT: [[TMP98:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> [[TMP64]], <16 x i32> ; CHECK-NEXT: [[TMP99:%.*]] = shufflevector <4 x i32> [[TMP43]], <4 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP100:%.*]] = shufflevector <16 x i32> [[TMP98]], <16 x i32> [[TMP99]], <16 x i32> ; CHECK-NEXT: [[TMP101:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <16 x i32> @@ -123,9 +124,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; CHECK-NEXT: [[TMP103:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP104:%.*]] = shufflevector <16 x i32> [[TMP102]], <16 x i32> [[TMP103]], <16 x i32> ; CHECK-NEXT: [[TMP105:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP106:%.*]] = shufflevector <16 x i32> [[TMP104]], <16 x i32> [[TMP105]], <16 x i32> +; CHECK-NEXT: [[TMP116:%.*]] = shufflevector <16 x i32> [[TMP104]], <16 x i32> [[TMP105]], <16 x i32> ; CHECK-NEXT: [[TMP107:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP108:%.*]] = shufflevector <16 x i32> [[TMP106]], <16 x i32> [[TMP107]], <16 x i32> +; CHECK-NEXT: [[TMP108:%.*]] = shufflevector <16 x i32> [[TMP116]], <16 x i32> [[TMP107]], <16 x i32> ; CHECK-NEXT: [[TMP109:%.*]] = lshr <16 x i32> [[TMP108]], splat (i32 15) ; CHECK-NEXT: [[TMP110:%.*]] = and <16 x i32> [[TMP109]], splat (i32 65537) ; CHECK-NEXT: [[TMP111:%.*]] = mul <16 x i32> [[TMP110]], splat (i32 65535) @@ -165,11 +166,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> ; THR15-NEXT: [[TMP15:%.*]] = add <4 x i32> [[TMP14]], [[TMP13]] ; THR15-NEXT: [[TMP16:%.*]] = sub <4 x i32> [[TMP14]], [[TMP13]] -; THR15-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], <4 x i32> +; THR15-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP15]], <4 x i32> [[TMP16]], <4 x i32> ; THR15-NEXT: [[TMP18:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <4 x i32> ; THR15-NEXT: [[TMP19:%.*]] = add <4 x i32> [[TMP17]], [[TMP18]] ; THR15-NEXT: [[TMP20:%.*]] = sub <4 x i32> [[TMP17]], [[TMP18]] -; THR15-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP19]], <4 x i32> [[TMP20]], <4 x i32> +; THR15-NEXT: [[TMP88:%.*]] = shufflevector <4 x i32> [[TMP19]], <4 x i32> [[TMP20]], <4 x i32> ; THR15-NEXT: [[TMP22:%.*]] = load <4 x i8>, ptr [[ADD_PTR3]], align 1 ; THR15-NEXT: [[TMP23:%.*]] = zext <4 x i8> [[TMP22]] to <4 x i32> ; THR15-NEXT: [[TMP24:%.*]] = load <4 x i8>, ptr [[ADD_PTR644]], align 1 @@ -185,11 +186,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP34:%.*]] = shufflevector <4 x i32> [[TMP33]], <4 x i32> poison, <4 x i32> ; THR15-NEXT: [[TMP35:%.*]] = add <4 x i32> [[TMP34]], [[TMP33]] ; THR15-NEXT: [[TMP36:%.*]] = sub <4 x i32> [[TMP34]], [[TMP33]] -; THR15-NEXT: [[TMP37:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP36]], <4 x i32> +; THR15-NEXT: [[TMP37:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> [[TMP36]], <4 x i32> ; THR15-NEXT: [[TMP38:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <4 x i32> ; THR15-NEXT: [[TMP39:%.*]] = add <4 x i32> [[TMP37]], [[TMP38]] ; THR15-NEXT: [[TMP40:%.*]] = sub <4 x i32> [[TMP37]], [[TMP38]] -; THR15-NEXT: [[TMP41:%.*]] = shufflevector <4 x i32> [[TMP39]], <4 x i32> [[TMP40]], <4 x i32> +; THR15-NEXT: [[TMP89:%.*]] = shufflevector <4 x i32> [[TMP39]], <4 x i32> [[TMP40]], <4 x i32> ; THR15-NEXT: [[TMP42:%.*]] = load <4 x i8>, ptr [[ADD_PTR_1]], align 1 ; THR15-NEXT: [[TMP43:%.*]] = zext <4 x i8> [[TMP42]] to <4 x i32> ; THR15-NEXT: [[TMP44:%.*]] = load <4 x i8>, ptr [[ADD_PTR64_1]], align 1 @@ -205,11 +206,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP54:%.*]] = shufflevector <4 x i32> [[TMP53]], <4 x i32> poison, <4 x i32> ; THR15-NEXT: [[TMP55:%.*]] = add <4 x i32> [[TMP54]], [[TMP53]] ; THR15-NEXT: [[TMP56:%.*]] = sub <4 x i32> [[TMP54]], [[TMP53]] -; THR15-NEXT: [[TMP57:%.*]] = shufflevector <4 x i32> [[TMP55]], <4 x i32> [[TMP56]], <4 x i32> +; THR15-NEXT: [[TMP57:%.*]] = shufflevector <4 x i32> [[TMP55]], <4 x i32> [[TMP56]], <4 x i32> ; THR15-NEXT: [[TMP58:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> poison, <4 x i32> ; THR15-NEXT: [[TMP59:%.*]] = add <4 x i32> [[TMP57]], [[TMP58]] ; THR15-NEXT: [[TMP60:%.*]] = sub <4 x i32> [[TMP57]], [[TMP58]] -; THR15-NEXT: [[TMP61:%.*]] = shufflevector <4 x i32> [[TMP59]], <4 x i32> [[TMP60]], <4 x i32> +; THR15-NEXT: [[TMP61:%.*]] = shufflevector <4 x i32> [[TMP59]], <4 x i32> [[TMP60]], <4 x i32> ; THR15-NEXT: [[TMP62:%.*]] = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr align 1 null, i64 4, <2 x i1> splat (i1 true), i32 2) ; THR15-NEXT: [[TMP63:%.*]] = load <4 x i8>, ptr null, align 1 ; THR15-NEXT: [[TMP64:%.*]] = zext <4 x i8> [[TMP63]] to <4 x i32> @@ -235,10 +236,11 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP84:%.*]] = add <4 x i32> [[TMP82]], [[TMP83]] ; THR15-NEXT: [[TMP85:%.*]] = sub <4 x i32> [[TMP82]], [[TMP83]] ; THR15-NEXT: [[TMP86:%.*]] = shufflevector <4 x i32> [[TMP84]], <4 x i32> [[TMP85]], <4 x i32> +; THR15-NEXT: [[TMP41:%.*]] = shufflevector <4 x i32> [[TMP89]], <4 x i32> poison, <4 x i32> +; THR15-NEXT: [[TMP21:%.*]] = shufflevector <4 x i32> [[TMP88]], <4 x i32> poison, <4 x i32> ; THR15-NEXT: [[TMP87:%.*]] = add <4 x i32> [[TMP41]], [[TMP21]] -; THR15-NEXT: [[TMP88:%.*]] = sub <4 x i32> [[TMP21]], [[TMP41]] -; THR15-NEXT: [[TMP89:%.*]] = shufflevector <4 x i32> [[TMP88]], <4 x i32> poison, <8 x i32> -; THR15-NEXT: [[TMP90:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP89]], <4 x i32> [[TMP87]], i64 4) +; THR15-NEXT: [[TMP106:%.*]] = sub <4 x i32> [[TMP88]], [[TMP89]] +; THR15-NEXT: [[TMP90:%.*]] = shufflevector <4 x i32> [[TMP106]], <4 x i32> [[TMP87]], <8 x i32> ; THR15-NEXT: [[TMP91:%.*]] = add <4 x i32> [[TMP86]], [[TMP61]] ; THR15-NEXT: [[TMP92:%.*]] = sub <4 x i32> [[TMP61]], [[TMP86]] ; THR15-NEXT: [[TMP93:%.*]] = shufflevector <4 x i32> [[TMP92]], <4 x i32> poison, <8 x i32> @@ -246,7 +248,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP95:%.*]] = add <8 x i32> [[TMP94]], [[TMP90]] ; THR15-NEXT: [[TMP96:%.*]] = sub <8 x i32> [[TMP90]], [[TMP94]] ; THR15-NEXT: [[TMP97:%.*]] = shufflevector <8 x i32> [[TMP95]], <8 x i32> [[TMP96]], <16 x i32> -; THR15-NEXT: [[TMP98:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> [[TMP64]], <16 x i32> +; THR15-NEXT: [[TMP98:%.*]] = shufflevector <4 x i32> [[TMP57]], <4 x i32> [[TMP64]], <16 x i32> ; THR15-NEXT: [[TMP99:%.*]] = shufflevector <4 x i32> [[TMP43]], <4 x i32> poison, <16 x i32> ; THR15-NEXT: [[TMP100:%.*]] = shufflevector <16 x i32> [[TMP98]], <16 x i32> [[TMP99]], <16 x i32> ; THR15-NEXT: [[TMP101:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> poison, <16 x i32> @@ -254,9 +256,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt ; THR15-NEXT: [[TMP103:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <16 x i32> ; THR15-NEXT: [[TMP104:%.*]] = shufflevector <16 x i32> [[TMP102]], <16 x i32> [[TMP103]], <16 x i32> ; THR15-NEXT: [[TMP105:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> poison, <16 x i32> -; THR15-NEXT: [[TMP106:%.*]] = shufflevector <16 x i32> [[TMP104]], <16 x i32> [[TMP105]], <16 x i32> +; THR15-NEXT: [[TMP115:%.*]] = shufflevector <16 x i32> [[TMP104]], <16 x i32> [[TMP105]], <16 x i32> ; THR15-NEXT: [[TMP107:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> poison, <16 x i32> -; THR15-NEXT: [[TMP108:%.*]] = shufflevector <16 x i32> [[TMP106]], <16 x i32> [[TMP107]], <16 x i32> +; THR15-NEXT: [[TMP108:%.*]] = shufflevector <16 x i32> [[TMP115]], <16 x i32> [[TMP107]], <16 x i32> ; THR15-NEXT: [[TMP109:%.*]] = lshr <16 x i32> [[TMP108]], splat (i32 15) ; THR15-NEXT: [[TMP110:%.*]] = and <16 x i32> [[TMP109]], splat (i32 65537) ; THR15-NEXT: [[TMP111:%.*]] = mul <16 x i32> [[TMP110]], splat (i32 65535) diff --git a/llvm/test/Transforms/SLPVectorizer/vectorize-reorder-alt-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/vectorize-reorder-alt-shuffle.ll index 631adf1fa81d5..f0f8377d637f9 100644 --- a/llvm/test/Transforms/SLPVectorizer/vectorize-reorder-alt-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/vectorize-reorder-alt-shuffle.ll @@ -11,11 +11,12 @@ define void @foo(ptr %c, ptr %d) { ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw <4 x i32> [[TMP2]], ; CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> undef, [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <4 x i32> [[TMP6]] to <4 x float> ; CHECK-NEXT: [[TMP8:%.*]] = fdiv <4 x float> [[TMP7]], undef -; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[ADD_PTR53]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <4 x i32> +; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[ADD_PTR53]], align 4 ; CHECK-NEXT: ret void ; entry: