[SLP]Remove emission of vector_insert/vector_extract intrinsics #148007
Conversation
Created using spr 1.3.5
@llvm/pr-subscribers-backend-systemz

Author: Alexey Bataev (alexey-bataev)

Changes

Replaced by the regular shuffles.

Patch is 389.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148007.diff

94 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d3761ff43f437..18781be3b4aae 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5815,26 +5815,30 @@ static InstructionCost getExtractWithExtendCost(
static Value *createInsertVector(
IRBuilderBase &Builder, Value *Vec, Value *V, unsigned Index,
function_ref<Value *(Value *, Value *, ArrayRef<int>)> Generator = {}) {
+ if (isa<PoisonValue>(Vec) && isa<PoisonValue>(V))
+ return Vec;
const unsigned SubVecVF = getNumElements(V->getType());
- if (Index % SubVecVF == 0) {
- Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V, Index);
+ // Create shuffle, insertvector requires that index is multiple of
+ // the subvector length.
+ const unsigned VecVF = getNumElements(Vec->getType());
+ SmallVector<int> Mask(VecVF, PoisonMaskElem);
+ if (isa<PoisonValue>(Vec)) {
+ auto *Begin = std::next(Mask.begin(), Index);
+ std::iota(Begin, std::next(Begin, getNumElements(V->getType())), 0);
+ Vec = Builder.CreateShuffleVector(V, Mask);
+ return Vec;
+ }
+ std::iota(Mask.begin(), Mask.end(), 0);
+ for (unsigned I : seq<unsigned>(SubVecVF))
+ Mask[I + Index] = I + VecVF;
+ if (Generator) {
+ Vec = Generator(Vec, V, Mask);
} else {
- // Create shuffle, insertvector requires that index is multiple of
- // the subvector length.
- const unsigned VecVF = getNumElements(Vec->getType());
- SmallVector<int> Mask(VecVF, PoisonMaskElem);
- std::iota(Mask.begin(), Mask.end(), 0);
- for (unsigned I : seq<unsigned>(SubVecVF))
- Mask[I + Index] = I + VecVF;
- if (Generator) {
- Vec = Generator(Vec, V, Mask);
- } else {
- // 1. Resize V to the size of Vec.
- SmallVector<int> ResizeMask(VecVF, PoisonMaskElem);
- std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0);
- V = Builder.CreateShuffleVector(V, ResizeMask);
- Vec = Builder.CreateShuffleVector(Vec, V, Mask);
- }
+ // 1. Resize V to the size of Vec.
+ SmallVector<int> ResizeMask(VecVF, PoisonMaskElem);
+ std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0);
+ V = Builder.CreateShuffleVector(V, ResizeMask);
+ Vec = Builder.CreateShuffleVector(Vec, V, Mask);
}
return Vec;
}
@@ -5844,11 +5848,6 @@ static Value *createInsertVector(
/// using default shuffle.
static Value *createExtractVector(IRBuilderBase &Builder, Value *Vec,
unsigned SubVecVF, unsigned Index) {
- if (Index % SubVecVF == 0) {
- VectorType *SubVecTy =
- getWidenedType(Vec->getType()->getScalarType(), SubVecVF);
- return Builder.CreateExtractVector(SubVecTy, Vec, Index);
- }
// Create shuffle, extract_subvector requires that index is multiple of
// the subvector length.
SmallVector<int> Mask(SubVecVF, PoisonMaskElem);
@@ -16275,8 +16274,8 @@ Value *BoUpSLP::gather(
assert(SLPReVec && "FixedVectorType is not expected.");
Vec =
createInsertVector(Builder, Vec, Scalar, Pos * getNumElements(VecTy));
- auto *II = dyn_cast<IntrinsicInst>(Vec);
- if (!II || II->getIntrinsicID() != Intrinsic::vector_insert)
+ auto *II = dyn_cast<Instruction>(Vec);
+ if (!II)
return Vec;
InsElt = II;
} else {
@@ -16296,6 +16295,27 @@ Value *BoUpSLP::gather(
if (auto *SI = dyn_cast<Instruction>(Scalar))
UserOp = SI;
} else {
+ if (V->getType()->isVectorTy()) {
+ if (auto *SV = dyn_cast<ShuffleVectorInst>(InsElt);
+ SV && SV->getOperand(0) != V && SV->getOperand(1) != V) {
+ // Find shufflevector, caused by resize.
+ auto FindOperand = [&](Value *Vec, Value *V) -> Instruction * {
+ if (auto *SV = dyn_cast<ShuffleVectorInst>(Vec)) {
+ if (SV->getOperand(0) == V)
+ return SV;
+ if (SV->getOperand(1) == V)
+ return SV;
+ }
+ return nullptr;
+ };
+ if (Instruction *User = FindOperand(SV->getOperand(0), V))
+ InsElt = User;
+ else if (Instruction *User = FindOperand(SV->getOperand(1), V))
+ InsElt = User;
+ assert(InsElt &&
+ "Failed to find shufflevector, caused by resize.");
+ }
+ }
UserOp = InsElt;
}
if (UserOp) {
@@ -16864,10 +16884,13 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
V, SimplifyQuery(*R.DL));
}));
unsigned InsertionIndex = Idx * getNumElements(ScalarTy);
+ Type *OrigScalarTy = ScalarTy;
+ ScalarTy = ScalarTy->getScalarType();
Vec = createInsertVector(
Builder, Vec, V, InsertionIndex,
std::bind(&ShuffleInstructionBuilder::createShuffle, this, _1, _2,
_3));
+ ScalarTy = OrigScalarTy;
if (!CommonMask.empty()) {
std::iota(std::next(CommonMask.begin(), Idx),
std::next(CommonMask.begin(), Idx + E->getVectorFactor()),
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
index 5cb2c4530aa57..8e25c9c5547d6 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
@@ -567,22 +567,19 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D,
;
; SSE4-LABEL: @buildvector_mul_subadd_ps256(
; SSE4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]]
-; SSE4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]]
-; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
+; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]]
; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; SSE4-NEXT: [[TMP5:%.*]] = fsub <8 x float> [[A]], [[B]]
+; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
; SSE4-NEXT: ret <8 x float> [[TMP6]]
;
; AVX_FMA4-LABEL: @buildvector_mul_subadd_ps256(
; AVX_FMA4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]]
-; AVX_FMA4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]]
-; AVX_FMA4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
+; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]]
; AVX_FMA4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX_FMA4-NEXT: [[TMP7:%.*]] = fsub <8 x float> [[A]], [[B]]
+; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX_FMA4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX_FMA4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; AVX_FMA4-NEXT: ret <8 x float> [[TMP6]]
@@ -677,13 +674,11 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float>
;
; AVX_FMA-LABEL: @buildvector_mul_subadd_ps512(
; AVX_FMA-NEXT: [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]]
-; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A]], [[B:%.*]]
-; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B]]
+; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B:%.*]]
; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP6]], <16 x float> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <16 x float> [[A]], [[B]]
+; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP6]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
; AVX_FMA-NEXT: ret <16 x float> [[TMP7]]
;
; AVX512-LABEL: @buildvector_mul_subadd_ps512(
@@ -880,13 +875,11 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double>
;
; AVX_FMA-LABEL: @buildvector_mul_subadd_pd512(
; AVX_FMA-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]]
-; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A]], [[B:%.*]]
-; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B]]
+; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B:%.*]]
; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <8 x double> [[A]], [[B]]
+; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP6]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
; AVX_FMA-NEXT: ret <8 x double> [[TMP7]]
;
; AVX512-LABEL: @buildvector_mul_subadd_pd512(
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll
index 9f9e9d84108e6..9c615bb4757fa 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll
@@ -12,9 +12,10 @@ define void @foo(ptr %0) {
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x ptr> [[TMP2]], <2 x ptr> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> poison, <4 x ptr> [[TMP3]], i64 0)
-; CHECK-NEXT: [[TMP7:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> [[TMP11]], <4 x ptr> [[TMP5]], i64 4)
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x ptr> [[TMP7]], <8 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 3>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x ptr> [[TMP12]], <8 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <8 x ptr> [[TMP8]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i1> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP10]])
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
index 9327fe8995d45..8d44d03e0e5cc 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
@@ -11,7 +11,7 @@ define i32 @test(ptr %c) {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 0, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = lshr <6 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 0, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v6i64(<8 x i64> poison, <6 x i64> [[TMP2]], i64 0)
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <6 x i64> [[TMP2]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 4, i32 5>
; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[TMP5]] to <8 x i8>
; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[INCDEC_PTR_3_1]], align 1
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
index 89e133bb1c6a1..021edc4e27e42 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
@@ -164,7 +164,8 @@ define i32 @getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y
; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[T10]], i32 2
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T12]], i32 3
-; CHECK-NEXT: [[TMP13:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP8]], <2 x i32> [[TMP6]], i64 0)
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]])
; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP14]], [[SUM_032]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 6c5220d13b7a2..bb05440910130 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -420,27 +420,26 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]]
-; TODO: Dead code must be removed below.
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4
; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4
; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
-; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1
-; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP32]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1
+; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]]
-; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1
-; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP33]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1
+; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 4
-; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1
-; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP34]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1
+; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP2]] to i32
; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 4
-; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1
-; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP35]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1
+; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP3]] to i32
; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 1
-; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1
-; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP36]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1
+; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP4]] to i32
; CHECK-NEXT: [[ARRAYIDX10_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 1
-; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1
-; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP37]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1
+; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT...
[truncated]
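To illustrate what "replaced by the regular shuffles" means for the common (non-poison destination) path of createInsertVector: instead of emitting llvm.vector.insert, the builder now widens the subvector with a resize shuffle and then merges it into the destination with a two-operand shuffle. Below is a minimal standalone C++ sketch, not part of the patch, that mirrors that mask construction; printInsertMasks and its parameter names are hypothetical and exist only for illustration.

#include <cstdio>
#include <numeric>
#include <vector>

// Hypothetical standalone helper (not LLVM code): computes the two shuffle
// masks that createInsertVector now emits instead of llvm.vector.insert when
// inserting a SubVecVF-wide subvector into a VecVF-wide vector at element
// Index. -1 stands in for PoisonMaskElem.
static void printInsertMasks(unsigned VecVF, unsigned SubVecVF, unsigned Index) {
  // 1. Resize mask: widen the subvector to VecVF lanes, padding with poison.
  std::vector<int> ResizeMask(VecVF, -1);
  std::iota(ResizeMask.begin(), ResizeMask.begin() + SubVecVF, 0);

  // 2. Insert mask: start as the identity over the destination vector, then
  //    redirect lanes [Index, Index + SubVecVF) to the widened subvector,
  //    which is the second shuffle operand, hence the +VecVF offset.
  std::vector<int> InsertMask(VecVF);
  std::iota(InsertMask.begin(), InsertMask.end(), 0);
  for (unsigned I = 0; I < SubVecVF; ++I)
    InsertMask[I + Index] = static_cast<int>(I + VecVF);

  std::printf("resize mask:");
  for (int M : ResizeMask)
    std::printf(" %d", M);
  std::printf("\ninsert mask:");
  for (int M : InsertMask)
    std::printf(" %d", M);
  std::printf("\n");
}

int main() {
  // <2 x i32> inserted into <4 x i32> at element 0, as in getelementptr.ll.
  printInsertMasks(/*VecVF=*/4, /*SubVecVF=*/2, /*Index=*/0);
  return 0;
}

For the <2 x i32>-into-<4 x i32> case from getelementptr.ll above, this prints the resize mask 0 1 -1 -1 and the insert mask 4 5 2 3, which matches the updated CHECK lines <i32 0, i32 1, i32 poison, i32 poison> and <i32 4, i32 5, i32 2, i32 3>.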
|
@llvm/pr-subscribers-vectorizers Author: Alexey Bataev (alexey-bataev) ChangesReplaced by the regular shuffles. Patch is 389.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148007.diff 94 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d3761ff43f437..18781be3b4aae 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5815,26 +5815,30 @@ static InstructionCost getExtractWithExtendCost(
static Value *createInsertVector(
IRBuilderBase &Builder, Value *Vec, Value *V, unsigned Index,
function_ref<Value *(Value *, Value *, ArrayRef<int>)> Generator = {}) {
+ if (isa<PoisonValue>(Vec) && isa<PoisonValue>(V))
+ return Vec;
const unsigned SubVecVF = getNumElements(V->getType());
- if (Index % SubVecVF == 0) {
- Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V, Index);
+ // Create shuffle, insertvector requires that index is multiple of
+ // the subvector length.
+ const unsigned VecVF = getNumElements(Vec->getType());
+ SmallVector<int> Mask(VecVF, PoisonMaskElem);
+ if (isa<PoisonValue>(Vec)) {
+ auto *Begin = std::next(Mask.begin(), Index);
+ std::iota(Begin, std::next(Begin, getNumElements(V->getType())), 0);
+ Vec = Builder.CreateShuffleVector(V, Mask);
+ return Vec;
+ }
+ std::iota(Mask.begin(), Mask.end(), 0);
+ for (unsigned I : seq<unsigned>(SubVecVF))
+ Mask[I + Index] = I + VecVF;
+ if (Generator) {
+ Vec = Generator(Vec, V, Mask);
} else {
- // Create shuffle, insertvector requires that index is multiple of
- // the subvector length.
- const unsigned VecVF = getNumElements(Vec->getType());
- SmallVector<int> Mask(VecVF, PoisonMaskElem);
- std::iota(Mask.begin(), Mask.end(), 0);
- for (unsigned I : seq<unsigned>(SubVecVF))
- Mask[I + Index] = I + VecVF;
- if (Generator) {
- Vec = Generator(Vec, V, Mask);
- } else {
- // 1. Resize V to the size of Vec.
- SmallVector<int> ResizeMask(VecVF, PoisonMaskElem);
- std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0);
- V = Builder.CreateShuffleVector(V, ResizeMask);
- Vec = Builder.CreateShuffleVector(Vec, V, Mask);
- }
+ // 1. Resize V to the size of Vec.
+ SmallVector<int> ResizeMask(VecVF, PoisonMaskElem);
+ std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0);
+ V = Builder.CreateShuffleVector(V, ResizeMask);
+ Vec = Builder.CreateShuffleVector(Vec, V, Mask);
}
return Vec;
}
@@ -5844,11 +5848,6 @@ static Value *createInsertVector(
/// using default shuffle.
static Value *createExtractVector(IRBuilderBase &Builder, Value *Vec,
unsigned SubVecVF, unsigned Index) {
- if (Index % SubVecVF == 0) {
- VectorType *SubVecTy =
- getWidenedType(Vec->getType()->getScalarType(), SubVecVF);
- return Builder.CreateExtractVector(SubVecTy, Vec, Index);
- }
// Create shuffle, extract_subvector requires that index is multiple of
// the subvector length.
SmallVector<int> Mask(SubVecVF, PoisonMaskElem);
@@ -16275,8 +16274,8 @@ Value *BoUpSLP::gather(
assert(SLPReVec && "FixedVectorType is not expected.");
Vec =
createInsertVector(Builder, Vec, Scalar, Pos * getNumElements(VecTy));
- auto *II = dyn_cast<IntrinsicInst>(Vec);
- if (!II || II->getIntrinsicID() != Intrinsic::vector_insert)
+ auto *II = dyn_cast<Instruction>(Vec);
+ if (!II)
return Vec;
InsElt = II;
} else {
@@ -16296,6 +16295,27 @@ Value *BoUpSLP::gather(
if (auto *SI = dyn_cast<Instruction>(Scalar))
UserOp = SI;
} else {
+ if (V->getType()->isVectorTy()) {
+ if (auto *SV = dyn_cast<ShuffleVectorInst>(InsElt);
+ SV && SV->getOperand(0) != V && SV->getOperand(1) != V) {
+ // Find shufflevector, caused by resize.
+ auto FindOperand = [&](Value *Vec, Value *V) -> Instruction * {
+ if (auto *SV = dyn_cast<ShuffleVectorInst>(Vec)) {
+ if (SV->getOperand(0) == V)
+ return SV;
+ if (SV->getOperand(1) == V)
+ return SV;
+ }
+ return nullptr;
+ };
+ if (Instruction *User = FindOperand(SV->getOperand(0), V))
+ InsElt = User;
+ else if (Instruction *User = FindOperand(SV->getOperand(1), V))
+ InsElt = User;
+ assert(InsElt &&
+ "Failed to find shufflevector, caused by resize.");
+ }
+ }
UserOp = InsElt;
}
if (UserOp) {
@@ -16864,10 +16884,13 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
V, SimplifyQuery(*R.DL));
}));
unsigned InsertionIndex = Idx * getNumElements(ScalarTy);
+ Type *OrigScalarTy = ScalarTy;
+ ScalarTy = ScalarTy->getScalarType();
Vec = createInsertVector(
Builder, Vec, V, InsertionIndex,
std::bind(&ShuffleInstructionBuilder::createShuffle, this, _1, _2,
_3));
+ ScalarTy = OrigScalarTy;
if (!CommonMask.empty()) {
std::iota(std::next(CommonMask.begin(), Idx),
std::next(CommonMask.begin(), Idx + E->getVectorFactor()),
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
index 5cb2c4530aa57..8e25c9c5547d6 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
@@ -567,22 +567,19 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D,
;
; SSE4-LABEL: @buildvector_mul_subadd_ps256(
; SSE4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]]
-; SSE4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]]
-; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
+; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]]
; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; SSE4-NEXT: [[TMP5:%.*]] = fsub <8 x float> [[A]], [[B]]
+; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
; SSE4-NEXT: ret <8 x float> [[TMP6]]
;
; AVX_FMA4-LABEL: @buildvector_mul_subadd_ps256(
; AVX_FMA4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]]
-; AVX_FMA4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]]
-; AVX_FMA4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
+; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]]
; AVX_FMA4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX_FMA4-NEXT: [[TMP7:%.*]] = fsub <8 x float> [[A]], [[B]]
+; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX_FMA4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX_FMA4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; AVX_FMA4-NEXT: ret <8 x float> [[TMP6]]
@@ -677,13 +674,11 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float>
;
; AVX_FMA-LABEL: @buildvector_mul_subadd_ps512(
; AVX_FMA-NEXT: [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]]
-; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A]], [[B:%.*]]
-; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B]]
+; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B:%.*]]
; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP6]], <16 x float> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <16 x float> [[A]], [[B]]
+; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP6]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
; AVX_FMA-NEXT: ret <16 x float> [[TMP7]]
;
; AVX512-LABEL: @buildvector_mul_subadd_ps512(
@@ -880,13 +875,11 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double>
;
; AVX_FMA-LABEL: @buildvector_mul_subadd_pd512(
; AVX_FMA-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]]
-; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A]], [[B:%.*]]
-; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B]]
+; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B:%.*]]
; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <8 x double> [[A]], [[B]]
+; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP6]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
; AVX_FMA-NEXT: ret <8 x double> [[TMP7]]
;
; AVX512-LABEL: @buildvector_mul_subadd_pd512(
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll
index 9f9e9d84108e6..9c615bb4757fa 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll
@@ -12,9 +12,10 @@ define void @foo(ptr %0) {
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x ptr> [[TMP2]], <2 x ptr> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> poison, <4 x ptr> [[TMP3]], i64 0)
-; CHECK-NEXT: [[TMP7:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> [[TMP11]], <4 x ptr> [[TMP5]], i64 4)
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x ptr> [[TMP7]], <8 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 3>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x ptr> [[TMP12]], <8 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <8 x ptr> [[TMP8]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i1> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP10]])
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
index 9327fe8995d45..8d44d03e0e5cc 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
@@ -11,7 +11,7 @@ define i32 @test(ptr %c) {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 0, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = lshr <6 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 0, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v6i64(<8 x i64> poison, <6 x i64> [[TMP2]], i64 0)
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <6 x i64> [[TMP2]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 4, i32 5>
; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[TMP5]] to <8 x i8>
; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[INCDEC_PTR_3_1]], align 1
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
index 89e133bb1c6a1..021edc4e27e42 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
@@ -164,7 +164,8 @@ define i32 @getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y
; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[T10]], i32 2
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T12]], i32 3
-; CHECK-NEXT: [[TMP13:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP8]], <2 x i32> [[TMP6]], i64 0)
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]])
; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP14]], [[SUM_032]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 6c5220d13b7a2..bb05440910130 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -420,27 +420,26 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]]
-; TODO: Dead code must be removed below.
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4
; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4
; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
-; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1
-; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP32]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1
+; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]]
-; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1
-; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP33]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1
+; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 4
-; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1
-; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP34]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1
+; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP2]] to i32
; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 4
-; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1
-; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP35]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1
+; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP3]] to i32
; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 1
-; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1
-; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP36]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1
+; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP4]] to i32
; CHECK-NEXT: [[ARRAYIDX10_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 1
-; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1
-; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP37]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1
+; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: Alexey Bataev (alexey-bataev) ChangesReplaced by the regular shuffles. Patch is 389.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148007.diff 94 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d3761ff43f437..18781be3b4aae 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5815,26 +5815,30 @@ static InstructionCost getExtractWithExtendCost(
static Value *createInsertVector(
IRBuilderBase &Builder, Value *Vec, Value *V, unsigned Index,
function_ref<Value *(Value *, Value *, ArrayRef<int>)> Generator = {}) {
+ if (isa<PoisonValue>(Vec) && isa<PoisonValue>(V))
+ return Vec;
const unsigned SubVecVF = getNumElements(V->getType());
- if (Index % SubVecVF == 0) {
- Vec = Builder.CreateInsertVector(Vec->getType(), Vec, V, Index);
+ // Create shuffle, insertvector requires that index is multiple of
+ // the subvector length.
+ const unsigned VecVF = getNumElements(Vec->getType());
+ SmallVector<int> Mask(VecVF, PoisonMaskElem);
+ if (isa<PoisonValue>(Vec)) {
+ auto *Begin = std::next(Mask.begin(), Index);
+ std::iota(Begin, std::next(Begin, getNumElements(V->getType())), 0);
+ Vec = Builder.CreateShuffleVector(V, Mask);
+ return Vec;
+ }
+ std::iota(Mask.begin(), Mask.end(), 0);
+ for (unsigned I : seq<unsigned>(SubVecVF))
+ Mask[I + Index] = I + VecVF;
+ if (Generator) {
+ Vec = Generator(Vec, V, Mask);
} else {
- // Create shuffle, insertvector requires that index is multiple of
- // the subvector length.
- const unsigned VecVF = getNumElements(Vec->getType());
- SmallVector<int> Mask(VecVF, PoisonMaskElem);
- std::iota(Mask.begin(), Mask.end(), 0);
- for (unsigned I : seq<unsigned>(SubVecVF))
- Mask[I + Index] = I + VecVF;
- if (Generator) {
- Vec = Generator(Vec, V, Mask);
- } else {
- // 1. Resize V to the size of Vec.
- SmallVector<int> ResizeMask(VecVF, PoisonMaskElem);
- std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0);
- V = Builder.CreateShuffleVector(V, ResizeMask);
- Vec = Builder.CreateShuffleVector(Vec, V, Mask);
- }
+ // 1. Resize V to the size of Vec.
+ SmallVector<int> ResizeMask(VecVF, PoisonMaskElem);
+ std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), SubVecVF), 0);
+ V = Builder.CreateShuffleVector(V, ResizeMask);
+ Vec = Builder.CreateShuffleVector(Vec, V, Mask);
}
return Vec;
}
@@ -5844,11 +5848,6 @@ static Value *createInsertVector(
/// using default shuffle.
static Value *createExtractVector(IRBuilderBase &Builder, Value *Vec,
unsigned SubVecVF, unsigned Index) {
- if (Index % SubVecVF == 0) {
- VectorType *SubVecTy =
- getWidenedType(Vec->getType()->getScalarType(), SubVecVF);
- return Builder.CreateExtractVector(SubVecTy, Vec, Index);
- }
// Create shuffle, extract_subvector requires that index is multiple of
// the subvector length.
SmallVector<int> Mask(SubVecVF, PoisonMaskElem);
@@ -16275,8 +16274,8 @@ Value *BoUpSLP::gather(
assert(SLPReVec && "FixedVectorType is not expected.");
Vec =
createInsertVector(Builder, Vec, Scalar, Pos * getNumElements(VecTy));
- auto *II = dyn_cast<IntrinsicInst>(Vec);
- if (!II || II->getIntrinsicID() != Intrinsic::vector_insert)
+ auto *II = dyn_cast<Instruction>(Vec);
+ if (!II)
return Vec;
InsElt = II;
} else {
@@ -16296,6 +16295,27 @@ Value *BoUpSLP::gather(
if (auto *SI = dyn_cast<Instruction>(Scalar))
UserOp = SI;
} else {
+ if (V->getType()->isVectorTy()) {
+ if (auto *SV = dyn_cast<ShuffleVectorInst>(InsElt);
+ SV && SV->getOperand(0) != V && SV->getOperand(1) != V) {
+ // Find shufflevector, caused by resize.
+ auto FindOperand = [&](Value *Vec, Value *V) -> Instruction * {
+ if (auto *SV = dyn_cast<ShuffleVectorInst>(Vec)) {
+ if (SV->getOperand(0) == V)
+ return SV;
+ if (SV->getOperand(1) == V)
+ return SV;
+ }
+ return nullptr;
+ };
+ if (Instruction *User = FindOperand(SV->getOperand(0), V))
+ InsElt = User;
+ else if (Instruction *User = FindOperand(SV->getOperand(1), V))
+ InsElt = User;
+ assert(InsElt &&
+ "Failed to find shufflevector, caused by resize.");
+ }
+ }
UserOp = InsElt;
}
if (UserOp) {
@@ -16864,10 +16884,13 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
V, SimplifyQuery(*R.DL));
}));
unsigned InsertionIndex = Idx * getNumElements(ScalarTy);
+ Type *OrigScalarTy = ScalarTy;
+ ScalarTy = ScalarTy->getScalarType();
Vec = createInsertVector(
Builder, Vec, V, InsertionIndex,
std::bind(&ShuffleInstructionBuilder::createShuffle, this, _1, _2,
_3));
+ ScalarTy = OrigScalarTy;
if (!CommonMask.empty()) {
std::iota(std::next(CommonMask.begin(), Idx),
std::next(CommonMask.begin(), Idx + E->getVectorFactor()),
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
index 5cb2c4530aa57..8e25c9c5547d6 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
@@ -567,22 +567,19 @@ define <8 x float> @buildvector_mul_subadd_ps256(<8 x float> %C, <8 x float> %D,
;
; SSE4-LABEL: @buildvector_mul_subadd_ps256(
; SSE4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]]
-; SSE4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]]
-; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
+; SSE4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]]
; SSE4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; SSE4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; SSE4-NEXT: [[TMP5:%.*]] = fsub <8 x float> [[A]], [[B]]
+; SSE4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; SSE4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
; SSE4-NEXT: ret <8 x float> [[TMP6]]
;
; AVX_FMA4-LABEL: @buildvector_mul_subadd_ps256(
; AVX_FMA4-NEXT: [[A:%.*]] = fmul <8 x float> [[C:%.*]], [[D:%.*]]
-; AVX_FMA4-NEXT: [[TMP0:%.*]] = fsub <8 x float> [[A]], [[B:%.*]]
-; AVX_FMA4-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B]]
+; AVX_FMA4-NEXT: [[TMP2:%.*]] = fadd <8 x float> [[A]], [[B:%.*]]
; AVX_FMA4-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX_FMA4-NEXT: [[TMP7:%.*]] = fsub <8 x float> [[A]], [[B]]
+; AVX_FMA4-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX_FMA4-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX_FMA4-NEXT: [[TMP6:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
; AVX_FMA4-NEXT: ret <8 x float> [[TMP6]]
@@ -677,13 +674,11 @@ define <16 x float> @buildvector_mul_subadd_ps512(<16 x float> %C, <16 x float>
;
; AVX_FMA-LABEL: @buildvector_mul_subadd_ps512(
; AVX_FMA-NEXT: [[A:%.*]] = fmul <16 x float> [[C:%.*]], [[D:%.*]]
-; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A]], [[B:%.*]]
-; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B]]
+; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <16 x float> [[A]], [[B:%.*]]
; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP6]], <16 x float> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <16 x float> [[A]], [[B]]
+; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[TMP5]], <16 x float> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <16 x float> [[TMP4]], <16 x float> [[TMP6]], <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
; AVX_FMA-NEXT: ret <16 x float> [[TMP7]]
;
; AVX512-LABEL: @buildvector_mul_subadd_ps512(
@@ -880,13 +875,11 @@ define <8 x double> @buildvector_mul_subadd_pd512(<8 x double> %C, <8 x double>
;
; AVX_FMA-LABEL: @buildvector_mul_subadd_pd512(
; AVX_FMA-NEXT: [[A:%.*]] = fmul <8 x double> [[C:%.*]], [[D:%.*]]
-; AVX_FMA-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A]], [[B:%.*]]
-; AVX_FMA-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B]]
+; AVX_FMA-NEXT: [[TMP3:%.*]] = fadd <8 x double> [[A]], [[B:%.*]]
; AVX_FMA-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
-; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; AVX_FMA-NEXT: [[TMP5:%.*]] = fsub <8 x double> [[A]], [[B]]
+; AVX_FMA-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[TMP5]], <8 x double> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
+; AVX_FMA-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP4]], <8 x double> [[TMP6]], <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
; AVX_FMA-NEXT: ret <8 x double> [[TMP7]]
;
; AVX512-LABEL: @buildvector_mul_subadd_pd512(
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll
index 9f9e9d84108e6..9c615bb4757fa 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll
@@ -12,9 +12,10 @@ define void @foo(ptr %0) {
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x ptr> [[TMP2]], <2 x ptr> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x ptr> [[TMP6]], <4 x ptr> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> poison, <4 x ptr> [[TMP3]], i64 0)
-; CHECK-NEXT: [[TMP7:%.*]] = call <8 x ptr> @llvm.vector.insert.v8p0.v4p0(<8 x ptr> [[TMP11]], <4 x ptr> [[TMP5]], i64 4)
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x ptr> [[TMP7]], <8 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 3>
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x ptr> [[TMP12]], <8 x ptr> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 3>
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <8 x ptr> [[TMP8]], zeroinitializer
; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i1> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[OP_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP10]])
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
index 9327fe8995d45..8d44d03e0e5cc 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll
@@ -11,7 +11,7 @@ define i32 @test(ptr %c) {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <6 x i32> <i32 1, i32 1, i32 1, i32 1, i32 0, i32 0>
; CHECK-NEXT: [[TMP2:%.*]] = lshr <6 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 0, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v6i64(<8 x i64> poison, <6 x i64> [[TMP2]], i64 0)
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <6 x i64> [[TMP2]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i64> [[TMP4]], <8 x i64> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 4, i32 5>
; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[TMP5]] to <8 x i8>
; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[INCDEC_PTR_3_1]], align 1
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
index 89e133bb1c6a1..021edc4e27e42 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
@@ -164,7 +164,8 @@ define i32 @getelementptr_2x32(ptr nocapture readonly %g, i32 %n, i32 %x, i32 %y
; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[ARRAYIDX15]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[T10]], i32 2
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[T12]], i32 3
-; CHECK-NEXT: [[TMP13:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP8]], <2 x i32> [[TMP6]], i64 0)
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]])
; CHECK-NEXT: [[OP_RDX]] = add i32 [[TMP14]], [[SUM_032]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 6c5220d13b7a2..bb05440910130 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -420,27 +420,26 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]]
-; TODO: Dead code must be removed below.
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4
; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4
; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
-; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1
-; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP32]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ADD_PTR_1]], align 1
+; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP0]] to i32
; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 [[IDX_EXT63]]
-; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1
-; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP33]] to i32
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ADD_PTR64_1]], align 1
+; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP1]] to i32
; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 4
-; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1
-; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP34]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX3_2]], align 1
+; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP2]] to i32
; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 4
-; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1
-; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP35]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX5_2]], align 1
+; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP3]] to i32
; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR_1]], i64 1
-; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1
-; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP36]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX8_2]], align 1
+; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP4]] to i32
; CHECK-NEXT: [[ARRAYIDX10_2:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64_1]], i64 1
-; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1
-; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP37]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX10_2]], align 1
+; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT...
[truncated]
You can test this locally with the following command:

git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll llvm/test/Transforms/SLPVectorizer/AArch64/InstructionsState-is-invalid-0.ll llvm/test/Transforms/SLPVectorizer/AArch64/alternate-vectorization-split-node.ll llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll llvm/test/Transforms/SLPVectorizer/AArch64/reduce-fadd.ll llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll llvm/test/Transforms/SLPVectorizer/AArch64/vec3-calls.ll llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll llvm/test/Transforms/SLPVectorizer/RISCV/combined-loads-stored.ll llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll llvm/test/Transforms/SLPVectorizer/RISCV/gather-insert-point-restore.ll llvm/test/Transforms/SLPVectorizer/RISCV/gather-node-with-no-users.ll llvm/test/Transforms/SLPVectorizer/RISCV/horizontal-list.ll llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll llvm/test/Transforms/SLPVectorizer/RISCV/unordered-loads-operands.ll llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll llvm/test/Transforms/SLPVectorizer/SystemZ/reuse-non-power-of-2-reorder.ll llvm/test/Transforms/SLPVectorizer/SystemZ/revec-fix-128169.ll llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll llvm/test/Transforms/SLPVectorizer/X86/bool-mask.ll llvm/test/Transforms/SLPVectorizer/X86/buildvector-reused-with-bv-subvector.ll llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll llvm/test/Transforms/SLPVectorizer/X86/ext-used-scalar-different-bitwidth.ll llvm/test/Transforms/SLPVectorizer/X86/external-reduced-value-vectorized.ll llvm/test/Transforms/SLPVectorizer/X86/extractelemets-extended-by-poison.ll llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll llvm/test/Transforms/SLPVectorizer/X86/full-matched-bv-with-subvectors.ll llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll llvm/test/Transforms/SLPVectorizer/X86/identity-match-splat-less-defined.ll llvm/test/Transforms/SLPVectorizer/X86/insert-subvector.ll llvm/test/Transforms/SLPVectorizer/X86/inst_size_bug.ll llvm/test/Transforms/SLPVectorizer/X86/landing-pad-for-split-node.ll llvm/test/Transforms/SLPVectorizer/X86/malformed_phis.ll llvm/test/Transforms/SLPVectorizer/X86/matched-nodes-updated.ll llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-transformed-operand.ll llvm/test/Transforms/SLPVectorizer/X86/multi-node-reuse-in-bv.ll llvm/test/Transforms/SLPVectorizer/X86/non-load-reduced-as-part-of-bv.ll llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-split-node.ll llvm/test/Transforms/SLPVectorizer/X86/phi-node-reshuffled-part.ll llvm/test/Transforms/SLPVectorizer/X86/phi-nodes-incoming-same-blocks.ll llvm/test/Transforms/SLPVectorizer/X86/reduced-value-vectorized-later.ll llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll llvm/test/Transforms/SLPVectorizer/X86/reduction-with-removed-extracts.ll llvm/test/Transforms/SLPVectorizer/X86/remark-partial-loads-vectorize.ll llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-subvector.ll llvm/test/Transforms/SLPVectorizer/X86/replaced-external-in-reduction.ll llvm/test/Transforms/SLPVectorizer/X86/resched.ll llvm/test/Transforms/SLPVectorizer/X86/resized-bv-values-non-power-of2-node.ll llvm/test/Transforms/SLPVectorizer/X86/revec-SplitVectorize.ll llvm/test/Transforms/SLPVectorizer/X86/revec-getStoreMinimumVF.ll llvm/test/Transforms/SLPVectorizer/X86/revec-load-compress.ll llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-vectorized-later.ll llvm/test/Transforms/SLPVectorizer/X86/same-values-sub-node-with-poisons.ll llvm/test/Transforms/SLPVectorizer/X86/split-load8_2-unord.ll llvm/test/Transforms/SLPVectorizer/X86/split-node-full-match.ll llvm/test/Transforms/SLPVectorizer/X86/split-node-no-reorder-copy.ll llvm/test/Transforms/SLPVectorizer/X86/split-node-parent-operands-in-spill.ll llvm/test/Transforms/SLPVectorizer/X86/split-node-reorder-node-with-ops.ll llvm/test/Transforms/SLPVectorizer/X86/split-vector-operand-with-reuses.ll llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll llvm/test/Transforms/SLPVectorizer/X86/trunc-node-reused.ll llvm/test/Transforms/SLPVectorizer/X86/vec3-calls.ll llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll llvm/test/Transforms/SLPVectorizer/gathered-consecutive-loads-different-types.ll llvm/test/Transforms/SLPVectorizer/partial-register-extract.ll llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll llvm/test/Transforms/SLPVectorizer/revec-insertelement.ll llvm/test/Transforms/SLPVectorizer/revec-reduction-logical.ll llvm/test/Transforms/SLPVectorizer/revec-shufflevector.ll llvm/test/Transforms/SLPVectorizer/revec.ll

The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields undef. In tests, avoid using undef.

For example, this is considered a bad practice:

define void @fn() {
  ...
  br i1 undef, ...
}

Please use the following instead:

define void @fn(i1 %cond) {
  ...
  br i1 %cond, ...
}

Please refer to the Undefined Behavior Manual for more information.
Created using spr 1.3.5
@@ -420,27 +420,26 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[P2:%.*]], i64 4
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IDX_EXT]]
; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IDX_EXT63]]
; TODO: Dead code must be removed below. |
Is this TODO still valid? I assume the script deleted it.
It just moved below, see line 549
I'm surprised you had to add so much shuffle canonicalization :/
Created using spr 1.3.5
LGTM with a few minors
if (auto *SV = dyn_cast<ShuffleVectorInst>(InsElt);
    SV && SV->getOperand(0) != V && SV->getOperand(1) != V) {
  // Find shufflevector, caused by resize.
  auto FindOperand = [&](Value *Vec, Value *V) -> Instruction * {
Can you remove the & capture?
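For reference, a minimal sketch of what this comment asks for; the wrapper findResizeShuffle and the lambda body below are hypothetical stand-ins (the real code lives in createInsertVector), the point is only that a lambda which receives everything it needs through its parameters can use an empty capture list:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical wrapper, for illustration only.
static Instruction *findResizeShuffle(Value *Candidate, Value *Scalar) {
  // Empty capture list: both values the lambda inspects arrive as
  // parameters, so nothing is needed from the enclosing scope.
  auto FindOperand = [](Value *Vec, Value *V) -> Instruction * {
    if (auto *Shuffle = dyn_cast<ShuffleVectorInst>(Vec))
      if (Shuffle->getOperand(0) == V || Shuffle->getOperand(1) == V)
        return Shuffle;
    return nullptr;
  };
  return FindOperand(Candidate, Scalar);
}

If the lambda later needs state from the enclosing function, capturing just that one variable by name keeps the dependency explicit instead of a blanket [&].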
…nsics

Replaced by the regular shuffles.
Fixes #145512

Reviewers: RKSimon

Reviewed By: RKSimon

Pull Request: llvm/llvm-project#148007
Replaced by the regular shuffles.
Fixes #145512