diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 74a16d3fbcad6..629ddeea1534a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -18254,6 +18254,14 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI, if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, BuildVectorInsts)) return false; + if (MaxVFOnly && BuildVectorOpds.size() == 2) { + R.getORE()->emit([&]() { + return OptimizationRemarkMissed(SV_NAME, "NotPossible", IVI) + << "Cannot SLP vectorize list: only 2 elements of buildvalue, " + "trying reduction first."; + }); + return false; + } LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n"); // Aggregate value is unlikely to be processed in vector register. return tryToVectorizeList(BuildVectorOpds, R, MaxVFOnly); @@ -18270,6 +18278,14 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI, isFixedVectorShuffle(BuildVectorOpds, Mask))) return false; + if (MaxVFOnly && BuildVectorInsts.size() == 2) { + R.getORE()->emit([&]() { + return OptimizationRemarkMissed(SV_NAME, "NotPossible", IEI) + << "Cannot SLP vectorize list: only 2 elements of buildvector, " + "trying reduction first."; + }); + return false; + } LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IEI << "\n"); return tryToVectorizeList(BuildVectorInsts, R, MaxVFOnly); } diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll index a24cb81541d7c..c6209fd71063a 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/scalarization-overhead.ll @@ -10,33 +10,32 @@ define fastcc i64 @zot(float %arg, float %arg1, float %arg2, float %arg3, float ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[ARG3:%.*]], i32 2 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> , [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> , float [[ARG3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast <2 x float> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP6]], <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[TMP7]], +; CHECK-NEXT: [[VAL12:%.*]] = fadd fast float [[ARG3]], 1.000000e+00 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP2]], float [[VAL12]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float 0.000000e+00, i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <4 x float> [[TMP5]], ; CHECK-NEXT: br i1 [[ARG6:%.*]], label [[BB18:%.*]], label [[BB57:%.*]] ; CHECK: bb18: -; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x float> [ [[TMP8]], [[BB:%.*]] ] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP8]], i32 2 -; CHECK-NEXT: [[VAL23:%.*]] = fmul fast float [[TMP10]], 2.000000e+00 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP8]], i32 3 -; CHECK-NEXT: [[VAL24:%.*]] = fmul fast float [[TMP11]], 3.000000e+00 +; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x float> [ [[TMP6]], [[BB:%.*]] ] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP6]], i32 2 +; CHECK-NEXT: [[VAL23:%.*]] = fmul fast float [[TMP8]], 2.000000e+00 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP6]], i32 3 +; CHECK-NEXT: [[VAL24:%.*]] = fmul fast float [[TMP9]], 3.000000e+00 ; CHECK-NEXT: br i1 [[ARG7:%.*]], label [[BB25:%.*]], label [[BB57]] ; CHECK: bb25: -; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x float> [ [[TMP9]], [[BB18]] ] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x float> [ [[TMP7]], [[BB18]] ] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP3]], i32 1 ; CHECK-NEXT: br label [[BB30:%.*]] ; CHECK: bb30: ; CHECK-NEXT: [[VAL31:%.*]] = phi float [ [[VAL55:%.*]], [[BB30]] ], [ 0.000000e+00, [[BB25]] ] -; CHECK-NEXT: [[VAL32:%.*]] = phi float [ [[TMP13]], [[BB30]] ], [ 0.000000e+00, [[BB25]] ] -; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i8>, ptr [[ARG5:%.*]], align 1 -; CHECK-NEXT: [[TMP15:%.*]] = uitofp <4 x i8> [[TMP14]] to <4 x float> -; CHECK-NEXT: [[TMP16:%.*]] = fsub fast <4 x float> [[TMP15]], [[TMP3]] -; CHECK-NEXT: [[TMP17:%.*]] = fmul fast <4 x float> [[TMP16]], [[TMP12]] -; CHECK-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP17]]) +; CHECK-NEXT: [[VAL32:%.*]] = phi float [ [[TMP11]], [[BB30]] ], [ 0.000000e+00, [[BB25]] ] +; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARG5:%.*]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = uitofp <4 x i8> [[TMP12]] to <4 x float> +; CHECK-NEXT: [[TMP14:%.*]] = fsub fast <4 x float> [[TMP13]], [[TMP3]] +; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x float> [[TMP14]], [[TMP10]] +; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP15]]) ; CHECK-NEXT: [[VAL55]] = tail call fast float @llvm.minnum.f32(float [[VAL31]], float [[ARG1:%.*]]) -; CHECK-NEXT: [[VAL56:%.*]] = tail call fast float @llvm.maxnum.f32(float [[ARG2:%.*]], float [[TMP18]]) +; CHECK-NEXT: [[VAL56:%.*]] = tail call fast float @llvm.maxnum.f32(float [[ARG2:%.*]], float [[TMP16]]) ; CHECK-NEXT: call void @ham(float [[VAL55]], float [[VAL56]]) ; CHECK-NEXT: br i1 [[ARG8:%.*]], label [[BB30]], label [[BB57]] ; CHECK: bb57: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-extractelements.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-extractelements.ll index bf4903fd19b09..2f5936357d0f5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reused-extractelements.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-extractelements.ll @@ -7,8 +7,7 @@ ; YAML-NEXT: Name: NotPossible ; YAML-NEXT: Function: g ; YAML-NEXT: Args: -; YAML-NEXT: - String: 'Cannot SLP vectorize list: vectorization was impossible' -; YAML-NEXT: - String: ' with available vectorization factors' +; YAML-NEXT: - String: 'Cannot SLP vectorize list: only 2 elements of buildvector, trying reduction first.' define <2 x i32> @g(<2 x i32> %x, i32 %a, i32 %b) { ; CHECK-LABEL: @g(