SelectionDAG/expandFMINNUM_FMAXNUM: skips vector if SETCC/VSELECT is not legal #109570
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-backend-arm

Author: YunQiang Su (wzssyqa)

Changes

If we are working on a vector and the operation is legal for its elements, skipping the expansion is better than expanding it. That way, a few simple scalar instructions are emitted instead of a series of compares and selects.

Patch is 31.82 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/109570.diff

3 Files Affected (shown in the diff below):
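For context, the core pattern this change affects is an nnan vector minnum/maxnum intrinsic call like the one below, taken from the ARM test updated in this patch. With the change, a target whose scalar FMINNUM/FMAXNUM is legal (e.g. ARMv8 with FP, per the check prefixes in the diff) lowers each element to a single vminnm.f64/vmaxnm.f64 instead of a vcmp/vmrs/vselgt sequence:

; From llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll (updated below).
define <2 x double> @fminnumv264_intrinsic(<2 x double> %x, <2 x double> %y) {
  %a = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y)
  ret <2 x double> %a
}
declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)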
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a2a232ed93b72f..617946e4bc2a7e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8422,6 +8422,10 @@ TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
"Wrong opcode");
+ EVT VT = Node->getValueType(0);
+ if (VT.isVector() && isOperationLegal(Opcode, VT.getScalarType()))
+ return SDValue();
+
if (Node->getFlags().hasNoNaNs()) {
ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
SDValue Op1 = Node->getOperand(0);
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index 528bfe0411730a..975f3860fb1ef6 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -918,32 +918,24 @@ define <2 x double> @fminnumv264_intrinsic(<2 x double> %x, <2 x double> %y) {
; ARMV8: @ %bb.0:
; ARMV8-NEXT: mov r12, sp
; ARMV8-NEXT: vld1.64 {d16, d17}, [r12]
-; ARMV8-NEXT: vmov d18, r0, r1
-; ARMV8-NEXT: vmov d19, r2, r3
-; ARMV8-NEXT: vcmp.f64 d16, d18
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vcmp.f64 d17, d19
-; ARMV8-NEXT: vselgt.f64 d18, d18, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d18
-; ARMV8-NEXT: vselgt.f64 d16, d19, d17
+; ARMV8-NEXT: vmov d19, r0, r1
+; ARMV8-NEXT: vmov d18, r2, r3
+; ARMV8-NEXT: vminnm.f64 d19, d19, d16
+; ARMV8-NEXT: vminnm.f64 d16, d18, d17
+; ARMV8-NEXT: vmov r0, r1, d19
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
;
; ARMV8M-LABEL: fminnumv264_intrinsic:
; ARMV8M: @ %bb.0:
; ARMV8M-NEXT: mov r12, sp
-; ARMV8M-NEXT: vmov d0, r0, r1
+; ARMV8M-NEXT: vmov d0, r2, r3
; ARMV8M-NEXT: vldrw.u32 q1, [r12]
-; ARMV8M-NEXT: vmov d1, r2, r3
-; ARMV8M-NEXT: vcmp.f64 d2, d0
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vcmp.f64 d3, d1
-; ARMV8M-NEXT: vselgt.f64 d0, d0, d2
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vmov r0, r1, d0
-; ARMV8M-NEXT: vselgt.f64 d1, d1, d3
-; ARMV8M-NEXT: vmov r2, r3, d1
+; ARMV8M-NEXT: vmov d1, r0, r1
+; ARMV8M-NEXT: vminnm.f64 d1, d1, d2
+; ARMV8M-NEXT: vminnm.f64 d0, d0, d3
+; ARMV8M-NEXT: vmov r0, r1, d1
+; ARMV8M-NEXT: vmov r2, r3, d0
; ARMV8M-NEXT: bx lr
%a = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y)
ret <2 x double> %a
@@ -970,32 +962,24 @@ define <2 x double> @fminnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y)
; ARMV8: @ %bb.0:
; ARMV8-NEXT: mov r12, sp
; ARMV8-NEXT: vld1.64 {d16, d17}, [r12]
-; ARMV8-NEXT: vmov d18, r0, r1
-; ARMV8-NEXT: vmov d19, r2, r3
-; ARMV8-NEXT: vcmp.f64 d16, d18
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vcmp.f64 d17, d19
-; ARMV8-NEXT: vselgt.f64 d18, d18, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d18
-; ARMV8-NEXT: vselgt.f64 d16, d19, d17
+; ARMV8-NEXT: vmov d19, r0, r1
+; ARMV8-NEXT: vmov d18, r2, r3
+; ARMV8-NEXT: vminnm.f64 d19, d19, d16
+; ARMV8-NEXT: vminnm.f64 d16, d18, d17
+; ARMV8-NEXT: vmov r0, r1, d19
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
;
; ARMV8M-LABEL: fminnumv264_nsz_intrinsic:
; ARMV8M: @ %bb.0:
; ARMV8M-NEXT: mov r12, sp
-; ARMV8M-NEXT: vmov d0, r0, r1
+; ARMV8M-NEXT: vmov d0, r2, r3
; ARMV8M-NEXT: vldrw.u32 q1, [r12]
-; ARMV8M-NEXT: vmov d1, r2, r3
-; ARMV8M-NEXT: vcmp.f64 d2, d0
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vcmp.f64 d3, d1
-; ARMV8M-NEXT: vselgt.f64 d0, d0, d2
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vmov r0, r1, d0
-; ARMV8M-NEXT: vselgt.f64 d1, d1, d3
-; ARMV8M-NEXT: vmov r2, r3, d1
+; ARMV8M-NEXT: vmov d1, r0, r1
+; ARMV8M-NEXT: vminnm.f64 d1, d1, d2
+; ARMV8M-NEXT: vminnm.f64 d0, d0, d3
+; ARMV8M-NEXT: vmov r0, r1, d1
+; ARMV8M-NEXT: vmov r2, r3, d0
; ARMV8M-NEXT: bx lr
%a = call nnan nsz <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y)
ret <2 x double> %a
@@ -1020,31 +1004,23 @@ define <2 x double> @fminnumv264_non_zero_intrinsic(<2 x double> %x) {
;
; ARMV8-LABEL: fminnumv264_non_zero_intrinsic:
; ARMV8: @ %bb.0:
-; ARMV8-NEXT: vmov d17, r0, r1
; ARMV8-NEXT: vmov.f64 d16, #1.000000e+00
-; ARMV8-NEXT: vcmp.f64 d16, d17
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov d18, r2, r3
-; ARMV8-NEXT: vcmp.f64 d16, d18
-; ARMV8-NEXT: vselgt.f64 d17, d17, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d17
-; ARMV8-NEXT: vselgt.f64 d16, d18, d16
+; ARMV8-NEXT: vmov d18, r0, r1
+; ARMV8-NEXT: vmov d17, r2, r3
+; ARMV8-NEXT: vminnm.f64 d18, d18, d16
+; ARMV8-NEXT: vminnm.f64 d16, d17, d16
+; ARMV8-NEXT: vmov r0, r1, d18
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
;
; ARMV8M-LABEL: fminnumv264_non_zero_intrinsic:
; ARMV8M: @ %bb.0:
-; ARMV8M-NEXT: vmov d1, r0, r1
; ARMV8M-NEXT: vmov.f64 d0, #1.000000e+00
-; ARMV8M-NEXT: vcmp.f64 d0, d1
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vmov d2, r2, r3
-; ARMV8M-NEXT: vcmp.f64 d0, d2
-; ARMV8M-NEXT: vselgt.f64 d1, d1, d0
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vmov r0, r1, d1
-; ARMV8M-NEXT: vselgt.f64 d0, d2, d0
+; ARMV8M-NEXT: vmov d2, r0, r1
+; ARMV8M-NEXT: vmov d1, r2, r3
+; ARMV8M-NEXT: vminnm.f64 d2, d2, d0
+; ARMV8M-NEXT: vminnm.f64 d0, d1, d0
+; ARMV8M-NEXT: vmov r0, r1, d2
; ARMV8M-NEXT: vmov r2, r3, d0
; ARMV8M-NEXT: bx lr
%a = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double><double 1.0, double 1.0>)
@@ -1070,34 +1046,26 @@ define <2 x double> @fminnumv264_one_zero_intrinsic(<2 x double> %x) {
;
; ARMV8-LABEL: fminnumv264_one_zero_intrinsic:
; ARMV8: @ %bb.0:
-; ARMV8-NEXT: vmov d19, r2, r3
-; ARMV8-NEXT: vcmp.f64 d19, #0
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov d18, r0, r1
; ARMV8-NEXT: vmov.f64 d16, #-1.000000e+00
-; ARMV8-NEXT: vcmp.f64 d16, d18
+; ARMV8-NEXT: vmov d18, r0, r1
; ARMV8-NEXT: vmov.i32 d17, #0x0
-; ARMV8-NEXT: vmovlt.f64 d17, d19
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r2, r3, d17
-; ARMV8-NEXT: vselgt.f64 d16, d18, d16
+; ARMV8-NEXT: vminnm.f64 d16, d18, d16
+; ARMV8-NEXT: vmov d19, r2, r3
+; ARMV8-NEXT: vminnm.f64 d17, d19, d17
; ARMV8-NEXT: vmov r0, r1, d16
+; ARMV8-NEXT: vmov r2, r3, d17
; ARMV8-NEXT: bx lr
;
; ARMV8M-LABEL: fminnumv264_one_zero_intrinsic:
; ARMV8M: @ %bb.0:
-; ARMV8M-NEXT: vmov d3, r2, r3
+; ARMV8M-NEXT: vmov.f64 d0, #-1.000000e+00
; ARMV8M-NEXT: vldr d1, .LCPI27_0
-; ARMV8M-NEXT: vcmp.f64 d3, #0
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
; ARMV8M-NEXT: vmov d2, r0, r1
-; ARMV8M-NEXT: vmov.f64 d0, #-1.000000e+00
-; ARMV8M-NEXT: vcmp.f64 d0, d2
-; ARMV8M-NEXT: vmovlt.f64 d1, d3
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vmov r2, r3, d1
-; ARMV8M-NEXT: vselgt.f64 d0, d2, d0
+; ARMV8M-NEXT: vmov d3, r2, r3
+; ARMV8M-NEXT: vminnm.f64 d0, d2, d0
+; ARMV8M-NEXT: vminnm.f64 d1, d3, d1
; ARMV8M-NEXT: vmov r0, r1, d0
+; ARMV8M-NEXT: vmov r2, r3, d1
; ARMV8M-NEXT: bx lr
; ARMV8M-NEXT: .p2align 3
; ARMV8M-NEXT: @ %bb.1:
@@ -1129,31 +1097,23 @@ define <2 x double> @fmaxnumv264_intrinsic(<2 x double> %x, <2 x double> %y) {
; ARMV8: @ %bb.0:
; ARMV8-NEXT: mov r12, sp
; ARMV8-NEXT: vld1.64 {d16, d17}, [r12]
-; ARMV8-NEXT: vmov d18, r0, r1
-; ARMV8-NEXT: vcmp.f64 d18, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov d19, r2, r3
-; ARMV8-NEXT: vcmp.f64 d19, d17
-; ARMV8-NEXT: vselgt.f64 d18, d18, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d18
-; ARMV8-NEXT: vselgt.f64 d16, d19, d17
+; ARMV8-NEXT: vmov d19, r0, r1
+; ARMV8-NEXT: vmov d18, r2, r3
+; ARMV8-NEXT: vmaxnm.f64 d19, d19, d16
+; ARMV8-NEXT: vmaxnm.f64 d16, d18, d17
+; ARMV8-NEXT: vmov r0, r1, d19
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
;
; ARMV8M-LABEL: fmaxnumv264_intrinsic:
; ARMV8M: @ %bb.0:
; ARMV8M-NEXT: mov r12, sp
-; ARMV8M-NEXT: vmov d1, r0, r1
-; ARMV8M-NEXT: vldrw.u32 q1, [r12]
; ARMV8M-NEXT: vmov d0, r2, r3
-; ARMV8M-NEXT: vcmp.f64 d1, d2
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vcmp.f64 d0, d3
-; ARMV8M-NEXT: vselgt.f64 d1, d1, d2
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
+; ARMV8M-NEXT: vldrw.u32 q1, [r12]
+; ARMV8M-NEXT: vmov d1, r0, r1
+; ARMV8M-NEXT: vmaxnm.f64 d1, d1, d2
+; ARMV8M-NEXT: vmaxnm.f64 d0, d0, d3
; ARMV8M-NEXT: vmov r0, r1, d1
-; ARMV8M-NEXT: vselgt.f64 d0, d0, d3
; ARMV8M-NEXT: vmov r2, r3, d0
; ARMV8M-NEXT: bx lr
%a = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y)
@@ -1181,31 +1141,23 @@ define <2 x double> @fmaxnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y)
; ARMV8: @ %bb.0:
; ARMV8-NEXT: mov r12, sp
; ARMV8-NEXT: vld1.64 {d16, d17}, [r12]
-; ARMV8-NEXT: vmov d18, r0, r1
-; ARMV8-NEXT: vcmp.f64 d18, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov d19, r2, r3
-; ARMV8-NEXT: vcmp.f64 d19, d17
-; ARMV8-NEXT: vselgt.f64 d18, d18, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d18
-; ARMV8-NEXT: vselgt.f64 d16, d19, d17
+; ARMV8-NEXT: vmov d19, r0, r1
+; ARMV8-NEXT: vmov d18, r2, r3
+; ARMV8-NEXT: vmaxnm.f64 d19, d19, d16
+; ARMV8-NEXT: vmaxnm.f64 d16, d18, d17
+; ARMV8-NEXT: vmov r0, r1, d19
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
;
; ARMV8M-LABEL: fmaxnumv264_nsz_intrinsic:
; ARMV8M: @ %bb.0:
; ARMV8M-NEXT: mov r12, sp
-; ARMV8M-NEXT: vmov d1, r0, r1
-; ARMV8M-NEXT: vldrw.u32 q1, [r12]
; ARMV8M-NEXT: vmov d0, r2, r3
-; ARMV8M-NEXT: vcmp.f64 d1, d2
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vcmp.f64 d0, d3
-; ARMV8M-NEXT: vselgt.f64 d1, d1, d2
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
+; ARMV8M-NEXT: vldrw.u32 q1, [r12]
+; ARMV8M-NEXT: vmov d1, r0, r1
+; ARMV8M-NEXT: vmaxnm.f64 d1, d1, d2
+; ARMV8M-NEXT: vmaxnm.f64 d0, d0, d3
; ARMV8M-NEXT: vmov r0, r1, d1
-; ARMV8M-NEXT: vselgt.f64 d0, d0, d3
; ARMV8M-NEXT: vmov r2, r3, d0
; ARMV8M-NEXT: bx lr
%a = call nnan nsz <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double> %y)
@@ -1236,18 +1188,14 @@ define <2 x double> @fmaxnumv264_zero_intrinsic(<2 x double> %x) {
;
; ARMV8-LABEL: fmaxnumv264_zero_intrinsic:
; ARMV8: @ %bb.0:
-; ARMV8-NEXT: vmov d18, r0, r1
; ARMV8-NEXT: vldr d16, .LCPI30_0
-; ARMV8-NEXT: vcmp.f64 d18, #0
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov d19, r2, r3
-; ARMV8-NEXT: vcmp.f64 d19, d16
+; ARMV8-NEXT: vmov d18, r2, r3
; ARMV8-NEXT: vmov.i32 d17, #0x0
-; ARMV8-NEXT: vselgt.f64 d17, d18, d17
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d17
-; ARMV8-NEXT: vselgt.f64 d16, d19, d16
+; ARMV8-NEXT: vmov d19, r0, r1
+; ARMV8-NEXT: vmaxnm.f64 d16, d18, d16
+; ARMV8-NEXT: vmaxnm.f64 d17, d19, d17
; ARMV8-NEXT: vmov r2, r3, d16
+; ARMV8-NEXT: vmov r0, r1, d17
; ARMV8-NEXT: bx lr
; ARMV8-NEXT: .p2align 3
; ARMV8-NEXT: @ %bb.1:
@@ -1257,18 +1205,14 @@ define <2 x double> @fmaxnumv264_zero_intrinsic(<2 x double> %x) {
;
; ARMV8M-LABEL: fmaxnumv264_zero_intrinsic:
; ARMV8M: @ %bb.0:
-; ARMV8M-NEXT: vmov d2, r0, r1
; ARMV8M-NEXT: vldr d0, .LCPI30_0
-; ARMV8M-NEXT: vcmp.f64 d2, #0
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vmov d3, r2, r3
-; ARMV8M-NEXT: vcmp.f64 d3, d0
+; ARMV8M-NEXT: vmov d2, r2, r3
; ARMV8M-NEXT: vldr d1, .LCPI30_1
-; ARMV8M-NEXT: vselgt.f64 d1, d2, d1
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vmov r0, r1, d1
-; ARMV8M-NEXT: vselgt.f64 d0, d3, d0
+; ARMV8M-NEXT: vmov d3, r0, r1
+; ARMV8M-NEXT: vmaxnm.f64 d0, d2, d0
+; ARMV8M-NEXT: vmaxnm.f64 d1, d3, d1
; ARMV8M-NEXT: vmov r2, r3, d0
+; ARMV8M-NEXT: vmov r0, r1, d1
; ARMV8M-NEXT: bx lr
; ARMV8M-NEXT: .p2align 3
; ARMV8M-NEXT: @ %bb.1:
@@ -1307,15 +1251,11 @@ define <2 x double> @fmaxnumv264_minus_zero_intrinsic(<2 x double> %x) {
; ARMV8-LABEL: fmaxnumv264_minus_zero_intrinsic:
; ARMV8: @ %bb.0:
; ARMV8-NEXT: vldr d16, .LCPI31_0
-; ARMV8-NEXT: vmov d17, r0, r1
-; ARMV8-NEXT: vmov d18, r2, r3
-; ARMV8-NEXT: vcmp.f64 d17, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vcmp.f64 d18, d16
-; ARMV8-NEXT: vselgt.f64 d17, d17, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d17
-; ARMV8-NEXT: vselgt.f64 d16, d18, d16
+; ARMV8-NEXT: vmov d18, r0, r1
+; ARMV8-NEXT: vmov d17, r2, r3
+; ARMV8-NEXT: vmaxnm.f64 d18, d18, d16
+; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16
+; ARMV8-NEXT: vmov r0, r1, d18
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
; ARMV8-NEXT: .p2align 3
@@ -1327,15 +1267,11 @@ define <2 x double> @fmaxnumv264_minus_zero_intrinsic(<2 x double> %x) {
; ARMV8M-LABEL: fmaxnumv264_minus_zero_intrinsic:
; ARMV8M: @ %bb.0:
; ARMV8M-NEXT: vldr d0, .LCPI31_0
-; ARMV8M-NEXT: vmov d1, r0, r1
-; ARMV8M-NEXT: vmov d2, r2, r3
-; ARMV8M-NEXT: vcmp.f64 d1, d0
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vcmp.f64 d2, d0
-; ARMV8M-NEXT: vselgt.f64 d1, d1, d0
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vmov r0, r1, d1
-; ARMV8M-NEXT: vselgt.f64 d0, d2, d0
+; ARMV8M-NEXT: vmov d2, r0, r1
+; ARMV8M-NEXT: vmov d1, r2, r3
+; ARMV8M-NEXT: vmaxnm.f64 d2, d2, d0
+; ARMV8M-NEXT: vmaxnm.f64 d0, d1, d0
+; ARMV8M-NEXT: vmov r0, r1, d2
; ARMV8M-NEXT: vmov r2, r3, d0
; ARMV8M-NEXT: bx lr
; ARMV8M-NEXT: .p2align 3
@@ -1367,30 +1303,22 @@ define <2 x double> @fmaxnumv264_non_zero_intrinsic(<2 x double> %x) {
; ARMV8-LABEL: fmaxnumv264_non_zero_intrinsic:
; ARMV8: @ %bb.0:
; ARMV8-NEXT: vmov.f64 d16, #1.000000e+00
-; ARMV8-NEXT: vmov d17, r0, r1
-; ARMV8-NEXT: vcmp.f64 d17, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov d18, r2, r3
-; ARMV8-NEXT: vcmp.f64 d18, d16
-; ARMV8-NEXT: vselgt.f64 d17, d17, d16
-; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8-NEXT: vmov r0, r1, d17
-; ARMV8-NEXT: vselgt.f64 d16, d18, d16
+; ARMV8-NEXT: vmov d18, r0, r1
+; ARMV8-NEXT: vmov d17, r2, r3
+; ARMV8-NEXT: vmaxnm.f64 d18, d18, d16
+; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16
+; ARMV8-NEXT: vmov r0, r1, d18
; ARMV8-NEXT: vmov r2, r3, d16
; ARMV8-NEXT: bx lr
;
; ARMV8M-LABEL: fmaxnumv264_non_zero_intrinsic:
; ARMV8M: @ %bb.0:
; ARMV8M-NEXT: vmov.f64 d0, #1.000000e+00
-; ARMV8M-NEXT: vmov d1, r0, r1
-; ARMV8M-NEXT: vcmp.f64 d1, d0
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vmov d2, r2, r3
-; ARMV8M-NEXT: vcmp.f64 d2, d0
-; ARMV8M-NEXT: vselgt.f64 d1, d1, d0
-; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr
-; ARMV8M-NEXT: vmov r0, r1, d1
-; ARMV8M-NEXT: vselgt.f64 d0, d2, d0
+; ARMV8M-NEXT: vmov d2, r0, r1
+; ARMV8M-NEXT: vmov d1, r2, r3
+; ARMV8M-NEXT: vmaxnm.f64 d2, d2, d0
+; ARMV8M-NEXT: vmaxnm.f64 d0, d1, d0
+; ARMV8M-NEXT: vmov r0, r1, d2
; ARMV8M-NEXT: vmov r2, r3, d0
; ARMV8M-NEXT: bx lr
%a = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double><double 1.0, double 1.0>)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
index 7cafb7262f460d..20a90033659f64 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fminmax.ll
@@ -43,21 +43,13 @@ define arm_aapcs_vfpcc float @fmin_v8f32(<8 x float> %x) {
;
; CHECK-NOFP-LABEL: fmin_v8f32:
; CHECK-NOFP: @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT: vcmp.f32 s5, s1
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s4, s0
-; CHECK-NOFP-NEXT: vselgt.f32 s8, s1, s5
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s6, s2
-; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s7, s3
+; CHECK-NOFP-NEXT: vminnm.f32 s8, s1, s5
+; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s8
-; CHECK-NOFP-NEXT: vselgt.f32 s2, s2, s6
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vminnm.f32 s2, s2, s6
+; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
+; CHECK-NOFP-NEXT: vminnm.f32 s2, s3, s7
; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
-; CHECK-NOFP-NEXT: vselgt.f32 s4, s3, s7
-; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
; CHECK-NOFP-NEXT: bx lr
entry:
%z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
@@ -129,44 +121,28 @@ define arm_aapcs_vfpcc half @fmin_v16f16(<16 x half> %x) {
;
; CHECK-NOFP-LABEL: fmin_v16f16:
; CHECK-NOFP: @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
-; CHECK-NOFP-NEXT: vcmp.f16 s8, s10
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s4, s0
-; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s5, s1
-; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
+; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
+; CHECK-NOFP-NEXT: vminnm.f16 s8, s10, s8
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s8
-; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s1, s5
+; CHECK-NOFP-NEXT: vminnm.f16 s4, s1, s5
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
-; CHECK-NOFP-NEXT: vcmp.f16 s4, s8
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s6, s2
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s8, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
+; CHECK-NOFP-NEXT: vminnm.f16 s4, s8, s4
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
-; CHECK-NOFP-NEXT: vselgt.f16 s4, s2, s6
-; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
+; CHECK-NOFP-NEXT: vminnm.f16 s4, s2, s6
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
-; CHECK-NOFP-NEXT: vcmp.f16 s4, s2
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f16 s7, s3
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s2, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
+; CHECK-NOFP-NEXT: vminnm.f16 s2, s2, s4
; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s3, s7
+; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NOFP-NEXT: vminnm.f16 s2, s3, s7
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
-; CHECK-NOFP-NEXT: vcmp.f16 s2, s4
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vselgt.f16 s2, s4, s2
+; CHECK-NOFP-NEXT: vminnm.f16 s2, s4, s2
; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
; CHECK-NOFP-NEXT: bx lr
entry:
@@ -196,12 +172,8 @@ entry:
define arm_aapcs_vfpcc double @fmin_v4f64(<4 x double> %x) {
; CHECK-LABEL: fmin_v4f64:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.f64 d3, d1
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f64 d2, d0
-; CHECK-NEXT: vselgt.f64 d1, d1, d3
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f64 d0, d0, d2
+; CHECK-NEXT: vminnm.f64 d1, d1, d3
+; CHECK-NEXT: vminnm.f64 d0, d0, d2
; CHECK-NEXT: vminnm.f64 d0, d0, d1
; CHECK-NEXT: bx lr
entry:
@@ -435,21 +407,13 @@ define arm_aapcs_vfpcc float @fmin_v8f32_acc(<8 x float> %x, float %y) {
;
; CHECK-NOFP-LABEL: fmin_v8f32_acc:
; CHECK-NOFP: @ %bb.0: @ %entry
-; CHECK-NOFP-NEXT: vcmp.f32 s5, s1
-; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NOFP-NEXT: vcmp.f32 s4, s0
-; CHECK-NOFP-NEXT: ...
[truncated]
We have really bad vector splitting logic if the vector type is legal. The correct vector splitting only happens by default for illegal vector types to narrower legal vector types in the vector legalization. Most legal expansions go straight to scalarization, which this also does. This is fine, but most of the other instances I see check based on which vector operations are legal, not that the scalar operation is legal. For consistency it's probably best to check the legality of the vector expansions.
If SETCC or VSELECT is not legal for the vector type, we should not expand the FMINNUM/FMAXNUM; instead we can split the vectors. That way, a few simple scalar instructions can be emitted instead of pairs of compares and selects.
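A minimal sketch of what this check amounts to inside TargetLowering::createSelectForFMINNUM_FMAXNUM, reusing the names from the diff above; this is illustrative only and need not match the committed code exactly:

  // Illustrative sketch, not necessarily the exact committed code:
  // if the vector SETCC/VSELECT that the compare+select expansion would
  // need are not legal, return an empty SDValue so the legalizer splits
  // or scalarizes the vector FMINNUM/FMAXNUM instead of expanding it here.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::VSELECT, VT) ||
       !isOperationLegalOrCustom(ISD::SETCC, VT)))
    return SDValue();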