diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 1c1c94e7193ae..2283f99202e2f 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -421,6 +421,8 @@ struct SDNodeFlags { PoisonGeneratingFlags = NoUnsignedWrap | NoSignedWrap | Exact | Disjoint | NonNeg | NoNaNs | NoInfs | SameSign, + FastMathFlags = NoNaNs | NoInfs | NoSignedZeros | AllowReciprocal | + AllowContract | ApproximateFuncs | AllowReassociation, }; /// Default constructor turns off all optimization flags. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 2b8818482a333..70b2ad05f8b6e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -5075,6 +5075,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { if (Node->getOpcode() == ISD::BR_CC || Node->getOpcode() == ISD::SELECT_CC) OVT = Node->getOperand(2).getSimpleValueType(); + // Preserve fast math flags + SDNodeFlags FastMathFlags = Node->getFlags() & SDNodeFlags::FastMathFlags; + SelectionDAG::FlagInserter FlagsInserter(DAG, FastMathFlags); MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index b6abad830c371..5210372dd935f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -233,6 +233,10 @@ bool DAGTypeLegalizer::run() { assert(N->getNodeId() == ReadyToProcess && "Node should be ready if on worklist!"); + // Preserve fast math flags + SDNodeFlags FastMathFlags = N->getFlags() & SDNodeFlags::FastMathFlags; + SelectionDAG::FlagInserter FlagsInserter(DAG, FastMathFlags); + LLVM_DEBUG(dbgs() << "\nLegalizing node: "; N->dump(&DAG)); if (IgnoreNodeResults(N)) { LLVM_DEBUG(dbgs() << "Ignoring node results\n"); diff --git a/llvm/test/CodeGen/AArch64/fp16_fast_math.ll b/llvm/test/CodeGen/AArch64/fp16_fast_math.ll new file mode 100644 index 0000000000000..b7d2de708a110 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fp16_fast_math.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 -stop-after=finalize-isel | FileCheck %s --check-prefixes=CHECK-CVT +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -stop-after=finalize-isel | FileCheck %s --check-prefixes=CHECK-FP16 + +; Check that the output instructions have the same fast math flags as the input +; fadd, even when fadd is promoted to float type. + +define half @normal_fadd(half %x, half %y) { + ; CHECK-CVT-LABEL: name: normal_fadd + ; CHECK-CVT: bb.0.entry: + ; CHECK-CVT-NEXT: liveins: $h0, $h1 + ; CHECK-CVT-NEXT: {{ $}} + ; CHECK-CVT-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1 + ; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0 + ; CHECK-CVT-NEXT: [[FCVTSHr:%[0-9]+]]:fpr32 = nofpexcept FCVTSHr [[COPY]], implicit $fpcr + ; CHECK-CVT-NEXT: [[FCVTSHr1:%[0-9]+]]:fpr32 = nofpexcept FCVTSHr [[COPY1]], implicit $fpcr + ; CHECK-CVT-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nofpexcept FADDSrr killed [[FCVTSHr1]], killed [[FCVTSHr]], implicit $fpcr + ; CHECK-CVT-NEXT: [[FCVTHSr:%[0-9]+]]:fpr16 = nofpexcept FCVTHSr killed [[FADDSrr]], implicit $fpcr + ; CHECK-CVT-NEXT: $h0 = COPY [[FCVTHSr]] + ; CHECK-CVT-NEXT: RET_ReallyLR implicit $h0 + ; + ; CHECK-FP16-LABEL: name: normal_fadd + ; CHECK-FP16: bb.0.entry: + ; CHECK-FP16-NEXT: liveins: $h0, $h1 + ; CHECK-FP16-NEXT: {{ $}} + ; CHECK-FP16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1 + ; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0 + ; CHECK-FP16-NEXT: [[FADDHrr:%[0-9]+]]:fpr16 = nofpexcept FADDHrr [[COPY1]], [[COPY]], implicit $fpcr + ; CHECK-FP16-NEXT: $h0 = COPY [[FADDHrr]] + ; CHECK-FP16-NEXT: RET_ReallyLR implicit $h0 +entry: + %add = fadd half %x, %y + ret half %add +} + +define half @fast_fadd(half %x, half %y) { + ; CHECK-CVT-LABEL: name: fast_fadd + ; CHECK-CVT: bb.0.entry: + ; CHECK-CVT-NEXT: liveins: $h0, $h1 + ; CHECK-CVT-NEXT: {{ $}} + ; CHECK-CVT-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1 + ; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0 + ; CHECK-CVT-NEXT: [[FCVTSHr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FCVTSHr [[COPY]], implicit $fpcr + ; CHECK-CVT-NEXT: [[FCVTSHr1:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FCVTSHr [[COPY1]], implicit $fpcr + ; CHECK-CVT-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDSrr killed [[FCVTSHr1]], killed [[FCVTSHr]], implicit $fpcr + ; CHECK-CVT-NEXT: [[FCVTHSr:%[0-9]+]]:fpr16 = nnan ninf nsz arcp contract afn reassoc nofpexcept FCVTHSr killed [[FADDSrr]], implicit $fpcr + ; CHECK-CVT-NEXT: $h0 = COPY [[FCVTHSr]] + ; CHECK-CVT-NEXT: RET_ReallyLR implicit $h0 + ; + ; CHECK-FP16-LABEL: name: fast_fadd + ; CHECK-FP16: bb.0.entry: + ; CHECK-FP16-NEXT: liveins: $h0, $h1 + ; CHECK-FP16-NEXT: {{ $}} + ; CHECK-FP16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1 + ; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0 + ; CHECK-FP16-NEXT: [[FADDHrr:%[0-9]+]]:fpr16 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDHrr [[COPY1]], [[COPY]], implicit $fpcr + ; CHECK-FP16-NEXT: $h0 = COPY [[FADDHrr]] + ; CHECK-FP16-NEXT: RET_ReallyLR implicit $h0 +entry: + %add = fadd fast half %x, %y + ret half %add +} + +define half @ninf_fadd(half %x, half %y) { + ; CHECK-CVT-LABEL: name: ninf_fadd + ; CHECK-CVT: bb.0.entry: + ; CHECK-CVT-NEXT: liveins: $h0, $h1 + ; CHECK-CVT-NEXT: {{ $}} + ; CHECK-CVT-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1 + ; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0 + ; CHECK-CVT-NEXT: [[FCVTSHr:%[0-9]+]]:fpr32 = ninf nofpexcept FCVTSHr [[COPY]], implicit $fpcr + ; CHECK-CVT-NEXT: [[FCVTSHr1:%[0-9]+]]:fpr32 = ninf nofpexcept FCVTSHr [[COPY1]], implicit $fpcr + ; CHECK-CVT-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = ninf nofpexcept FADDSrr killed [[FCVTSHr1]], killed [[FCVTSHr]], implicit $fpcr + ; CHECK-CVT-NEXT: [[FCVTHSr:%[0-9]+]]:fpr16 = ninf nofpexcept FCVTHSr killed [[FADDSrr]], implicit $fpcr + ; CHECK-CVT-NEXT: $h0 = COPY [[FCVTHSr]] + ; CHECK-CVT-NEXT: RET_ReallyLR implicit $h0 + ; + ; CHECK-FP16-LABEL: name: ninf_fadd + ; CHECK-FP16: bb.0.entry: + ; CHECK-FP16-NEXT: liveins: $h0, $h1 + ; CHECK-FP16-NEXT: {{ $}} + ; CHECK-FP16-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $h1 + ; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $h0 + ; CHECK-FP16-NEXT: [[FADDHrr:%[0-9]+]]:fpr16 = ninf nofpexcept FADDHrr [[COPY1]], [[COPY]], implicit $fpcr + ; CHECK-FP16-NEXT: $h0 = COPY [[FADDHrr]] + ; CHECK-FP16-NEXT: RET_ReallyLR implicit $h0 +entry: + %add = fadd ninf half %x, %y + ret half %add +} diff --git a/llvm/test/CodeGen/ARM/fp16_fast_math.ll b/llvm/test/CodeGen/ARM/fp16_fast_math.ll new file mode 100644 index 0000000000000..b440bb97674b4 --- /dev/null +++ b/llvm/test/CodeGen/ARM/fp16_fast_math.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=arm -mattr=+vfp4d16sp,-fullfp16 -stop-after=finalize-isel | FileCheck %s --check-prefixes=CHECK-CVT +; RUN: llc < %s -mtriple=arm -mattr=+vfp4d16sp,+fullfp16 -stop-after=finalize-isel | FileCheck %s --check-prefixes=CHECK-FP16 + +; Check that the output instructions have the same fast math flags as the input +; fadd, even when f16 is legalized to f32. +; FIXME: We don't get fast math flags on VCVTBHS because they get lost during a +; DAGCombine transformation. +; FIXME: We don't get fast math flags on VCVTBSH because the outermost node in +; the isel pattern is COPY_TO_REGCLASS and the fast math flags end up there. + +define half @normal_fadd(half %x, half %y) { + ; CHECK-CVT-LABEL: name: normal_fadd + ; CHECK-CVT: bb.0.entry: + ; CHECK-CVT-NEXT: liveins: $r0, $r1 + ; CHECK-CVT-NEXT: {{ $}} + ; CHECK-CVT-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r1 + ; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0 + ; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]] + ; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]] + ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF + ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[VCVTBSH]] + ; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]] + ; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 + ; + ; CHECK-FP16-LABEL: name: normal_fadd + ; CHECK-FP16: bb.0.entry: + ; CHECK-FP16-NEXT: liveins: $r0, $r1 + ; CHECK-FP16-NEXT: {{ $}} + ; CHECK-FP16-NEXT: [[COPY:%[0-9]+]]:rgpr = COPY $r1 + ; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 + ; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg + ; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg + ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg + ; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]] + ; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 +entry: + %add = fadd half %x, %y + ret half %add +} + +define half @fast_fadd(half %x, half %y) { + ; CHECK-CVT-LABEL: name: fast_fadd + ; CHECK-CVT: bb.0.entry: + ; CHECK-CVT-NEXT: liveins: $r0, $r1 + ; CHECK-CVT-NEXT: {{ $}} + ; CHECK-CVT-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r1 + ; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0 + ; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]] + ; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]] + ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF + ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[VCVTBSH]] + ; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]] + ; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 + ; + ; CHECK-FP16-LABEL: name: fast_fadd + ; CHECK-FP16: bb.0.entry: + ; CHECK-FP16-NEXT: liveins: $r0, $r1 + ; CHECK-FP16-NEXT: {{ $}} + ; CHECK-FP16-NEXT: [[COPY:%[0-9]+]]:rgpr = COPY $r1 + ; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 + ; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg + ; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg + ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf nsz arcp contract afn reassoc VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg + ; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]] + ; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 +entry: + %add = fadd fast half %x, %y + ret half %add +} + +define half @ninf_fadd(half %x, half %y) { + ; CHECK-CVT-LABEL: name: ninf_fadd + ; CHECK-CVT: bb.0.entry: + ; CHECK-CVT-NEXT: liveins: $r0, $r1 + ; CHECK-CVT-NEXT: {{ $}} + ; CHECK-CVT-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r1 + ; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0 + ; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]] + ; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]] + ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF + ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg + ; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[VCVTBSH]] + ; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]] + ; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 + ; + ; CHECK-FP16-LABEL: name: ninf_fadd + ; CHECK-FP16: bb.0.entry: + ; CHECK-FP16-NEXT: liveins: $r0, $r1 + ; CHECK-FP16-NEXT: {{ $}} + ; CHECK-FP16-NEXT: [[COPY:%[0-9]+]]:rgpr = COPY $r1 + ; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0 + ; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg + ; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg + ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg + ; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]] + ; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0 +entry: + %add = fadd ninf half %x, %y + ret half %add +} diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll index d3518fe468607..5868bd267e26d 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fmax-legalization-soft-float.ll @@ -11,37 +11,49 @@ define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: mov r8, #255 -; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: orr r8, r8, #65280 +; CHECK-NEXT: mov r4, #255 +; CHECK-NEXT: mov r8, r3 +; CHECK-NEXT: orr r4, r4, #65280 ; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: and r0, r0, r8 +; CHECK-NEXT: and r0, r0, r4 ; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: and r0, r6, r8 +; CHECK-NEXT: and r0, r6, r4 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: bl fmaxf +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: bl __aeabi_fcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r6, r7 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: and r0, r5, r8 +; CHECK-NEXT: and r0, r5, r4 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: and r0, r6, r8 +; CHECK-NEXT: and r0, r6, r4 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl fmaxf +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: bl __aeabi_fcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r5, r6 +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: and r0, r0, r4 +; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: and r0, r4, r8 +; CHECK-NEXT: and r0, r8, r4 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: and r0, r5, r8 -; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl fmaxf +; CHECK-NEXT: bl __aeabi_fcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r4, r5 +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov pc, lr diff --git a/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll index 14644e00c94b0..2f5645a887616 100644 --- a/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll +++ b/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll @@ -11,37 +11,49 @@ define half @test_v4f16(<4 x half> %a) nounwind { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: mov r8, #255 -; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: orr r8, r8, #65280 +; CHECK-NEXT: mov r4, #255 +; CHECK-NEXT: mov r8, r3 +; CHECK-NEXT: orr r4, r4, #65280 ; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: and r0, r0, r8 +; CHECK-NEXT: and r0, r0, r4 ; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: and r0, r6, r8 +; CHECK-NEXT: and r0, r6, r4 ; CHECK-NEXT: bl __aeabi_h2f -; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: bl fminf +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: bl __aeabi_fcmplt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r6, r7 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: and r0, r5, r8 +; CHECK-NEXT: and r0, r5, r4 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: and r0, r6, r8 +; CHECK-NEXT: and r0, r6, r4 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl fminf +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: bl __aeabi_fcmplt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r5, r6 +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __aeabi_f2h +; CHECK-NEXT: and r0, r0, r4 +; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: and r0, r4, r8 +; CHECK-NEXT: and r0, r8, r4 ; CHECK-NEXT: bl __aeabi_h2f ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: and r0, r5, r8 -; CHECK-NEXT: bl __aeabi_h2f +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl fminf +; CHECK-NEXT: bl __aeabi_fcmplt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r4, r5 +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: bl __aeabi_f2h ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov pc, lr