diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 8bf0d118da575..d36eaf6cf7271 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -442,14 +442,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); // If we're enabling GP optimizations, use hardware square root - if (!Subtarget.hasFSQRT() && - !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() && - Subtarget.hasFRE())) + if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE())) setOperationAction(ISD::FSQRT, MVT::f64, Expand); if (!Subtarget.hasFSQRT() && - !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() && - Subtarget.hasFRES())) + !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES())) setOperationAction(ISD::FSQRT, MVT::f32, Expand); if (Subtarget.hasFCPSGN()) { @@ -565,16 +562,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::i32, Legal); setOperationAction(ISD::BITCAST, MVT::i64, Legal); setOperationAction(ISD::BITCAST, MVT::f64, Legal); - if (TM.Options.UnsafeFPMath) { - setOperationAction(ISD::LRINT, MVT::f64, Legal); - setOperationAction(ISD::LRINT, MVT::f32, Legal); - setOperationAction(ISD::LLRINT, MVT::f64, Legal); - setOperationAction(ISD::LLRINT, MVT::f32, Legal); - setOperationAction(ISD::LROUND, MVT::f64, Legal); - setOperationAction(ISD::LROUND, MVT::f32, Legal); - setOperationAction(ISD::LLROUND, MVT::f64, Legal); - setOperationAction(ISD::LLROUND, MVT::f32, Legal); - } + + setOperationAction(ISD::STRICT_LRINT, MVT::f64, Custom); + setOperationAction(ISD::STRICT_LRINT, MVT::f32, Custom); + setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Custom); + setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Custom); + setOperationAction(ISD::STRICT_LROUND, MVT::f64, Custom); + setOperationAction(ISD::STRICT_LROUND, MVT::f32, Custom); + setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Custom); + setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Custom); } else { setOperationAction(ISD::BITCAST, MVT::f32, Expand); setOperationAction(ISD::BITCAST, MVT::i32, Expand); @@ -1034,11 +1030,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); // The nearbyint variants are not allowed to raise the inexact exception - // so we can only code-gen them with unsafe math. - if (TM.Options.UnsafeFPMath) { - setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); - } + // so we can only code-gen them with fpexcept.ignore. + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f64, Custom); + setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f32, Custom); setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); @@ -8911,11 +8905,13 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // be lost at this stage, but is below the single-precision rounding // position. // - // However, if -enable-unsafe-fp-math is in effect, accept double + // However, if afn is in effect, accept double // rounding to avoid the extra overhead. - if (Op.getValueType() == MVT::f32 && - !Subtarget.hasFPCVT() && - !DAG.getTarget().Options.UnsafeFPMath) { + // FIXME: Currently INT_TO_FP can't support fast math flags because + // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always + // false. + if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() && + !Op->getFlags().hasApproximateFuncs()) { // Twiddle input to make sure the low 11 bits are zero. (If this // is the case, we are guaranteed the value will fit into the 53 bit @@ -12759,6 +12755,14 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerADDSUBO_CARRY(Op, DAG); case ISD::UCMP: return LowerUCMP(Op, DAG); + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + case ISD::STRICT_FNEARBYINT: + if (Op->getFlags().hasNoFPExcept()) + return Op; + return SDValue(); } } @@ -18707,11 +18711,12 @@ bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const { const Function *F = I->getFunction(); const DataLayout &DL = F->getDataLayout(); Type *Ty = User->getOperand(0)->getType(); + bool AllowContract = I->getFastMathFlags().allowContract() && + User->getFastMathFlags().allowContract(); - return !( - isFMAFasterThanFMulAndFAdd(*F, Ty) && - isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) && - (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath)); + return !(isFMAFasterThanFMulAndFAdd(*F, Ty) && + isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) && + (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast)); } case Instruction::Load: { // Don't break "store (load float*)" pattern, this pattern will be combined diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 979ba31b0431b..885bed670e319 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2786,14 +2786,16 @@ def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>; // Rounding without exceptions (nearbyint). Due to strange tblgen behaviour, // these need to be defined after the any_frint versions so ISEL will correctly // add the chain to the strict versions. -def : Pat<(f32 (fnearbyint f32:$S)), +// TODO: Match strict fp rounding intrinsics with instructions like xsrdpiz when +// rounding mode is propagated to CodeGen part. +def : Pat<(f32 (strict_fnearbyint f32:$S)), (f32 (COPY_TO_REGCLASS (XSRDPIC (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>; -def : Pat<(f64 (fnearbyint f64:$S)), +def : Pat<(f64 (strict_fnearbyint f64:$S)), (f64 (XSRDPIC $S))>; -def : Pat<(v2f64 (fnearbyint v2f64:$S)), +def : Pat<(v2f64 (strict_fnearbyint v2f64:$S)), (v2f64 (XVRDPIC $S))>; -def : Pat<(v4f32 (fnearbyint v4f32:$S)), +def : Pat<(v4f32 (strict_fnearbyint v4f32:$S)), (v4f32 (XVRSPIC $S))>; // Materialize a zero-vector of long long @@ -3578,25 +3580,25 @@ def : Pat<(f64 (bitconvert i64:$S)), (f64 (MTVSRD $S))>; // Rounding to integer. -def : Pat<(i64 (lrint f64:$S)), +def : Pat<(i64 (strict_lrint f64:$S)), (i64 (MFVSRD (FCTID $S)))>; -def : Pat<(i64 (lrint f32:$S)), +def : Pat<(i64 (strict_lrint f32:$S)), (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>; -def : Pat<(i64 (llrint f64:$S)), +def : Pat<(i64 (strict_llrint f64:$S)), (i64 (MFVSRD (FCTID $S)))>; -def : Pat<(i64 (llrint f32:$S)), +def : Pat<(i64 (strict_llrint f32:$S)), (i64 (MFVSRD (FCTID (COPY_TO_REGCLASS $S, F8RC))))>; -def : Pat<(i64 (lround f64:$S)), +def : Pat<(i64 (strict_lround f64:$S)), (i64 (MFVSRD (FCTID (XSRDPI $S))))>; -def : Pat<(i64 (lround f32:$S)), +def : Pat<(i64 (strict_lround f32:$S)), (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; -def : Pat<(i32 (lround f64:$S)), +def : Pat<(i32 (strict_lround f64:$S)), (i32 (MFVSRWZ (FCTIW (XSRDPI $S))))>; -def : Pat<(i32 (lround f32:$S)), +def : Pat<(i32 (strict_lround f32:$S)), (i32 (MFVSRWZ (FCTIW (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; -def : Pat<(i64 (llround f64:$S)), +def : Pat<(i64 (strict_llround f64:$S)), (i64 (MFVSRD (FCTID (XSRDPI $S))))>; -def : Pat<(i64 (llround f32:$S)), +def : Pat<(i64 (strict_llround f32:$S)), (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead diff --git a/llvm/test/CodeGen/PowerPC/i64_fp_round.ll b/llvm/test/CodeGen/PowerPC/i64_fp_round.ll index f7df003fcc3f8..b1fb907e0ee2d 100644 --- a/llvm/test/CodeGen/PowerPC/i64_fp_round.ll +++ b/llvm/test/CodeGen/PowerPC/i64_fp_round.ll @@ -4,10 +4,9 @@ ; for minor code generation differences. ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -mattr=-isel < %s | FileCheck %s --check-prefix=CHECK-NO-ISEL -; Also check that with -enable-unsafe-fp-math we do not get that extra +; Also check that with fpexcept.ignore we do not get that extra ; code sequence. Simply verify that there is no "isel" present. -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE -; CHECK-UNSAFE-NOT: isel + target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -15,9 +14,8 @@ define float @test(i64 %x) nounwind readnone { ; Verify that we get the code sequence needed to avoid double-rounding. ; Note that only parts of the sequence are checked for here, to allow ; for minor code generation differences. -; Also check that with -enable-unsafe-fp-math we do not get that extra +; Also check that with fpexcept.ignore we do not get that extra ; code sequence. Simply verify that there is no "isel" present. -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=CHECK-UNSAFE ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: clrldi 4, 3, 53 @@ -51,18 +49,10 @@ define float @test(i64 %x) nounwind readnone { ; CHECK-NO-ISEL-NEXT: xscvsxddp 0, 0 ; CHECK-NO-ISEL-NEXT: frsp 1, 0 ; CHECK-NO-ISEL-NEXT: blr -; -; CHECK-UNSAFE-LABEL: test: -; CHECK-UNSAFE: # %bb.0: # %entry -; CHECK-UNSAFE-NEXT: std 3, -8(1) -; CHECK-UNSAFE-NEXT: lfd 0, -8(1) -; CHECK-UNSAFE-NEXT: xscvsxddp 0, 0 -; CHECK-UNSAFE-NEXT: frsp 1, 0 -; CHECK-UNSAFE-NEXT: blr entry: %conv = sitofp i64 %x to float ret float %conv } - +; TODO: Add sitofp afn test. diff --git a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll index 2be370f638d5b..af48bf22a7669 100644 --- a/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll +++ b/llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll @@ -5,9 +5,6 @@ ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ -; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s \ -; RUN: --enable-unsafe-fp-math | FileCheck %s --check-prefix=FAST define dso_local i64 @test_lrint(double %d) local_unnamed_addr { ; BE-LABEL: test_lrint: ; BE: # %bb.0: # %entry @@ -36,17 +33,36 @@ define dso_local i64 @test_lrint(double %d) local_unnamed_addr { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_lrint: -; FAST: # %bb.0: # %entry -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: blr entry: %0 = tail call i64 @llvm.lrint.i64.f64(double %d) ret i64 %0 } +define dso_local i64 @test_constrained_lrint(double %d) local_unnamed_addr { +; BE-LABEL: test_constrained_lrint: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl lrint +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_lrint: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fctid f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +entry: + %0 = tail call i64 @llvm.experimental.constrained.lrint(double %d, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret i64 %0 +} + declare i64 @llvm.lrint.i64.f64(double) define dso_local i64 @test_lrintf(float %f) local_unnamed_addr { @@ -77,17 +93,36 @@ define dso_local i64 @test_lrintf(float %f) local_unnamed_addr { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_lrintf: -; FAST: # %bb.0: # %entry -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: blr entry: %0 = tail call i64 @llvm.lrint.i64.f32(float %f) ret i64 %0 } +define dso_local i64 @test_constrained_lrintf(float %f) local_unnamed_addr { +; BE-LABEL: test_constrained_lrintf: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl lrintf +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_lrintf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fctid f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +entry: + %0 = tail call i64 @llvm.experimental.constrained.lrint(float %f, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret i64 %0 +} + declare i64 @llvm.lrint.i64.f32(float) define dso_local i64 @test_llrint(double %d) local_unnamed_addr { @@ -118,17 +153,36 @@ define dso_local i64 @test_llrint(double %d) local_unnamed_addr { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_llrint: -; FAST: # %bb.0: # %entry -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: blr entry: %0 = tail call i64 @llvm.llrint.i64.f64(double %d) ret i64 %0 } +define dso_local i64 @test_constrained_llrint(double %d) local_unnamed_addr { +; BE-LABEL: test_constrained_llrint: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl llrint +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_llrint: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fctid f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +entry: + %0 = tail call i64 @llvm.experimental.constrained.llrint(double %d, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret i64 %0 +} + declare i64 @llvm.llrint.i64.f64(double) define dso_local i64 @test_llrintf(float %f) local_unnamed_addr { @@ -159,17 +213,36 @@ define dso_local i64 @test_llrintf(float %f) local_unnamed_addr { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_llrintf: -; FAST: # %bb.0: # %entry -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: blr entry: %0 = tail call i64 @llvm.llrint.i64.f32(float %f) ret i64 %0 } +define dso_local i64 @test_constrained_llrintf(float %f) local_unnamed_addr { +; BE-LABEL: test_constrained_llrintf: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl llrintf +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_llrintf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fctid f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +entry: + %0 = tail call i64 @llvm.experimental.constrained.llrint(float %f, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret i64 %0 +} + declare i64 @llvm.llrint.i64.f32(float) define dso_local i64 @test_lround(double %d) local_unnamed_addr { @@ -200,18 +273,37 @@ define dso_local i64 @test_lround(double %d) local_unnamed_addr { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_lround: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpi f0, f1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: blr entry: %0 = tail call i64 @llvm.lround.i64.f64(double %d) ret i64 %0 } +define dso_local i64 @test_constrained_lround(double %d) local_unnamed_addr { +; BE-LABEL: test_constrained_lround: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl lround +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_lround: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpi f0, f1 +; CHECK-NEXT: fctid f0, f0 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +entry: + %0 = tail call i64 @llvm.experimental.constrained.lround(double %d, metadata !"fpexcept.ignore") + ret i64 %0 +} + declare i64 @llvm.lround.i64.f64(double) define dso_local i32 @test_lroundi32f64(double %d) local_unnamed_addr { @@ -242,18 +334,37 @@ define dso_local i32 @test_lroundi32f64(double %d) local_unnamed_addr { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_lroundi32f64: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpi f0, f1 -; FAST-NEXT: fctiw f0, f0 -; FAST-NEXT: mffprwz r3, f0 -; FAST-NEXT: blr entry: %0 = tail call i32 @llvm.lround.i32.f64(double %d) ret i32 %0 } +define dso_local i32 @test_constrained_lroundi32f64(double %d) local_unnamed_addr { +; BE-LABEL: test_constrained_lroundi32f64: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl lround +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_lroundi32f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpi f0, f1 +; CHECK-NEXT: fctiw f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: blr +entry: + %0 = tail call i32 @llvm.experimental.constrained.lround(double %d, metadata !"fpexcept.ignore") + ret i32 %0 +} + declare i32 @llvm.lround.i32.f64(double) define dso_local i64 @test_lroundf(float %f) local_unnamed_addr { @@ -284,18 +395,37 @@ define dso_local i64 @test_lroundf(float %f) local_unnamed_addr { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_lroundf: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpi f0, f1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: blr entry: %0 = tail call i64 @llvm.lround.i64.f32(float %f) ret i64 %0 } +define dso_local i64 @test_constrained_lroundf(float %f) local_unnamed_addr { +; BE-LABEL: test_constrained_lroundf: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl lroundf +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_lroundf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpi f0, f1 +; CHECK-NEXT: fctid f0, f0 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +entry: + %0 = tail call i64 @llvm.experimental.constrained.lround(float %f, metadata !"fpexcept.ignore") + ret i64 %0 +} + declare i64 @llvm.lround.i64.f32(float) define dso_local i32 @test_lroundi32f32(float %d) local_unnamed_addr { @@ -326,18 +456,37 @@ define dso_local i32 @test_lroundi32f32(float %d) local_unnamed_addr { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_lroundi32f32: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpi f0, f1 -; FAST-NEXT: fctiw f0, f0 -; FAST-NEXT: mffprwz r3, f0 -; FAST-NEXT: blr entry: %0 = tail call i32 @llvm.lround.i32.f32(float %d) ret i32 %0 } +define dso_local i32 @test_constrained_lroundi32f32(float %f) local_unnamed_addr { +; BE-LABEL: test_constrained_lroundi32f32: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl lroundf +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_lroundi32f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpi f0, f1 +; CHECK-NEXT: fctiw f0, f0 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: blr +entry: + %0 = tail call i32 @llvm.experimental.constrained.lround(float %f, metadata !"fpexcept.ignore") + ret i32 %0 +} + declare i32 @llvm.lround.i32.f32(float) define dso_local i64 @test_llround(double %d) local_unnamed_addr { @@ -368,18 +517,37 @@ define dso_local i64 @test_llround(double %d) local_unnamed_addr { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_llround: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpi f0, f1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: blr entry: %0 = tail call i64 @llvm.llround.i64.f64(double %d) ret i64 %0 } +define dso_local i64 @test_constrained_llround(double %d) local_unnamed_addr { +; BE-LABEL: test_constrained_llround: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl llround +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_llround: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpi f0, f1 +; CHECK-NEXT: fctid f0, f0 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +entry: + %0 = tail call i64 @llvm.experimental.constrained.llround(double %d, metadata !"fpexcept.ignore") + ret i64 %0 +} + declare i64 @llvm.llround.i64.f64(double) define dso_local i64 @test_llroundf(float %f) local_unnamed_addr { @@ -410,18 +578,37 @@ define dso_local i64 @test_llroundf(float %f) local_unnamed_addr { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_llroundf: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpi f0, f1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: blr entry: %0 = tail call i64 @llvm.llround.i64.f32(float %f) ret i64 %0 } +define dso_local i64 @test_constrained_llroundf(float %f) local_unnamed_addr { +; BE-LABEL: test_constrained_llroundf: +; BE: # %bb.0: # %entry +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: .cfi_def_cfa_offset 112 +; BE-NEXT: .cfi_offset lr, 16 +; BE-NEXT: bl llroundf +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_llroundf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpi f0, f1 +; CHECK-NEXT: fctid f0, f0 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +entry: + %0 = tail call i64 @llvm.experimental.constrained.llround(float %f, metadata !"fpexcept.ignore") + ret i64 %0 +} + declare i64 @llvm.llround.i64.f32(float) define dso_local double @test_nearbyint(double %d) local_unnamed_addr { @@ -452,16 +639,26 @@ define dso_local double @test_nearbyint(double %d) local_unnamed_addr { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_nearbyint: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpic f1, f1 -; FAST-NEXT: blr entry: %0 = tail call double @llvm.nearbyint.f64(double %d) ret double %0 } +define dso_local double @test_constrained_nearbyint(double %d) local_unnamed_addr { +; BE-LABEL: test_constrained_nearbyint: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpic f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_nearbyint: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpic f1, f1 +; CHECK-NEXT: blr +entry: + %0 = tail call double @llvm.experimental.constrained.nearbyint(double %d, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret double %0 +} + declare double @llvm.nearbyint.f64(double) define dso_local float @test_nearbyintf(float %f) local_unnamed_addr { @@ -492,16 +689,26 @@ define dso_local float @test_nearbyintf(float %f) local_unnamed_addr { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_nearbyintf: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpic f1, f1 -; FAST-NEXT: blr entry: %0 = tail call float @llvm.nearbyint.f32(float %f) ret float %0 } +define dso_local float @test_constrained_nearbyintf(float %f) local_unnamed_addr { +; BE-LABEL: test_constrained_nearbyintf: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpic f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_nearbyintf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpic f1, f1 +; CHECK-NEXT: blr +entry: + %0 = tail call float @llvm.experimental.constrained.nearbyint(float %f, metadata !"round.dynamic", metadata !"fpexcept.ignore") + ret float %0 +} + declare float @llvm.nearbyint.f32(float) define dso_local double @test_round(double %d) local_unnamed_addr { @@ -514,16 +721,26 @@ define dso_local double @test_round(double %d) local_unnamed_addr { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xsrdpi f1, f1 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_round: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpi f1, f1 -; FAST-NEXT: blr entry: %0 = tail call double @llvm.round.f64(double %d) ret double %0 } +define dso_local double @test_constrained_round(double %d) local_unnamed_addr { +; BE-LABEL: test_constrained_round: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpi f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_round: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpi f1, f1 +; CHECK-NEXT: blr +entry: + %0 = tail call double @llvm.experimental.constrained.round(double %d, metadata !"fpexcept.ignore") + ret double %0 +} + declare double @llvm.round.f64(double) define dso_local float @test_roundf(float %f) local_unnamed_addr { @@ -536,16 +753,26 @@ define dso_local float @test_roundf(float %f) local_unnamed_addr { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xsrdpi f1, f1 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_roundf: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpi f1, f1 -; FAST-NEXT: blr entry: %0 = tail call float @llvm.round.f32(float %f) ret float %0 } +define dso_local float @test_constrained_roundf(float %f) local_unnamed_addr { +; BE-LABEL: test_constrained_roundf: +; BE: # %bb.0: # %entry +; BE-NEXT: xsrdpi f1, f1 +; BE-NEXT: blr +; +; CHECK-LABEL: test_constrained_roundf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xsrdpi f1, f1 +; CHECK-NEXT: blr +entry: + %0 = tail call float @llvm.experimental.constrained.round(float %f, metadata !"fpexcept.ignore") + ret float %0 +} + declare float @llvm.round.f32(float) define dso_local double @test_trunc(double %d) local_unnamed_addr { @@ -558,11 +785,6 @@ define dso_local double @test_trunc(double %d) local_unnamed_addr { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xsrdpiz f1, f1 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_trunc: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpiz f1, f1 -; FAST-NEXT: blr entry: %0 = tail call double @llvm.trunc.f64(double %d) ret double %0 @@ -580,11 +802,6 @@ define dso_local float @test_truncf(float %f) local_unnamed_addr { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xsrdpiz f1, f1 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_truncf: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpiz f1, f1 -; FAST-NEXT: blr entry: %0 = tail call float @llvm.trunc.f32(float %f) ret float %0 @@ -602,11 +819,6 @@ define dso_local double @test_floor(double %d) local_unnamed_addr { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xsrdpim f1, f1 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_floor: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpim f1, f1 -; FAST-NEXT: blr entry: %0 = tail call double @llvm.floor.f64(double %d) ret double %0 @@ -624,11 +836,6 @@ define dso_local float @test_floorf(float %f) local_unnamed_addr { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xsrdpim f1, f1 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_floorf: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpim f1, f1 -; FAST-NEXT: blr entry: %0 = tail call float @llvm.floor.f32(float %f) ret float %0 @@ -646,11 +853,6 @@ define dso_local double @test_ceil(double %d) local_unnamed_addr { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xsrdpip f1, f1 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_ceil: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpip f1, f1 -; FAST-NEXT: blr entry: %0 = tail call double @llvm.ceil.f64(double %d) ret double %0 @@ -668,11 +870,6 @@ define dso_local float @test_ceilf(float %f) local_unnamed_addr { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xsrdpip f1, f1 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_ceilf: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpip f1, f1 -; FAST-NEXT: blr entry: %0 = tail call float @llvm.ceil.f32(float %f) ret float %0 @@ -690,11 +887,6 @@ define dso_local double @test_rint(double %d) local_unnamed_addr { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xsrdpic f1, f1 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_rint: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpic f1, f1 -; FAST-NEXT: blr entry: %0 = tail call double @llvm.rint.f64(double %d) ret double %0 @@ -712,11 +904,6 @@ define dso_local float @test_rintf(float %f) local_unnamed_addr { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xsrdpic f1, f1 ; CHECK-NEXT: blr -; -; FAST-LABEL: test_rintf: -; FAST: # %bb.0: # %entry -; FAST-NEXT: xsrdpic f1, f1 -; FAST-NEXT: blr entry: %0 = tail call float @llvm.rint.f32(float %f) ret float %0 diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll index 8a9e48e002381..d2fb6ca1ec5b7 100644 --- a/llvm/test/CodeGen/PowerPC/vector-llrint.ll +++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll @@ -9,9 +9,6 @@ ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s | \ ; RUN: FileCheck %s -; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ -; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s \ -; RUN: --enable-unsafe-fp-math | FileCheck %s --check-prefix=FAST define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind { ; BE-LABEL: llrint_v1i64_v1f16: @@ -47,23 +44,6 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v1i64_v1f16: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -32(r1) -; FAST-NEXT: std r0, 48(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: addi r1, r1, 32 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half> %x) ret <1 x i64> %a } @@ -147,41 +127,6 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v1i64_v2f16: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -48(r1) -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: fmr f1, f2 -; FAST-NEXT: std r0, 64(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: fmr f1, f31 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: fctid f1, f30 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v2, vs1, vs0 -; FAST-NEXT: addi r1, r1, 48 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> %x) ret <2 x i64> %a } @@ -341,68 +286,6 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v4i64_v4f16: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -64(r1) -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f4 -; FAST-NEXT: std r0, 80(r1) -; FAST-NEXT: fmr f31, f3 -; FAST-NEXT: fmr f30, f2 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: fmr f1, f31 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: fmr f1, f30 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: fmr f1, f29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f30 -; FAST-NEXT: fctid f2, f31 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f0, f28 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v3, vs0, vs2 -; FAST-NEXT: addi r1, r1, 64 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload -; FAST-NEXT: mtlr r0 -; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload -; FAST-NEXT: blr %a = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> %x) ret <4 x i64> %a } @@ -714,122 +597,6 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v8i64_v8f16: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -96(r1) -; FAST-NEXT: fmr f24, f1 -; FAST-NEXT: fmr f1, f8 -; FAST-NEXT: std r0, 112(r1) -; FAST-NEXT: fmr f30, f7 -; FAST-NEXT: fmr f29, f6 -; FAST-NEXT: fmr f28, f5 -; FAST-NEXT: fmr f27, f4 -; FAST-NEXT: fmr f26, f3 -; FAST-NEXT: fmr f25, f2 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: fmr f1, f30 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: fmr f1, f29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f28 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: fmr f1, f27 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f27, f1 -; FAST-NEXT: fmr f1, f26 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f26, f1 -; FAST-NEXT: fmr f1, f25 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f25, f1 -; FAST-NEXT: fmr f1, f24 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f25 -; FAST-NEXT: fctid f2, f26 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: fctid f3, f27 -; FAST-NEXT: fctid f4, f28 -; FAST-NEXT: fctid f5, f29 -; FAST-NEXT: fctid f6, f30 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: mffprd r3, f3 -; FAST-NEXT: mtfprd f3, r3 -; FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: mtfprd f4, r3 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v3, vs3, vs2 -; FAST-NEXT: xxmrghd v4, vs5, vs4 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f0, f31 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v5, vs0, vs6 -; FAST-NEXT: addi r1, r1, 96 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload -; FAST-NEXT: mtlr r0 -; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload -; FAST-NEXT: blr %a = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> %x) ret <8 x i64> %a } @@ -1439,228 +1206,6 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v16i64_v16f16: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f20, -96(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f21, -88(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f22, -80(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f23, -72(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -160(r1) -; FAST-NEXT: fmr f26, f1 -; FAST-NEXT: lfs f1, 312(r1) -; FAST-NEXT: std r0, 176(r1) -; FAST-NEXT: fmr f28, f13 -; FAST-NEXT: fmr f27, f12 -; FAST-NEXT: fmr f24, f11 -; FAST-NEXT: fmr f21, f10 -; FAST-NEXT: fmr f19, f9 -; FAST-NEXT: fmr f18, f8 -; FAST-NEXT: fmr f17, f7 -; FAST-NEXT: fmr f16, f6 -; FAST-NEXT: fmr f20, f5 -; FAST-NEXT: fmr f22, f4 -; FAST-NEXT: fmr f23, f3 -; FAST-NEXT: fmr f25, f2 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: lfs f1, 304(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: lfs f1, 296(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f28 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: fmr f1, f27 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f27, f1 -; FAST-NEXT: fmr f1, f24 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f24, f1 -; FAST-NEXT: fmr f1, f21 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f21, f1 -; FAST-NEXT: fmr f1, f19 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f19, f1 -; FAST-NEXT: fmr f1, f18 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f18, f1 -; FAST-NEXT: fmr f1, f17 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f17, f1 -; FAST-NEXT: fmr f1, f16 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f16, f1 -; FAST-NEXT: fmr f1, f20 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f20, f1 -; FAST-NEXT: fmr f1, f22 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f22, f1 -; FAST-NEXT: fmr f1, f23 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f23, f1 -; FAST-NEXT: fmr f1, f25 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f25, f1 -; FAST-NEXT: fmr f1, f26 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f25 -; FAST-NEXT: fctid f2, f23 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: fctid f3, f22 -; FAST-NEXT: fctid f4, f20 -; FAST-NEXT: fctid f5, f16 -; FAST-NEXT: fctid f6, f17 -; FAST-NEXT: fctid f7, f18 -; FAST-NEXT: fctid f8, f19 -; FAST-NEXT: fctid f9, f21 -; FAST-NEXT: fctid f10, f24 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: mffprd r3, f3 -; FAST-NEXT: mtfprd f3, r3 -; FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: mtfprd f4, r3 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f7 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: mffprd r3, f8 -; FAST-NEXT: mtfprd f8, r3 -; FAST-NEXT: mffprd r3, f9 -; FAST-NEXT: mtfprd f9, r3 -; FAST-NEXT: mffprd r3, f10 -; FAST-NEXT: mtfprd f10, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v3, vs3, vs2 -; FAST-NEXT: xxmrghd v4, vs5, vs4 -; FAST-NEXT: xxmrghd v5, vs7, vs6 -; FAST-NEXT: xxmrghd v6, vs9, vs8 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f0, f27 -; FAST-NEXT: fctid f1, f29 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v7, vs0, vs10 -; FAST-NEXT: fctid f0, f28 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v8, vs1, vs0 -; FAST-NEXT: fctid f0, f30 -; FAST-NEXT: fctid f1, f31 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v9, vs1, vs0 -; FAST-NEXT: addi r1, r1, 160 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload -; FAST-NEXT: mtlr r0 -; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f23, -72(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f22, -80(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f21, -88(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f20, -96(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f19, -104(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f18, -112(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f17, -120(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f16, -128(r1) # 8-byte Folded Reload -; FAST-NEXT: blr %a = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> %x) ret <16 x i64> %a } @@ -2839,523 +2384,6 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v32i64_v32f16: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -480(r1) -; FAST-NEXT: li r4, 128 -; FAST-NEXT: std r0, 496(r1) -; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill -; FAST-NEXT: mr r30, r3 -; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f14, f5 -; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 144 -; FAST-NEXT: fmr f16, f4 -; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 160 -; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 176 -; FAST-NEXT: xxlor v22, f3, f3 -; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f29, f9 -; FAST-NEXT: stfd f30, 464(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 192 -; FAST-NEXT: xxlor v23, f2, f2 -; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 208 -; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 224 -; FAST-NEXT: xxlor v25, f13, f13 -; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 240 -; FAST-NEXT: xxlor v26, f12, f12 -; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 256 -; FAST-NEXT: xxlor v27, f11, f11 -; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 272 -; FAST-NEXT: xxlor v28, f10, f10 -; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 288 -; FAST-NEXT: xxlor v29, f8, f8 -; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 304 -; FAST-NEXT: xxlor v30, f7, f7 -; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 44 -; FAST-NEXT: xxlor v31, f6, f6 -; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill -; FAST-NEXT: lfs f1, 768(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 120 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 760(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 112 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 752(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 104 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 744(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 96 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 736(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 88 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 728(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 80 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 720(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 72 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 712(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 64 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 704(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 56 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 696(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 48 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 688(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: xxlor v21, f1, f1 -; FAST-NEXT: lfs f1, 680(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: xxlor v20, f1, f1 -; FAST-NEXT: lfs f1, 672(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: xxlor v24, f1, f1 -; FAST-NEXT: lfs f1, 664(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: lfs f1, 656(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: lfs f1, 648(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: lfs f1, 640(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f27, f1 -; FAST-NEXT: lfs f1, 632(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f26, f1 -; FAST-NEXT: lfs f1, 624(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f25, f1 -; FAST-NEXT: xxlor f1, v25, v25 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f24, f1 -; FAST-NEXT: xxlor f1, v26, v26 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f23, f1 -; FAST-NEXT: xxlor f1, v27, v27 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f22, f1 -; FAST-NEXT: xxlor f1, v28, v28 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f21, f1 -; FAST-NEXT: fmr f1, f29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f20, f1 -; FAST-NEXT: xxlor f1, v29, v29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f19, f1 -; FAST-NEXT: xxlor f1, v30, v30 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f18, f1 -; FAST-NEXT: xxlor f1, v31, v31 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f14 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f14, f1 -; FAST-NEXT: fmr f1, f16 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f16, f1 -; FAST-NEXT: xxlor f1, v22, v22 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f17, f1 -; FAST-NEXT: xxlor f1, v23, v23 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 44 -; FAST-NEXT: fmr f15, f1 -; FAST-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fctid f3, f15 -; FAST-NEXT: fctid f4, f17 -; FAST-NEXT: mffprd r3, f3 -; FAST-NEXT: fctid f5, f16 -; FAST-NEXT: fctid f6, f14 -; FAST-NEXT: fctid f7, f18 -; FAST-NEXT: fctid f8, f19 -; FAST-NEXT: fctid f13, f1 -; FAST-NEXT: fctid f9, f20 -; FAST-NEXT: fctid f10, f22 -; FAST-NEXT: fctid f11, f24 -; FAST-NEXT: fctid f12, f25 -; FAST-NEXT: fctid f2, f23 -; FAST-NEXT: fctid f0, f21 -; FAST-NEXT: mtvsrd v2, r3 -; FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: mtvsrd v3, r3 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: mffprd r3, f7 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f8 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: mffprd r3, f9 -; FAST-NEXT: mtfprd f3, r3 -; FAST-NEXT: mffprd r3, f10 -; FAST-NEXT: mtfprd f4, r3 -; FAST-NEXT: mffprd r3, f11 -; FAST-NEXT: fctid f11, f31 -; FAST-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload -; FAST-NEXT: mtfprd f8, r3 -; FAST-NEXT: mffprd r3, f12 -; FAST-NEXT: xxlor f12, v24, v24 -; FAST-NEXT: fctid f31, f31 -; FAST-NEXT: fctid f12, f12 -; FAST-NEXT: mtfprd f9, r3 -; FAST-NEXT: mffprd r3, f13 -; FAST-NEXT: lfd f13, 48(r1) # 8-byte Folded Reload -; FAST-NEXT: mtfprd f10, r3 -; FAST-NEXT: fctid f13, f13 -; FAST-NEXT: xxmrghd v3, vs5, v3 -; FAST-NEXT: fctid f5, f26 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: xxmrghd v4, vs7, vs6 -; FAST-NEXT: fctid f6, f27 -; FAST-NEXT: fctid f7, f28 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: lfd f28, 96(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f7 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: xxmrghd v2, v2, vs10 -; FAST-NEXT: fctid f10, f30 -; FAST-NEXT: mffprd r3, f10 -; FAST-NEXT: lfd f30, 80(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f30, f30 -; FAST-NEXT: mtfprd f10, r3 -; FAST-NEXT: mffprd r3, f11 -; FAST-NEXT: mtfprd f11, r3 -; FAST-NEXT: mffprd r3, f12 -; FAST-NEXT: mtfprd f12, r3 -; FAST-NEXT: xxmrghd v5, vs12, vs11 -; FAST-NEXT: xxlor f11, v20, v20 -; FAST-NEXT: xxlor f12, v21, v21 -; FAST-NEXT: fctid f11, f11 -; FAST-NEXT: fctid f12, f12 -; FAST-NEXT: mffprd r3, f11 -; FAST-NEXT: mtfprd f11, r3 -; FAST-NEXT: mffprd r3, f12 -; FAST-NEXT: mtfprd f12, r3 -; FAST-NEXT: mffprd r3, f13 -; FAST-NEXT: mtfprd f13, r3 -; FAST-NEXT: mffprd r3, f31 -; FAST-NEXT: lfd f31, 64(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f31, f31 -; FAST-NEXT: mtvsrd v0, r3 -; FAST-NEXT: mffprd r3, f31 -; FAST-NEXT: lfd f31, 72(r1) # 8-byte Folded Reload -; FAST-NEXT: mtvsrd v1, r3 -; FAST-NEXT: mffprd r3, f30 -; FAST-NEXT: lfd f30, 88(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f31, f31 -; FAST-NEXT: mtvsrd v6, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: lfd f28, 104(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f30, f30 -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: mtvsrd v7, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: lfd f28, 112(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: mtvsrd v8, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: lfd f28, 120(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: xxmrghd v10, vs12, vs11 -; FAST-NEXT: xxmrghd v0, v0, vs13 -; FAST-NEXT: xxswapd vs12, v0 -; FAST-NEXT: xxmrghd v0, vs9, vs8 -; FAST-NEXT: xxmrghd v7, v8, v7 -; FAST-NEXT: mtvsrd v8, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: mtvsrd v9, r3 -; FAST-NEXT: mffprd r3, f30 -; FAST-NEXT: xxswapd v7, v7 -; FAST-NEXT: xxmrghd v8, v9, v8 -; FAST-NEXT: mtvsrd v9, r3 -; FAST-NEXT: mffprd r3, f31 -; FAST-NEXT: xxswapd v8, v8 -; FAST-NEXT: xxmrghd v6, v9, v6 -; FAST-NEXT: mtvsrd v9, r3 -; FAST-NEXT: li r3, 240 -; FAST-NEXT: stxvd2x v8, r30, r3 -; FAST-NEXT: li r3, 224 -; FAST-NEXT: stxvd2x v7, r30, r3 -; FAST-NEXT: li r3, 208 -; FAST-NEXT: xxswapd vs11, v6 -; FAST-NEXT: xxmrghd v6, vs10, vs7 -; FAST-NEXT: stxvd2x vs11, r30, r3 -; FAST-NEXT: li r3, 192 -; FAST-NEXT: xxmrghd v1, v9, v1 -; FAST-NEXT: xxswapd vs11, v1 -; FAST-NEXT: xxmrghd v1, vs6, vs5 -; FAST-NEXT: xxswapd vs5, v10 -; FAST-NEXT: xxswapd vs6, v5 -; FAST-NEXT: stxvd2x vs11, r30, r3 -; FAST-NEXT: li r3, 176 -; FAST-NEXT: stxvd2x vs12, r30, r3 -; FAST-NEXT: li r3, 160 -; FAST-NEXT: stxvd2x vs5, r30, r3 -; FAST-NEXT: li r3, 144 -; FAST-NEXT: stxvd2x vs6, r30, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: li r3, 128 -; FAST-NEXT: xxswapd vs5, v6 -; FAST-NEXT: stxvd2x vs5, r30, r3 -; FAST-NEXT: li r3, 112 -; FAST-NEXT: xxswapd vs2, v1 -; FAST-NEXT: xxswapd vs6, v0 -; FAST-NEXT: stxvd2x vs2, r30, r3 -; FAST-NEXT: li r3, 96 -; FAST-NEXT: fctid f2, f29 -; FAST-NEXT: stxvd2x vs6, r30, r3 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: li r3, 80 -; FAST-NEXT: xxmrghd v5, vs7, vs4 -; FAST-NEXT: xxswapd vs4, v2 -; FAST-NEXT: xxmrghd v0, vs0, vs3 -; FAST-NEXT: xxswapd vs0, v5 -; FAST-NEXT: xxswapd vs3, v3 -; FAST-NEXT: stxvd2x vs0, r30, r3 -; FAST-NEXT: li r3, 64 -; FAST-NEXT: xxswapd vs0, v0 -; FAST-NEXT: stxvd2x vs0, r30, r3 -; FAST-NEXT: li r3, 48 -; FAST-NEXT: xxmrghd v5, vs2, vs1 -; FAST-NEXT: xxswapd vs1, v4 -; FAST-NEXT: stxvd2x vs1, r30, r3 -; FAST-NEXT: li r3, 32 -; FAST-NEXT: xxswapd vs2, v5 -; FAST-NEXT: stxvd2x vs2, r30, r3 -; FAST-NEXT: li r3, 16 -; FAST-NEXT: stxvd2x vs3, r30, r3 -; FAST-NEXT: li r3, 304 -; FAST-NEXT: stxvd2x vs4, 0, r30 -; FAST-NEXT: lfd f31, 472(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, 464(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f29, 456(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, 448(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f27, 440(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f26, 432(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f25, 424(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f24, 416(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f23, 408(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f22, 400(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f21, 392(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f20, 384(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f19, 376(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f18, 368(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f17, 360(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload -; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 288 -; FAST-NEXT: ld r30, 320(r1) # 8-byte Folded Reload -; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 272 -; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 256 -; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 240 -; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 224 -; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 208 -; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 192 -; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 176 -; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 160 -; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 144 -; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 128 -; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: addi r1, r1, 480 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half> %x) ret <32 x i64> %a } @@ -3385,12 +2413,6 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v1i64_v1f32: -; FAST: # %bb.0: -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: blr %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x) ret <1 x i64> %a } @@ -3444,21 +2466,6 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v2i64_v2f32: -; FAST: # %bb.0: -; FAST-NEXT: xxsldwi vs0, v2, v2, 3 -; FAST-NEXT: xxswapd vs1, v2 -; FAST-NEXT: xscvspdpn f0, vs0 -; FAST-NEXT: xscvspdpn f1, vs1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v2, vs1, vs0 -; FAST-NEXT: blr %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x) ret <2 x i64> %a } @@ -3537,32 +2544,6 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v4i64_v4f32: -; FAST: # %bb.0: -; FAST-NEXT: xxsldwi vs0, v2, v2, 3 -; FAST-NEXT: xxswapd vs1, v2 -; FAST-NEXT: xscvspdpn f0, vs0 -; FAST-NEXT: xxsldwi vs2, v2, v2, 1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v4, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, v2 -; FAST-NEXT: vmr v2, v4 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs2 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v3, vs1, vs0 -; FAST-NEXT: blr %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x) ret <4 x i64> %a } @@ -3695,54 +2676,6 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v8i64_v8f32: -; FAST: # %bb.0: -; FAST-NEXT: xxsldwi vs0, v2, v2, 3 -; FAST-NEXT: xxswapd vs1, v2 -; FAST-NEXT: xscvspdpn f0, vs0 -; FAST-NEXT: xxsldwi vs2, v2, v2, 1 -; FAST-NEXT: xxsldwi vs3, v3, v3, 3 -; FAST-NEXT: xxswapd vs4, v3 -; FAST-NEXT: xxsldwi vs5, v3, v3, 1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v0, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, v2 -; FAST-NEXT: vmr v2, v0 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs2 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v1, vs1, vs0 -; FAST-NEXT: xscvspdpn f0, vs3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs4 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v4, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, v3 -; FAST-NEXT: vmr v3, v1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs5 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v5, vs1, vs0 -; FAST-NEXT: blr %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x) ret <8 x i64> %a } @@ -3983,98 +2916,6 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v16i64_v16f32: -; FAST: # %bb.0: -; FAST-NEXT: xxsldwi vs0, v2, v2, 3 -; FAST-NEXT: xxswapd vs1, v2 -; FAST-NEXT: xscvspdpn f0, vs0 -; FAST-NEXT: xxsldwi vs2, v2, v2, 1 -; FAST-NEXT: xxsldwi vs3, v3, v3, 3 -; FAST-NEXT: xxswapd vs4, v3 -; FAST-NEXT: xxsldwi vs5, v3, v3, 1 -; FAST-NEXT: xxsldwi vs6, v4, v4, 3 -; FAST-NEXT: xxswapd vs7, v4 -; FAST-NEXT: xxsldwi vs8, v4, v4, 1 -; FAST-NEXT: xxsldwi vs9, v5, v5, 3 -; FAST-NEXT: xxswapd vs10, v5 -; FAST-NEXT: xxsldwi vs11, v5, v5, 1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v0, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, v2 -; FAST-NEXT: vmr v2, v0 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs2 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v1, vs1, vs0 -; FAST-NEXT: xscvspdpn f0, vs3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs4 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v10, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, v3 -; FAST-NEXT: vmr v3, v1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs5 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v11, vs1, vs0 -; FAST-NEXT: xscvspdpn f0, vs6 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs7 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v6, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, v4 -; FAST-NEXT: xscvspdpn f1, vs8 -; FAST-NEXT: vmr v4, v10 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v7, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, vs9 -; FAST-NEXT: xscvspdpn f1, vs10 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v8, vs1, vs0 -; FAST-NEXT: xscvspdpn f0, v5 -; FAST-NEXT: xscvspdpn f1, vs11 -; FAST-NEXT: vmr v5, v11 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v9, vs0, vs1 -; FAST-NEXT: blr %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x) ret <16 x i64> %a } @@ -4104,12 +2945,6 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v1i64_v1f64: -; FAST: # %bb.0: -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: blr %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x) ret <1 x i64> %a } @@ -4164,19 +2999,6 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v2i64_v2f64: -; FAST: # %bb.0: -; FAST-NEXT: xxlor f1, v2, v2 -; FAST-NEXT: xxswapd vs0, v2 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v2, vs1, vs0 -; FAST-NEXT: blr %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x) ret <2 x i64> %a } @@ -4261,28 +3083,6 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v4i64_v4f64: -; FAST: # %bb.0: -; FAST-NEXT: xxswapd vs0, v2 -; FAST-NEXT: xxlor f2, v2, v2 -; FAST-NEXT: xxswapd vs1, v3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: fctid f2, f2 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mffprd r4, f0 -; FAST-NEXT: xxlor f0, v3, v3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mtfprd f2, r4 -; FAST-NEXT: mffprd r5, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v2, vs0, vs2 -; FAST-NEXT: mtfprd f0, r5 -; FAST-NEXT: xxmrghd v3, vs0, vs1 -; FAST-NEXT: blr %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x) ret <4 x i64> %a } @@ -4427,46 +3227,6 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v8i64_v8f64: -; FAST: # %bb.0: -; FAST-NEXT: xxswapd vs0, v2 -; FAST-NEXT: xxswapd vs1, v3 -; FAST-NEXT: xxlor f4, v2, v2 -; FAST-NEXT: xxswapd vs2, v4 -; FAST-NEXT: xxswapd vs3, v5 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: fctid f4, f4 -; FAST-NEXT: mffprd r4, f0 -; FAST-NEXT: xxlor f0, v3, v3 -; FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r5, f0 -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: mtfprd f1, r4 -; FAST-NEXT: mffprd r6, f0 -; FAST-NEXT: xxlor f0, v4, v4 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mtfprd f4, r6 -; FAST-NEXT: mffprd r7, f0 -; FAST-NEXT: fctid f0, f2 -; FAST-NEXT: mtfprd f2, r5 -; FAST-NEXT: mtfprd f5, r7 -; FAST-NEXT: mffprd r8, f0 -; FAST-NEXT: xxlor f0, v5, v5 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mtfprd f6, r8 -; FAST-NEXT: mffprd r9, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v3, vs2, vs4 -; FAST-NEXT: xxmrghd v4, vs5, vs6 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f1, f3 -; FAST-NEXT: mtfprd f0, r9 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v5, vs0, vs1 -; FAST-NEXT: blr %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x) ret <8 x i64> %a } @@ -4496,18 +3256,6 @@ define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v1i64_v1f128: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -32(r1) -; FAST-NEXT: std r0, 48(r1) -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: addi r1, r1, 32 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x) ret <1 x i64> %a } @@ -4565,33 +3313,6 @@ define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v2i64_v2f128: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -80(r1) -; FAST-NEXT: li r3, 48 -; FAST-NEXT: std r0, 96(r1) -; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 64 -; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: vmr v31, v3 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v31 -; FAST-NEXT: mtvsrd v30, r3 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 64 -; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 48 -; FAST-NEXT: xxmrghd v2, vs0, v30 -; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: addi r1, r1, 80 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x) ret <2 x i64> %a } @@ -4689,53 +3410,6 @@ define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v4i64_v4f128: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -112(r1) -; FAST-NEXT: li r3, 48 -; FAST-NEXT: std r0, 128(r1) -; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 64 -; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 80 -; FAST-NEXT: vmr v29, v3 -; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 96 -; FAST-NEXT: vmr v30, v4 -; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: vmr v31, v5 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v29 -; FAST-NEXT: mtvsrd v28, r3 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v30 -; FAST-NEXT: xxmrghd v29, vs0, v28 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v31 -; FAST-NEXT: mtvsrd v30, r3 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 96 -; FAST-NEXT: vmr v2, v29 -; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 80 -; FAST-NEXT: xxmrghd v3, vs0, v30 -; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 64 -; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 48 -; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: addi r1, r1, 112 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x) ret <4 x i64> %a } @@ -4913,93 +3587,6 @@ define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: llrint_v8i64_v8f128: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -176(r1) -; FAST-NEXT: li r3, 48 -; FAST-NEXT: std r0, 192(r1) -; FAST-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 64 -; FAST-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 80 -; FAST-NEXT: vmr v25, v3 -; FAST-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 96 -; FAST-NEXT: vmr v26, v4 -; FAST-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 112 -; FAST-NEXT: vmr v27, v5 -; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 128 -; FAST-NEXT: vmr v28, v6 -; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 144 -; FAST-NEXT: vmr v29, v7 -; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 160 -; FAST-NEXT: vmr v30, v8 -; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: vmr v31, v9 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v25 -; FAST-NEXT: mtvsrd v24, r3 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v26 -; FAST-NEXT: xxmrghd v25, vs0, v24 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v27 -; FAST-NEXT: mtvsrd v26, r3 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v28 -; FAST-NEXT: xxmrghd v27, vs0, v26 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v29 -; FAST-NEXT: mtvsrd v28, r3 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v30 -; FAST-NEXT: xxmrghd v29, vs0, v28 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v31 -; FAST-NEXT: mtvsrd v30, r3 -; FAST-NEXT: bl llrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 160 -; FAST-NEXT: vmr v4, v29 -; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 144 -; FAST-NEXT: vmr v3, v27 -; FAST-NEXT: vmr v2, v25 -; FAST-NEXT: xxmrghd v5, vs0, v30 -; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 128 -; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 112 -; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 96 -; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 80 -; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 64 -; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 48 -; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: addi r1, r1, 176 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x) ret <8 x i64> %a } diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll index f4375362f861c..af5704bf3c852 100644 --- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll +++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll @@ -9,10 +9,6 @@ ; RUN: sed 's/iXLen/i32/g' %s | llc -mcpu=pwr8 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: sed 's/iXLen/i32/g' %s | llc -mcpu=pwr8 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \ -; RUN: -verify-machineinstrs --enable-unsafe-fp-math | \ -; RUN: FileCheck %s --check-prefixes=FAST ; FIXME: crash "Input type needs to be promoted!" ; SKIP: sed 's/iXLen/i64/g' %s | llc -ppc-asm-full-reg-names \ ; SKIP: -ppc-vsr-nums-as-vr -mtriple=powerpc-unknown-unknown \ @@ -23,10 +19,6 @@ ; RUN: sed 's/iXLen/i64/g' %s | llc -mcpu=pwr8 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK -; RUN: sed 's/iXLen/i64/g' %s | llc -mcpu=pwr8 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \ -; RUN: -verify-machineinstrs --enable-unsafe-fp-math | \ -; RUN: FileCheck %s --check-prefixes=FAST define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind { ; BE-LABEL: lrint_v1f16: @@ -62,23 +54,6 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v1f16: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -32(r1) -; FAST-NEXT: std r0, 48(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: addi r1, r1, 32 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half> %x) ret <1 x i64> %a } @@ -162,41 +137,6 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v2f16: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -48(r1) -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: fmr f1, f2 -; FAST-NEXT: std r0, 64(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: fmr f1, f31 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: fctid f1, f30 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v2, vs1, vs0 -; FAST-NEXT: addi r1, r1, 48 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> %x) ret <2 x i64> %a } @@ -356,68 +296,6 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v4f16: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -64(r1) -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f4 -; FAST-NEXT: std r0, 80(r1) -; FAST-NEXT: fmr f31, f3 -; FAST-NEXT: fmr f30, f2 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: fmr f1, f31 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: fmr f1, f30 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: fmr f1, f29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f30 -; FAST-NEXT: fctid f2, f31 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f0, f28 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v3, vs0, vs2 -; FAST-NEXT: addi r1, r1, 64 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload -; FAST-NEXT: mtlr r0 -; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload -; FAST-NEXT: blr %a = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> %x) ret <4 x i64> %a } @@ -729,122 +607,6 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v8f16: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -96(r1) -; FAST-NEXT: fmr f24, f1 -; FAST-NEXT: fmr f1, f8 -; FAST-NEXT: std r0, 112(r1) -; FAST-NEXT: fmr f30, f7 -; FAST-NEXT: fmr f29, f6 -; FAST-NEXT: fmr f28, f5 -; FAST-NEXT: fmr f27, f4 -; FAST-NEXT: fmr f26, f3 -; FAST-NEXT: fmr f25, f2 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: fmr f1, f30 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: fmr f1, f29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f28 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: fmr f1, f27 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f27, f1 -; FAST-NEXT: fmr f1, f26 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f26, f1 -; FAST-NEXT: fmr f1, f25 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f25, f1 -; FAST-NEXT: fmr f1, f24 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f25 -; FAST-NEXT: fctid f2, f26 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: fctid f3, f27 -; FAST-NEXT: fctid f4, f28 -; FAST-NEXT: fctid f5, f29 -; FAST-NEXT: fctid f6, f30 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: mffprd r3, f3 -; FAST-NEXT: mtfprd f3, r3 -; FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: mtfprd f4, r3 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v3, vs3, vs2 -; FAST-NEXT: xxmrghd v4, vs5, vs4 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f0, f31 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v5, vs0, vs6 -; FAST-NEXT: addi r1, r1, 96 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload -; FAST-NEXT: mtlr r0 -; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload -; FAST-NEXT: blr %a = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> %x) ret <8 x i64> %a } @@ -1454,228 +1216,6 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v16i64_v16f16: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f20, -96(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f21, -88(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f22, -80(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f23, -72(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill -; FAST-NEXT: stdu r1, -160(r1) -; FAST-NEXT: fmr f26, f1 -; FAST-NEXT: lfs f1, 312(r1) -; FAST-NEXT: std r0, 176(r1) -; FAST-NEXT: fmr f28, f13 -; FAST-NEXT: fmr f27, f12 -; FAST-NEXT: fmr f24, f11 -; FAST-NEXT: fmr f21, f10 -; FAST-NEXT: fmr f19, f9 -; FAST-NEXT: fmr f18, f8 -; FAST-NEXT: fmr f17, f7 -; FAST-NEXT: fmr f16, f6 -; FAST-NEXT: fmr f20, f5 -; FAST-NEXT: fmr f22, f4 -; FAST-NEXT: fmr f23, f3 -; FAST-NEXT: fmr f25, f2 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: lfs f1, 304(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: lfs f1, 296(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f28 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: fmr f1, f27 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f27, f1 -; FAST-NEXT: fmr f1, f24 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f24, f1 -; FAST-NEXT: fmr f1, f21 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f21, f1 -; FAST-NEXT: fmr f1, f19 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f19, f1 -; FAST-NEXT: fmr f1, f18 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f18, f1 -; FAST-NEXT: fmr f1, f17 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f17, f1 -; FAST-NEXT: fmr f1, f16 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f16, f1 -; FAST-NEXT: fmr f1, f20 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f20, f1 -; FAST-NEXT: fmr f1, f22 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f22, f1 -; FAST-NEXT: fmr f1, f23 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f23, f1 -; FAST-NEXT: fmr f1, f25 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f25, f1 -; FAST-NEXT: fmr f1, f26 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fctid f0, f25 -; FAST-NEXT: fctid f2, f23 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: fctid f3, f22 -; FAST-NEXT: fctid f4, f20 -; FAST-NEXT: fctid f5, f16 -; FAST-NEXT: fctid f6, f17 -; FAST-NEXT: fctid f7, f18 -; FAST-NEXT: fctid f8, f19 -; FAST-NEXT: fctid f9, f21 -; FAST-NEXT: fctid f10, f24 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: mffprd r3, f3 -; FAST-NEXT: mtfprd f3, r3 -; FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: mtfprd f4, r3 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f7 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: mffprd r3, f8 -; FAST-NEXT: mtfprd f8, r3 -; FAST-NEXT: mffprd r3, f9 -; FAST-NEXT: mtfprd f9, r3 -; FAST-NEXT: mffprd r3, f10 -; FAST-NEXT: mtfprd f10, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v3, vs3, vs2 -; FAST-NEXT: xxmrghd v4, vs5, vs4 -; FAST-NEXT: xxmrghd v5, vs7, vs6 -; FAST-NEXT: xxmrghd v6, vs9, vs8 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f0, f27 -; FAST-NEXT: fctid f1, f29 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v7, vs0, vs10 -; FAST-NEXT: fctid f0, f28 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v8, vs1, vs0 -; FAST-NEXT: fctid f0, f30 -; FAST-NEXT: fctid f1, f31 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v9, vs1, vs0 -; FAST-NEXT: addi r1, r1, 160 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload -; FAST-NEXT: mtlr r0 -; FAST-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f23, -72(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f22, -80(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f21, -88(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f20, -96(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f19, -104(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f18, -112(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f17, -120(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f16, -128(r1) # 8-byte Folded Reload -; FAST-NEXT: blr %a = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> %x) ret <16 x i64> %a } @@ -2854,523 +2394,6 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v32i64_v32f16: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -480(r1) -; FAST-NEXT: li r4, 128 -; FAST-NEXT: std r0, 496(r1) -; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill -; FAST-NEXT: mr r30, r3 -; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f14, f5 -; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 144 -; FAST-NEXT: fmr f16, f4 -; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 160 -; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 176 -; FAST-NEXT: xxlor v22, f3, f3 -; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f29, f9 -; FAST-NEXT: stfd f30, 464(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill -; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 192 -; FAST-NEXT: xxlor v23, f2, f2 -; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 208 -; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 224 -; FAST-NEXT: xxlor v25, f13, f13 -; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 240 -; FAST-NEXT: xxlor v26, f12, f12 -; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 256 -; FAST-NEXT: xxlor v27, f11, f11 -; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 272 -; FAST-NEXT: xxlor v28, f10, f10 -; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 288 -; FAST-NEXT: xxlor v29, f8, f8 -; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 304 -; FAST-NEXT: xxlor v30, f7, f7 -; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill -; FAST-NEXT: li r4, 44 -; FAST-NEXT: xxlor v31, f6, f6 -; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill -; FAST-NEXT: lfs f1, 768(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 120 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 760(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 112 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 752(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 104 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 744(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 96 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 736(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 88 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 728(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 80 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 720(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 72 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 712(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 64 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 704(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 56 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 696(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 48 -; FAST-NEXT: stxsdx f1, r1, r3 # 8-byte Folded Spill -; FAST-NEXT: lfs f1, 688(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: xxlor v21, f1, f1 -; FAST-NEXT: lfs f1, 680(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: xxlor v20, f1, f1 -; FAST-NEXT: lfs f1, 672(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: xxlor v24, f1, f1 -; FAST-NEXT: lfs f1, 664(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f31, f1 -; FAST-NEXT: lfs f1, 656(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f30, f1 -; FAST-NEXT: lfs f1, 648(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f28, f1 -; FAST-NEXT: lfs f1, 640(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f27, f1 -; FAST-NEXT: lfs f1, 632(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f26, f1 -; FAST-NEXT: lfs f1, 624(r1) -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f25, f1 -; FAST-NEXT: xxlor f1, v25, v25 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f24, f1 -; FAST-NEXT: xxlor f1, v26, v26 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f23, f1 -; FAST-NEXT: xxlor f1, v27, v27 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f22, f1 -; FAST-NEXT: xxlor f1, v28, v28 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f21, f1 -; FAST-NEXT: fmr f1, f29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f20, f1 -; FAST-NEXT: xxlor f1, v29, v29 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f19, f1 -; FAST-NEXT: xxlor f1, v30, v30 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f18, f1 -; FAST-NEXT: xxlor f1, v31, v31 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f29, f1 -; FAST-NEXT: fmr f1, f14 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f14, f1 -; FAST-NEXT: fmr f1, f16 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f16, f1 -; FAST-NEXT: xxlor f1, v22, v22 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fmr f17, f1 -; FAST-NEXT: xxlor f1, v23, v23 -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: li r3, 44 -; FAST-NEXT: fmr f15, f1 -; FAST-NEXT: lxsspx f1, r1, r3 # 4-byte Folded Reload -; FAST-NEXT: bl __truncsfhf2 -; FAST-NEXT: nop -; FAST-NEXT: clrldi r3, r3, 48 -; FAST-NEXT: bl __extendhfsf2 -; FAST-NEXT: nop -; FAST-NEXT: fctid f3, f15 -; FAST-NEXT: fctid f4, f17 -; FAST-NEXT: mffprd r3, f3 -; FAST-NEXT: fctid f5, f16 -; FAST-NEXT: fctid f6, f14 -; FAST-NEXT: fctid f7, f18 -; FAST-NEXT: fctid f8, f19 -; FAST-NEXT: fctid f13, f1 -; FAST-NEXT: fctid f9, f20 -; FAST-NEXT: fctid f10, f22 -; FAST-NEXT: fctid f11, f24 -; FAST-NEXT: fctid f12, f25 -; FAST-NEXT: fctid f2, f23 -; FAST-NEXT: fctid f0, f21 -; FAST-NEXT: mtvsrd v2, r3 -; FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: mtvsrd v3, r3 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: mffprd r3, f7 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f8 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: mffprd r3, f9 -; FAST-NEXT: mtfprd f3, r3 -; FAST-NEXT: mffprd r3, f10 -; FAST-NEXT: mtfprd f4, r3 -; FAST-NEXT: mffprd r3, f11 -; FAST-NEXT: fctid f11, f31 -; FAST-NEXT: lfd f31, 56(r1) # 8-byte Folded Reload -; FAST-NEXT: mtfprd f8, r3 -; FAST-NEXT: mffprd r3, f12 -; FAST-NEXT: xxlor f12, v24, v24 -; FAST-NEXT: fctid f31, f31 -; FAST-NEXT: fctid f12, f12 -; FAST-NEXT: mtfprd f9, r3 -; FAST-NEXT: mffprd r3, f13 -; FAST-NEXT: lfd f13, 48(r1) # 8-byte Folded Reload -; FAST-NEXT: mtfprd f10, r3 -; FAST-NEXT: fctid f13, f13 -; FAST-NEXT: xxmrghd v3, vs5, v3 -; FAST-NEXT: fctid f5, f26 -; FAST-NEXT: mffprd r3, f5 -; FAST-NEXT: mtfprd f5, r3 -; FAST-NEXT: xxmrghd v4, vs7, vs6 -; FAST-NEXT: fctid f6, f27 -; FAST-NEXT: fctid f7, f28 -; FAST-NEXT: mffprd r3, f6 -; FAST-NEXT: lfd f28, 96(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: mtfprd f6, r3 -; FAST-NEXT: mffprd r3, f7 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: xxmrghd v2, v2, vs10 -; FAST-NEXT: fctid f10, f30 -; FAST-NEXT: mffprd r3, f10 -; FAST-NEXT: lfd f30, 80(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f30, f30 -; FAST-NEXT: mtfprd f10, r3 -; FAST-NEXT: mffprd r3, f11 -; FAST-NEXT: mtfprd f11, r3 -; FAST-NEXT: mffprd r3, f12 -; FAST-NEXT: mtfprd f12, r3 -; FAST-NEXT: xxmrghd v5, vs12, vs11 -; FAST-NEXT: xxlor f11, v20, v20 -; FAST-NEXT: xxlor f12, v21, v21 -; FAST-NEXT: fctid f11, f11 -; FAST-NEXT: fctid f12, f12 -; FAST-NEXT: mffprd r3, f11 -; FAST-NEXT: mtfprd f11, r3 -; FAST-NEXT: mffprd r3, f12 -; FAST-NEXT: mtfprd f12, r3 -; FAST-NEXT: mffprd r3, f13 -; FAST-NEXT: mtfprd f13, r3 -; FAST-NEXT: mffprd r3, f31 -; FAST-NEXT: lfd f31, 64(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f31, f31 -; FAST-NEXT: mtvsrd v0, r3 -; FAST-NEXT: mffprd r3, f31 -; FAST-NEXT: lfd f31, 72(r1) # 8-byte Folded Reload -; FAST-NEXT: mtvsrd v1, r3 -; FAST-NEXT: mffprd r3, f30 -; FAST-NEXT: lfd f30, 88(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f31, f31 -; FAST-NEXT: mtvsrd v6, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: lfd f28, 104(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f30, f30 -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: mtvsrd v7, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: lfd f28, 112(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: mtvsrd v8, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: lfd f28, 120(r1) # 8-byte Folded Reload -; FAST-NEXT: fctid f28, f28 -; FAST-NEXT: xxmrghd v10, vs12, vs11 -; FAST-NEXT: xxmrghd v0, v0, vs13 -; FAST-NEXT: xxswapd vs12, v0 -; FAST-NEXT: xxmrghd v0, vs9, vs8 -; FAST-NEXT: xxmrghd v7, v8, v7 -; FAST-NEXT: mtvsrd v8, r3 -; FAST-NEXT: mffprd r3, f28 -; FAST-NEXT: mtvsrd v9, r3 -; FAST-NEXT: mffprd r3, f30 -; FAST-NEXT: xxswapd v7, v7 -; FAST-NEXT: xxmrghd v8, v9, v8 -; FAST-NEXT: mtvsrd v9, r3 -; FAST-NEXT: mffprd r3, f31 -; FAST-NEXT: xxswapd v8, v8 -; FAST-NEXT: xxmrghd v6, v9, v6 -; FAST-NEXT: mtvsrd v9, r3 -; FAST-NEXT: li r3, 240 -; FAST-NEXT: stxvd2x v8, r30, r3 -; FAST-NEXT: li r3, 224 -; FAST-NEXT: stxvd2x v7, r30, r3 -; FAST-NEXT: li r3, 208 -; FAST-NEXT: xxswapd vs11, v6 -; FAST-NEXT: xxmrghd v6, vs10, vs7 -; FAST-NEXT: stxvd2x vs11, r30, r3 -; FAST-NEXT: li r3, 192 -; FAST-NEXT: xxmrghd v1, v9, v1 -; FAST-NEXT: xxswapd vs11, v1 -; FAST-NEXT: xxmrghd v1, vs6, vs5 -; FAST-NEXT: xxswapd vs5, v10 -; FAST-NEXT: xxswapd vs6, v5 -; FAST-NEXT: stxvd2x vs11, r30, r3 -; FAST-NEXT: li r3, 176 -; FAST-NEXT: stxvd2x vs12, r30, r3 -; FAST-NEXT: li r3, 160 -; FAST-NEXT: stxvd2x vs5, r30, r3 -; FAST-NEXT: li r3, 144 -; FAST-NEXT: stxvd2x vs6, r30, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f7, r3 -; FAST-NEXT: li r3, 128 -; FAST-NEXT: xxswapd vs5, v6 -; FAST-NEXT: stxvd2x vs5, r30, r3 -; FAST-NEXT: li r3, 112 -; FAST-NEXT: xxswapd vs2, v1 -; FAST-NEXT: xxswapd vs6, v0 -; FAST-NEXT: stxvd2x vs2, r30, r3 -; FAST-NEXT: li r3, 96 -; FAST-NEXT: fctid f2, f29 -; FAST-NEXT: stxvd2x vs6, r30, r3 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: mtfprd f2, r3 -; FAST-NEXT: li r3, 80 -; FAST-NEXT: xxmrghd v5, vs7, vs4 -; FAST-NEXT: xxswapd vs4, v2 -; FAST-NEXT: xxmrghd v0, vs0, vs3 -; FAST-NEXT: xxswapd vs0, v5 -; FAST-NEXT: xxswapd vs3, v3 -; FAST-NEXT: stxvd2x vs0, r30, r3 -; FAST-NEXT: li r3, 64 -; FAST-NEXT: xxswapd vs0, v0 -; FAST-NEXT: stxvd2x vs0, r30, r3 -; FAST-NEXT: li r3, 48 -; FAST-NEXT: xxmrghd v5, vs2, vs1 -; FAST-NEXT: xxswapd vs1, v4 -; FAST-NEXT: stxvd2x vs1, r30, r3 -; FAST-NEXT: li r3, 32 -; FAST-NEXT: xxswapd vs2, v5 -; FAST-NEXT: stxvd2x vs2, r30, r3 -; FAST-NEXT: li r3, 16 -; FAST-NEXT: stxvd2x vs3, r30, r3 -; FAST-NEXT: li r3, 304 -; FAST-NEXT: stxvd2x vs4, 0, r30 -; FAST-NEXT: lfd f31, 472(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f30, 464(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f29, 456(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f28, 448(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f27, 440(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f26, 432(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f25, 424(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f24, 416(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f23, 408(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f22, 400(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f21, 392(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f20, 384(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f19, 376(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f18, 368(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f17, 360(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload -; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload -; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 288 -; FAST-NEXT: ld r30, 320(r1) # 8-byte Folded Reload -; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 272 -; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 256 -; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 240 -; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 224 -; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 208 -; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 192 -; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 176 -; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 160 -; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 144 -; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 128 -; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: addi r1, r1, 480 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half> %x) ret <32 x i64> %a } @@ -3400,12 +2423,6 @@ define <1 x i64> @lrint_v1f32(<1 x float> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v1f32: -; FAST: # %bb.0: -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: blr %a = call <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float> %x) ret <1 x i64> %a } @@ -3459,21 +2476,6 @@ define <2 x i64> @lrint_v2f32(<2 x float> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v2f32: -; FAST: # %bb.0: -; FAST-NEXT: xxsldwi vs0, v2, v2, 3 -; FAST-NEXT: xxswapd vs1, v2 -; FAST-NEXT: xscvspdpn f0, vs0 -; FAST-NEXT: xscvspdpn f1, vs1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v2, vs1, vs0 -; FAST-NEXT: blr %a = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %x) ret <2 x i64> %a } @@ -3552,32 +2554,6 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v4f32: -; FAST: # %bb.0: -; FAST-NEXT: xxsldwi vs0, v2, v2, 3 -; FAST-NEXT: xxswapd vs1, v2 -; FAST-NEXT: xscvspdpn f0, vs0 -; FAST-NEXT: xxsldwi vs2, v2, v2, 1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v4, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, v2 -; FAST-NEXT: vmr v2, v4 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs2 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v3, vs1, vs0 -; FAST-NEXT: blr %a = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> %x) ret <4 x i64> %a } @@ -3710,54 +2686,6 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v8f32: -; FAST: # %bb.0: -; FAST-NEXT: xxsldwi vs0, v2, v2, 3 -; FAST-NEXT: xxswapd vs1, v2 -; FAST-NEXT: xscvspdpn f0, vs0 -; FAST-NEXT: xxsldwi vs2, v2, v2, 1 -; FAST-NEXT: xxsldwi vs3, v3, v3, 3 -; FAST-NEXT: xxswapd vs4, v3 -; FAST-NEXT: xxsldwi vs5, v3, v3, 1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v0, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, v2 -; FAST-NEXT: vmr v2, v0 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs2 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v1, vs1, vs0 -; FAST-NEXT: xscvspdpn f0, vs3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs4 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v4, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, v3 -; FAST-NEXT: vmr v3, v1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs5 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v5, vs1, vs0 -; FAST-NEXT: blr %a = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> %x) ret <8 x i64> %a } @@ -3998,98 +2926,6 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v16i64_v16f32: -; FAST: # %bb.0: -; FAST-NEXT: xxsldwi vs0, v2, v2, 3 -; FAST-NEXT: xxswapd vs1, v2 -; FAST-NEXT: xscvspdpn f0, vs0 -; FAST-NEXT: xxsldwi vs2, v2, v2, 1 -; FAST-NEXT: xxsldwi vs3, v3, v3, 3 -; FAST-NEXT: xxswapd vs4, v3 -; FAST-NEXT: xxsldwi vs5, v3, v3, 1 -; FAST-NEXT: xxsldwi vs6, v4, v4, 3 -; FAST-NEXT: xxswapd vs7, v4 -; FAST-NEXT: xxsldwi vs8, v4, v4, 1 -; FAST-NEXT: xxsldwi vs9, v5, v5, 3 -; FAST-NEXT: xxswapd vs10, v5 -; FAST-NEXT: xxsldwi vs11, v5, v5, 1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v0, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, v2 -; FAST-NEXT: vmr v2, v0 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs2 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v1, vs1, vs0 -; FAST-NEXT: xscvspdpn f0, vs3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs4 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v10, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, v3 -; FAST-NEXT: vmr v3, v1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs5 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v11, vs1, vs0 -; FAST-NEXT: xscvspdpn f0, vs6 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: xscvspdpn f0, vs7 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v6, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, v4 -; FAST-NEXT: xscvspdpn f1, vs8 -; FAST-NEXT: vmr v4, v10 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v7, vs0, vs1 -; FAST-NEXT: xscvspdpn f0, vs9 -; FAST-NEXT: xscvspdpn f1, vs10 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v8, vs1, vs0 -; FAST-NEXT: xscvspdpn f0, v5 -; FAST-NEXT: xscvspdpn f1, vs11 -; FAST-NEXT: vmr v5, v11 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v9, vs0, vs1 -; FAST-NEXT: blr %a = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> %x) ret <16 x i64> %a } @@ -4119,12 +2955,6 @@ define <1 x i64> @lrint_v1f64(<1 x double> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v1f64: -; FAST: # %bb.0: -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: blr %a = call <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double> %x) ret <1 x i64> %a } @@ -4179,19 +3009,6 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v2f64: -; FAST: # %bb.0: -; FAST-NEXT: xxlor f1, v2, v2 -; FAST-NEXT: xxswapd vs0, v2 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: mffprd r3, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v2, vs1, vs0 -; FAST-NEXT: blr %a = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> %x) ret <2 x i64> %a } @@ -4276,28 +3093,6 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v4f64: -; FAST: # %bb.0: -; FAST-NEXT: xxswapd vs0, v2 -; FAST-NEXT: xxlor f2, v2, v2 -; FAST-NEXT: xxswapd vs1, v3 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: fctid f2, f2 -; FAST-NEXT: fctid f1, f1 -; FAST-NEXT: mffprd r4, f0 -; FAST-NEXT: xxlor f0, v3, v3 -; FAST-NEXT: mffprd r3, f2 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mtfprd f2, r4 -; FAST-NEXT: mffprd r5, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v2, vs0, vs2 -; FAST-NEXT: mtfprd f0, r5 -; FAST-NEXT: xxmrghd v3, vs0, vs1 -; FAST-NEXT: blr %a = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> %x) ret <4 x i64> %a } @@ -4442,46 +3237,6 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v8f64: -; FAST: # %bb.0: -; FAST-NEXT: xxswapd vs0, v2 -; FAST-NEXT: xxswapd vs1, v3 -; FAST-NEXT: xxlor f4, v2, v2 -; FAST-NEXT: xxswapd vs2, v4 -; FAST-NEXT: xxswapd vs3, v5 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: fctid f4, f4 -; FAST-NEXT: mffprd r4, f0 -; FAST-NEXT: xxlor f0, v3, v3 -; FAST-NEXT: mffprd r3, f4 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mffprd r5, f0 -; FAST-NEXT: fctid f0, f1 -; FAST-NEXT: mtfprd f1, r4 -; FAST-NEXT: mffprd r6, f0 -; FAST-NEXT: xxlor f0, v4, v4 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mtfprd f4, r6 -; FAST-NEXT: mffprd r7, f0 -; FAST-NEXT: fctid f0, f2 -; FAST-NEXT: mtfprd f2, r5 -; FAST-NEXT: mtfprd f5, r7 -; FAST-NEXT: mffprd r8, f0 -; FAST-NEXT: xxlor f0, v5, v5 -; FAST-NEXT: fctid f0, f0 -; FAST-NEXT: mtfprd f6, r8 -; FAST-NEXT: mffprd r9, f0 -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: xxmrghd v3, vs2, vs4 -; FAST-NEXT: xxmrghd v4, vs5, vs6 -; FAST-NEXT: xxmrghd v2, vs0, vs1 -; FAST-NEXT: fctid f1, f3 -; FAST-NEXT: mtfprd f0, r9 -; FAST-NEXT: mffprd r3, f1 -; FAST-NEXT: mtfprd f1, r3 -; FAST-NEXT: xxmrghd v5, vs0, vs1 -; FAST-NEXT: blr %a = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> %x) ret <8 x i64> %a } @@ -4511,18 +3266,6 @@ define <1 x i64> @lrint_v1f128(<1 x fp128> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v1f128: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -32(r1) -; FAST-NEXT: std r0, 48(r1) -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: addi r1, r1, 32 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <1 x i64> @llvm.lrint.v1i64.v1f128(<1 x fp128> %x) ret <1 x i64> %a } @@ -4580,33 +3323,6 @@ define <2 x i64> @lrint_v2f128(<2 x fp128> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v2f128: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -80(r1) -; FAST-NEXT: li r3, 48 -; FAST-NEXT: std r0, 96(r1) -; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 64 -; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: vmr v31, v3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v31 -; FAST-NEXT: mtvsrd v30, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 64 -; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 48 -; FAST-NEXT: xxmrghd v2, vs0, v30 -; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: addi r1, r1, 80 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <2 x i64> @llvm.lrint.v2i64.v2f128(<2 x fp128> %x) ret <2 x i64> %a } @@ -4704,53 +3420,6 @@ define <4 x i64> @lrint_v4f128(<4 x fp128> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v4f128: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -112(r1) -; FAST-NEXT: li r3, 48 -; FAST-NEXT: std r0, 128(r1) -; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 64 -; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 80 -; FAST-NEXT: vmr v29, v3 -; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 96 -; FAST-NEXT: vmr v30, v4 -; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: vmr v31, v5 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v29 -; FAST-NEXT: mtvsrd v28, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v30 -; FAST-NEXT: xxmrghd v29, vs0, v28 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v31 -; FAST-NEXT: mtvsrd v30, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 96 -; FAST-NEXT: vmr v2, v29 -; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 80 -; FAST-NEXT: xxmrghd v3, vs0, v30 -; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 64 -; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 48 -; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: addi r1, r1, 112 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <4 x i64> @llvm.lrint.v4i64.v4f128(<4 x fp128> %x) ret <4 x i64> %a } @@ -4928,93 +3597,6 @@ define <8 x i64> @lrint_v8f128(<8 x fp128> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v8f128: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -176(r1) -; FAST-NEXT: li r3, 48 -; FAST-NEXT: std r0, 192(r1) -; FAST-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 64 -; FAST-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 80 -; FAST-NEXT: vmr v25, v3 -; FAST-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 96 -; FAST-NEXT: vmr v26, v4 -; FAST-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 112 -; FAST-NEXT: vmr v27, v5 -; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 128 -; FAST-NEXT: vmr v28, v6 -; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 144 -; FAST-NEXT: vmr v29, v7 -; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 160 -; FAST-NEXT: vmr v30, v8 -; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: vmr v31, v9 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v25 -; FAST-NEXT: mtvsrd v24, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v26 -; FAST-NEXT: xxmrghd v25, vs0, v24 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v27 -; FAST-NEXT: mtvsrd v26, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v28 -; FAST-NEXT: xxmrghd v27, vs0, v26 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v29 -; FAST-NEXT: mtvsrd v28, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v30 -; FAST-NEXT: xxmrghd v29, vs0, v28 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v31 -; FAST-NEXT: mtvsrd v30, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 160 -; FAST-NEXT: vmr v4, v29 -; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 144 -; FAST-NEXT: vmr v3, v27 -; FAST-NEXT: vmr v2, v25 -; FAST-NEXT: xxmrghd v5, vs0, v30 -; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 128 -; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 112 -; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 96 -; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 80 -; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 64 -; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 48 -; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: addi r1, r1, 176 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <8 x i64> @llvm.lrint.v8i64.v8f128(<8 x fp128> %x) ret <8 x i64> %a } @@ -5355,176 +3937,6 @@ define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) nounwind { ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr -; -; FAST-LABEL: lrint_v16i64_v16f128: -; FAST: # %bb.0: -; FAST-NEXT: mflr r0 -; FAST-NEXT: stdu r1, -304(r1) -; FAST-NEXT: li r3, 112 -; FAST-NEXT: std r0, 320(r1) -; FAST-NEXT: stxvd2x v20, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 128 -; FAST-NEXT: stxvd2x v21, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 144 -; FAST-NEXT: vmr v21, v4 -; FAST-NEXT: stxvd2x v22, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 160 -; FAST-NEXT: vmr v22, v6 -; FAST-NEXT: stxvd2x v23, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 176 -; FAST-NEXT: vmr v23, v8 -; FAST-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 192 -; FAST-NEXT: vmr v24, v9 -; FAST-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 208 -; FAST-NEXT: vmr v25, v7 -; FAST-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 224 -; FAST-NEXT: vmr v26, v10 -; FAST-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 240 -; FAST-NEXT: vmr v27, v5 -; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 256 -; FAST-NEXT: vmr v28, v11 -; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 272 -; FAST-NEXT: vmr v29, v12 -; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 288 -; FAST-NEXT: vmr v30, v3 -; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 64 -; FAST-NEXT: stxvd2x v13, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: addi r3, r1, 576 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: addi r3, r1, 560 -; FAST-NEXT: lxvd2x vs1, 0, r3 -; FAST-NEXT: addi r3, r1, 544 -; FAST-NEXT: lxvd2x vs2, 0, r3 -; FAST-NEXT: li r3, 96 -; FAST-NEXT: xxswapd vs0, vs0 -; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 80 -; FAST-NEXT: xxswapd vs0, vs1 -; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: li r3, 48 -; FAST-NEXT: xxswapd vs0, vs2 -; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill -; FAST-NEXT: addi r3, r1, 528 -; FAST-NEXT: lxvd2x vs0, 0, r3 -; FAST-NEXT: xxswapd v31, vs0 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v30 -; FAST-NEXT: mtvsrd v20, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v21 -; FAST-NEXT: xxmrghd v30, vs0, v20 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v27 -; FAST-NEXT: mtvsrd v21, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v22 -; FAST-NEXT: xxmrghd v27, vs0, v21 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v25 -; FAST-NEXT: mtvsrd v22, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v23 -; FAST-NEXT: xxmrghd v25, vs0, v22 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v24 -; FAST-NEXT: mtvsrd v23, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v26 -; FAST-NEXT: xxmrghd v24, vs0, v23 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: vmr v2, v28 -; FAST-NEXT: mtvsrd v26, r3 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v29 -; FAST-NEXT: xxmrghd v28, vs0, v26 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtvsrd v29, r3 -; FAST-NEXT: li r3, 64 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: vmr v2, v31 -; FAST-NEXT: xxmrghd v29, vs0, v29 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtvsrd v31, r3 -; FAST-NEXT: li r3, 48 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 80 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: xxmrghd v31, vs0, v31 -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtvsrd v26, r3 -; FAST-NEXT: li r3, 96 -; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: bl lrintf128 -; FAST-NEXT: nop -; FAST-NEXT: mtfprd f0, r3 -; FAST-NEXT: li r3, 288 -; FAST-NEXT: vmr v8, v31 -; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 272 -; FAST-NEXT: vmr v2, v30 -; FAST-NEXT: vmr v7, v29 -; FAST-NEXT: vmr v6, v28 -; FAST-NEXT: vmr v3, v27 -; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 256 -; FAST-NEXT: vmr v4, v25 -; FAST-NEXT: vmr v5, v24 -; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 240 -; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 224 -; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 208 -; FAST-NEXT: xxmrghd v9, vs0, v26 -; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 192 -; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 176 -; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 160 -; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 144 -; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 128 -; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: li r3, 112 -; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload -; FAST-NEXT: addi r1, r1, 304 -; FAST-NEXT: ld r0, 16(r1) -; FAST-NEXT: mtlr r0 -; FAST-NEXT: blr %a = call <16 x i64> @llvm.lrint.v16i64.v16f128(<16 x fp128> %x) ret <16 x i64> %a }