From bd6ef1ab588962e06ca295ddf788eb85e751d657 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Tue, 24 Sep 2024 16:01:49 +0000 Subject: [PATCH 1/9] [IR] Add `llvm.sincos` intrinsic This adds the `llvm.sincos` intrinsic, legalization, and lowering. The `llvm.sincos` intrinsic takes a floating-point value and returns both the sine and cosine (as a struct). ``` declare { float, float } @llvm.sincos.f32(float %Val) declare { double, double } @llvm.sincos.f64(double %Val) declare { x86_fp80, x86_fp80 } @llvm.sincos.f80(x86_fp80 %Val) declare { fp128, fp128 } @llvm.sincos.f128(fp128 %Val) declare { ppc_fp128, ppc_fp128 } @llvm.sincos.ppcf128(ppc_fp128 %Val) declare { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %Val) ``` The lowering is built on top of the existing FSINCOS ISD node, with additional type legalization to allow for f16, f128, and vector values. --- llvm/docs/LangRef.rst | 45 ++ llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 + .../CodeGen/GlobalISel/MachineIRBuilder.h | 7 + llvm/include/llvm/IR/Intrinsics.td | 2 + llvm/include/llvm/Support/TargetOpcodes.def | 3 + llvm/include/llvm/Target/GenericOpcodes.td | 7 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 7 + llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 10 + .../SelectionDAG/LegalizeFloatTypes.cpp | 80 +++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 9 +- .../SelectionDAG/LegalizeVectorOps.cpp | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 30 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 15 +- llvm/lib/CodeGen/TargetLoweringBase.cpp | 5 +- .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 1 + .../GlobalISel/legalizer-info-validation.mir | 3 + llvm/test/CodeGen/AArch64/llvm.sincos.ll | 516 ++++++++++++++++++ llvm/test/CodeGen/ARM/llvm.sincos.ll | 464 ++++++++++++++++ 18 files changed, 1197 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/llvm.sincos.ll create mode 100644 llvm/test/CodeGen/ARM/llvm.sincos.ll diff --git a/llvm/docs/LangRef.rst 
b/llvm/docs/LangRef.rst index b83675c6ed97a..7dfa394c8b81b 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15512,6 +15512,8 @@ Semantics: This function returns the first value raised to the second power with an unspecified sequence of rounding operations. +.. _t_llvm_sin: + '``llvm.sin.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -15549,6 +15551,8 @@ trapping or setting ``errno``. When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. +.. _t_llvm_cos: + '``llvm.cos.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -15882,6 +15886,47 @@ trapping or setting ``errno``. When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. + +'``llvm.sincos.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.sincos`` on any +floating-point or vector of floating-point type. Not all targets support +all types however. + +:: + + declare { float, float } @llvm.sincos.f32(float %Val) + declare { double, double } @llvm.sincos.f64(double %Val) + declare { x86_fp80, x86_fp80 } @llvm.sincos.f80(x86_fp80 %Val) + declare { fp128, fp128 } @llvm.sincos.f128(fp128 %Val) + declare { ppc_fp128, ppc_fp128 } @llvm.sincos.ppcf128(ppc_fp128 %Val) + declare { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %Val) + +Overview: +""""""""" + +The '``llvm.sincos.*``' intrinsics return the sine and cosine of the operand. + +Arguments: +"""""""""" + +The argument is a :ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` +of floating-point values. Returns two values matching the argument type in a +struct. + +Semantics: +"""""""""" + +This intrinsic is equivalent to calling both :ref:`llvm.sin <t_llvm_sin>` +and :ref:`llvm.cos <t_llvm_cos>` on the argument. + +The first result is the sine of the argument and the second result is the cosine +of the argument. 
+ '``llvm.pow.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index db3b5cddd7c1c..b0316e67654db 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1986,6 +1986,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::cos: ISD = ISD::FCOS; break; + case Intrinsic::sincos: + ISD = ISD::FSINCOS; + break; case Intrinsic::tan: ISD = ISD::FTAN; break; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 9b993482c8cc0..ab3025e4923cd 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -2009,6 +2009,13 @@ class MachineIRBuilder { return buildInstr(TargetOpcode::G_FFREXP, {Fract, Exp}, {Src}, Flags); } + /// Build and insert \p Sin, \p Cos = G_FSINCOS \p Src + MachineInstrBuilder + buildFSincos(const DstOp &Sin, const DstOp &Cos, const SrcOp &Src, + std::optional Flags = std::nullopt) { + return buildInstr(TargetOpcode::G_FSINCOS, {Sin, Cos}, {Src}, Flags); + } + /// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1 MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1) { diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 94e53f372127d..e91758ed34eb3 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1050,6 +1050,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_nearbyint : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_round : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_roundeven : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_sincos : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>], + 
[llvm_anyfloat_ty]>; // Truncate a floating point number with a specific rounding mode def int_fptrunc_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 3556a253d875f..0c4c6ccd5c568 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -809,6 +809,9 @@ HANDLE_TARGET_OPCODE(G_FCOS) /// Floating point sine. HANDLE_TARGET_OPCODE(G_FSIN) +/// Floating point combined sine and cosine. +HANDLE_TARGET_OPCODE(G_FSINCOS) + /// Floating point tangent. HANDLE_TARGET_OPCODE(G_FTAN) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 8b8bc9a0e9cf5..62bb9789afe5d 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1020,6 +1020,13 @@ def G_FSIN : GenericInstruction { let hasSideEffects = false; } +// Floating point combined sine and cosine. +def G_FSINCOS : GenericInstruction { + let OutOperandList = (outs type0:$dst1, type0:$dst2); + let InOperandList = (ins type0:$src1); + let hasSideEffects = false; +} + // Floating point tangent of a value. 
def G_FTAN : GenericInstruction { let OutOperandList = (outs type0:$dst); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 563a826441345..5381dce58f9e6 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2343,6 +2343,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MachineInstr::copyFlagsFromInstruction(CI)); return true; } + case Intrinsic::sincos: { + ArrayRef VRegs = getOrCreateVRegs(CI); + MIRBuilder.buildFSincos(VRegs[0], VRegs[1], + getOrCreateVReg(*CI.getArgOperand(0)), + MachineInstr::copyFlagsFromInstruction(CI)); + return true; + } case Intrinsic::fptosi_sat: MIRBuilder.buildFPTOSI_SAT(getOrCreateVReg(CI), getOrCreateVReg(*CI.getArgOperand(0))); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e0a03383358b7..3534c1ca941a9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -5586,6 +5586,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(Tmp2.getValue(1)); break; } + case ISD::FSINCOS: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FSINCOS, dl, {NVT, NVT}, Tmp1); + + for (unsigned ResNum = 0; ResNum < Node->getNumValues(); ResNum++) + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2.getValue(ResNum), + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); + break; + } case ISD::FFLOOR: case ISD::FCEIL: case ISD::FRINT: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 73c258f0f6f18..9009c8ab4fc9a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -129,6 +129,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case 
ISD::FLDEXP: case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break; case ISD::FFREXP: R = SoftenFloatRes_FFREXP(N); break; + case ISD::FSINCOS: R = SoftenFloatRes_FSINCOS(N); break; case ISD::STRICT_FREM: case ISD::FREM: R = SoftenFloatRes_FREM(N); break; case ISD::STRICT_FRINT: @@ -774,6 +775,45 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) { return ReturnVal; } +SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) { + assert(!N->isStrictFPOpcode() && "strictfp not implemented for fsincos"); + EVT VT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getFSINCOS(VT); + + if (!TLI.getLibcallName(LC)) + return SDValue(); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue StackSlotSin = DAG.CreateStackTemporary(NVT); + SDValue StackSlotCos = DAG.CreateStackTemporary(NVT); + + SDLoc DL(N); + + TargetLowering::MakeLibCallOptions CallOptions; + std::array Ops{GetSoftenedFloat(N->getOperand(0)), StackSlotSin, + StackSlotCos}; + std::array OpsVT{VT, StackSlotSin.getValueType(), + StackSlotCos.getValueType()}; + + // TODO: setTypeListBeforeSoften can't properly express multiple return types, + // but since both returns have the same type for sincos it should be okay. 
+ CallOptions.setTypeListBeforeSoften({OpsVT}, VT, true); + + auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, DL, + /*Chain=*/SDValue()); + unsigned ResNo = 0; + for (SDValue OutPtr : {StackSlotSin, StackSlotCos}) { + int FrameIdx = cast(OutPtr)->getIndex(); + auto PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); + + SDValue LoadExp = DAG.getLoad(NVT, DL, Chain, OutPtr, PtrInfo); + SetSoftenedFloat(SDValue(N, ResNo++), LoadExp); + } + + return SDValue(); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::REM_F32, @@ -2704,6 +2744,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLDEXP: R = PromoteFloatRes_ExpOp(N); break; case ISD::FFREXP: R = PromoteFloatRes_FFREXP(N); break; + case ISD::FSINCOS: + R = PromoteFloatRes_FSINCOS(N); + break; + case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break; case ISD::STRICT_FP_ROUND: R = PromoteFloatRes_STRICT_FP_ROUND(N); @@ -2899,6 +2943,18 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FFREXP(SDNode *N) { return Res; } +SDValue DAGTypeLegalizer::PromoteFloatRes_FSINCOS(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op = GetPromotedFloat(N->getOperand(0)); + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), {NVT, NVT}, Op); + + for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) + SetPromotedFloat(SDValue(N, ResNum), Res.getValue(ResNum)); + + return SDValue(); +} + // Explicit operation to reduce precision. Reduce the value to half precision // and promote it back to the legal type. 
SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) { @@ -3148,6 +3204,10 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break; + case ISD::FSINCOS: + R = SoftPromoteHalfRes_FSINCOS(N); + break; + case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break; case ISD::ATOMIC_LOAD: R = SoftPromoteHalfRes_ATOMIC_LOAD(N); @@ -3304,6 +3364,26 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FFREXP(SDNode *N) { return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FSINCOS(SDNode *N) { + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + SDValue Op = GetSoftPromotedHalf(N->getOperand(0)); + SDLoc dl(N); + + // Promote to the larger FP type. + Op = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op); + SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(NVT, NVT), Op); + + // Convert back to FP16 as an integer. 
+ ISD::NodeType Truncate = GetPromotionOpcode(NVT, OVT); + for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) { + SDValue Trunc = DAG.getNode(Truncate, dl, MVT::i16, Res.getValue(ResNum)); + SetSoftPromotedHalf(SDValue(N, ResNum), Trunc); + } + + return SDValue(); +} + SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) { EVT RVT = N->getValueType(0); bool IsStrict = N->isStrictFPOpcode(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 868da25ca8cb4..33befb8d4ac0e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -597,6 +597,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FPOW(SDNode *N); SDValue SoftenFloatRes_ExpOp(SDNode *N); SDValue SoftenFloatRes_FFREXP(SDNode *N); + SDValue SoftenFloatRes_FSINCOS(SDNode *N); SDValue SoftenFloatRes_FREEZE(SDNode *N); SDValue SoftenFloatRes_FREM(SDNode *N); SDValue SoftenFloatRes_FRINT(SDNode *N); @@ -744,6 +745,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteFloatRes_FMAD(SDNode *N); SDValue PromoteFloatRes_ExpOp(SDNode *N); SDValue PromoteFloatRes_FFREXP(SDNode *N); + SDValue PromoteFloatRes_FSINCOS(SDNode *N); SDValue PromoteFloatRes_FP_ROUND(SDNode *N); SDValue PromoteFloatRes_STRICT_FP_ROUND(SDNode *N); SDValue PromoteFloatRes_LOAD(SDNode *N); @@ -792,6 +794,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftPromoteHalfRes_FMAD(SDNode *N); SDValue SoftPromoteHalfRes_ExpOp(SDNode *N); SDValue SoftPromoteHalfRes_FFREXP(SDNode *N); + SDValue SoftPromoteHalfRes_FSINCOS(SDNode *N); SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N); SDValue SoftPromoteHalfRes_LOAD(SDNode *N); SDValue SoftPromoteHalfRes_ATOMIC_LOAD(SDNode *N); @@ -863,7 +866,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_IS_FPCLASS(SDNode *N); SDValue ScalarizeVecRes_FIX(SDNode *N); - SDValue 
ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo); + SDValue ScalarizeVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo); // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); @@ -917,7 +920,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo, SDValue &Lo, + SDValue &Hi); void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -1068,6 +1072,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_ExpOp(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); + SDValue WidenVecRes_FSINCOS(SDNode *N); // Widen Vector Operand. 
bool WidenVectorOperand(SDNode *N, unsigned OpNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index a8042fc3e7a69..c80da28b3dc34 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -452,6 +452,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UMULO: case ISD::FCANONICALIZE: case ISD::FFREXP: + case ISD::FSINCOS: case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 50e2a923699c8..65c9bb64b3cc1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -130,7 +130,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_ADDRSPACECAST(N); break; case ISD::FFREXP: - R = ScalarizeVecRes_FFREXP(N, ResNo); + case ISD::FSINCOS: + R = ScalarizeVecRes_UnaryOpWithTwoResults(N, ResNo); break; case ISD::ADD: case ISD::AND: @@ -276,7 +277,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) { Op2, N->getFlags()); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo) { +SDValue +DAGTypeLegalizer::ScalarizeVecRes_UnaryOpWithTwoResults(SDNode *N, + unsigned ResNo) { assert(N->getValueType(0).getVectorNumElements() == 1 && "Unexpected vector type!"); SDValue Elt = GetScalarizedVector(N->getOperand(0)); @@ -1253,7 +1256,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_ADDRSPACECAST(N, Lo, Hi); break; case ISD::FFREXP: - SplitVecRes_FFREXP(N, ResNo, Lo, Hi); + case ISD::FSINCOS: + SplitVecRes_UnaryOpWithTwoResults(N, ResNo, Lo, Hi); break; case ISD::ANY_EXTEND: @@ -2615,8 +2619,10 @@ void DAGTypeLegalizer::SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo, Hi = DAG.getAddrSpaceCast(dl, HiVT, Hi, SrcAS, DestAS); 
} -void DAGTypeLegalizer::SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, - SDValue &Lo, SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_UnaryOpWithTwoResults(SDNode *N, + unsigned ResNo, + SDValue &Lo, + SDValue &Hi) { SDLoc dl(N); auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0)); auto [LoVT1, HiVT1] = DAG.GetSplitDestVTs(N->getValueType(1)); @@ -4752,6 +4758,14 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_FSHR: Res = WidenVecRes_Ternary(N); break; + case ISD::FSINCOS: { + if (!unrollExpandedOp()) + Res = WidenVecRes_FSINCOS(N); + for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) + SetWidenedVector(SDValue(N, ResNum), Res.getValue(ResNum)); + Res = SDValue(); + break; + } } // If Res is null, the sub-method took care of registering the result. @@ -5500,6 +5514,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { WidenVT, WidenLHS, DAG.getValueType(ExtVT)); } +SDValue DAGTypeLegalizer::WidenVecRes_FSINCOS(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT, WidenVT}, InOp); +} + SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo); return GetWidenedVector(WidenVec); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8450553743074..8affa0eaaca78 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6936,12 +6936,23 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; + case Intrinsic::sincos: case Intrinsic::frexp: { + unsigned Opcode; + switch (Intrinsic) { + default: + llvm_unreachable("unexpected intrinsic"); + case 
Intrinsic::sincos: + Opcode = ISD::FSINCOS; + break; + case Intrinsic::frexp: + Opcode = ISD::FFREXP; + break; + } SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); SDVTList VTs = DAG.getVTList(ValueVTs); - setValue(&I, - DAG.getNode(ISD::FFREXP, sdl, VTs, getValue(I.getArgOperand(0)))); + setValue(&I, DAG.getNode(Opcode, sdl, VTs, getValue(I.getArgOperand(0)))); return; } case Intrinsic::arithmetic_fence: { diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 7a28f7892cbf3..56ae58030328b 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -773,8 +773,9 @@ void TargetLoweringBase::initActions() { setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand); // These library functions default to expand. - setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP}, VT, - Expand); + setOperationAction( + {ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP, ISD::FSINCOS}, VT, + Expand); // These operations default to expand for vector types. if (VT.isVector()) diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 835ae98efb852..9dcf8259e3293 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2059,6 +2059,7 @@ bool ARMTTIImpl::isLoweredToCall(const Function *F) { case Intrinsic::powi: case Intrinsic::sin: case Intrinsic::cos: + case Intrinsic::sincos: case Intrinsic::pow: case Intrinsic::log: case Intrinsic::log10: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 4d096b7231c7c..4eea1beed82d9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -701,6 +701,9 @@ # DEBUG-NEXT: .. 
opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: G_FSINCOS (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FTAN (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. the first uncovered type index: 1, OK diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll new file mode 100644 index 0000000000000..841b0b2d66534 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll @@ -0,0 +1,516 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s + +define { half, half } @test_sincos_f16(half %a) { +; CHECK-LABEL: test_sincos_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldp s1, s0, [sp, #8] +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: fcvt h1, s1 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %result = call { half, half } @llvm.sincos.f16(half %a) + ret { half, half } %result +} + +define half @test_sincos_f16_only_use_sin(half %a) { +; CHECK-LABEL: test_sincos_f16_only_use_sin: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr s0, [sp, #12] +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %result = call { half, half } @llvm.sincos.f16(half %a) + %result.0 = extractvalue { half, half } %result, 0 + ret half %result.0 +} + +define half @test_sincos_f16_only_use_cos(half %a) { +; CHECK-LABEL: test_sincos_f16_only_use_cos: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr s0, [sp, #8] +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %result = call { half, half } @llvm.sincos.f16(half %a) + %result.1 = extractvalue { half, half } %result, 1 + ret half %result.1 +} + +define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) { +; CHECK-LABEL: test_sincos_v2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: add x0, sp, #36 +; CHECK-NEXT: add x1, sp, #32 +; CHECK-NEXT: fcvt s0, h1 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #28 +; CHECK-NEXT: add x1, sp, #24 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: add x1, sp, #40 +; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: fcvt s0, 
h0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #60 +; CHECK-NEXT: add x1, sp, #56 +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldp s2, s0, [sp, #32] +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldp s3, s1, [sp, #24] +; CHECK-NEXT: fcvt h4, s0 +; CHECK-NEXT: fcvt h2, s2 +; CHECK-NEXT: fcvt h0, s1 +; CHECK-NEXT: fcvt h1, s3 +; CHECK-NEXT: ldp s5, s3, [sp, #40] +; CHECK-NEXT: fcvt h3, s3 +; CHECK-NEXT: mov v0.h[1], v4.h[0] +; CHECK-NEXT: fcvt h4, s5 +; CHECK-NEXT: mov v1.h[1], v2.h[0] +; CHECK-NEXT: ldp s5, s2, [sp, #56] +; CHECK-NEXT: mov v0.h[2], v3.h[0] +; CHECK-NEXT: fcvt h2, s2 +; CHECK-NEXT: fcvt h3, s5 +; CHECK-NEXT: mov v1.h[2], v4.h[0] +; CHECK-NEXT: mov v0.h[3], v2.h[0] +; CHECK-NEXT: mov v1.h[3], v3.h[0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret + %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) + ret { <2 x half>, <2 x half> } %result +} + +define <2 x half> @test_sincos_v2f16_only_use_sin(<2 x half> %a) { +; CHECK-LABEL: test_sincos_v2f16_only_use_sin: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: add x0, sp, #36 +; CHECK-NEXT: add x1, sp, #32 +; CHECK-NEXT: fcvt s0, h1 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #28 +; CHECK-NEXT: add x1, sp, #24 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: add x1, sp, #40 +; CHECK-NEXT: mov 
h0, v0.h[2] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #60 +; CHECK-NEXT: add x1, sp, #56 +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr s0, [sp, #36] +; CHECK-NEXT: ldr s1, [sp, #28] +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: fcvt h2, s0 +; CHECK-NEXT: fcvt h0, s1 +; CHECK-NEXT: ldr s1, [sp, #44] +; CHECK-NEXT: fcvt h1, s1 +; CHECK-NEXT: mov v0.h[1], v2.h[0] +; CHECK-NEXT: ldr s2, [sp, #60] +; CHECK-NEXT: mov v0.h[2], v1.h[0] +; CHECK-NEXT: fcvt h1, s2 +; CHECK-NEXT: mov v0.h[3], v1.h[0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret + %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) + %result.0 = extractvalue { <2 x half>, <2 x half> } %result, 0 + ret <2 x half> %result.0 +} + +define <2 x half> @test_sincos_v2f16_only_use_cos(<2 x half> %a) { +; CHECK-LABEL: test_sincos_v2f16_only_use_cos: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: add x0, sp, #36 +; CHECK-NEXT: add x1, sp, #32 +; CHECK-NEXT: fcvt s0, h1 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #28 +; CHECK-NEXT: add x1, sp, #24 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: add x1, sp, #40 +; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #60 +; CHECK-NEXT: add x1, sp, #56 +; CHECK-NEXT: mov 
h0, v0.h[3] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr s0, [sp, #32] +; CHECK-NEXT: ldr s1, [sp, #24] +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: fcvt h2, s0 +; CHECK-NEXT: fcvt h0, s1 +; CHECK-NEXT: ldr s1, [sp, #40] +; CHECK-NEXT: fcvt h1, s1 +; CHECK-NEXT: mov v0.h[1], v2.h[0] +; CHECK-NEXT: ldr s2, [sp, #56] +; CHECK-NEXT: mov v0.h[2], v1.h[0] +; CHECK-NEXT: fcvt h1, s2 +; CHECK-NEXT: mov v0.h[3], v1.h[0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret + %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) + %result.1 = extractvalue { <2 x half>, <2 x half> } %result, 1 + ret <2 x half> %result.1 +} + +define { float, float } @test_sincos_f32(float %a) { +; CHECK-LABEL: test_sincos_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldp s1, s0, [sp, #8] +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %result = call { float, float } @llvm.sincos.f32(float %a) + ret { float, float } %result +} + +define float @test_sincos_f32_only_use_sin(float %a) { +; CHECK-LABEL: test_sincos_f32_only_use_sin: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr s0, [sp, #12] +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %result = call { float, float } @llvm.sincos.f32(float %a) + %result.0 = extractvalue { float, float } %result, 0 + ret float %result.0 +} + +define float @test_sincos_f32_only_use_cos(float %a) { +; CHECK-LABEL: test_sincos_f32_only_use_cos: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr s0, [sp, #8] +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %result = call { float, float } @llvm.sincos.f32(float %a) + %result.1 = extractvalue { float, float } %result, 1 + ret float %result.1 +} + +define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) { +; CHECK-LABEL: test_sincos_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: add x1, sp, #40 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #28 +; CHECK-NEXT: add x1, sp, #24 +; CHECK-NEXT: add x19, sp, #28 +; CHECK-NEXT: add x20, sp, #24 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldp s1, s0, [sp, #40] +; CHECK-NEXT: ldr x30, 
[sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ld1 { v0.s }[1], [x19] +; CHECK-NEXT: ld1 { v1.s }[1], [x20] +; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret + %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) + ret { <2 x float>, <2 x float> } %result +} + +define <2 x float> @test_sincos_v2f32_only_use_sin(<2 x float> %a) { +; CHECK-LABEL: test_sincos_v2f32_only_use_sin: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: add x0, sp, #28 +; CHECK-NEXT: add x1, sp, #24 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #20 +; CHECK-NEXT: add x1, sp, #16 +; CHECK-NEXT: add x19, sp, #20 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr s0, [sp, #28] +; CHECK-NEXT: ld1 { v0.s }[1], [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret + %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) + %result.0 = extractvalue { <2 x float>, <2 x float> } %result, 0 + ret <2 x float> %result.0 +} + +define <2 x float> @test_sincos_v2f32_only_use_cos(<2 x float> %a) { +; CHECK-LABEL: test_sincos_v2f32_only_use_cos: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: 
.cfi_offset w30, -16 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: add x0, sp, #28 +; CHECK-NEXT: add x1, sp, #24 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #20 +; CHECK-NEXT: add x1, sp, #16 +; CHECK-NEXT: add x19, sp, #16 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr s0, [sp, #24] +; CHECK-NEXT: ld1 { v0.s }[1], [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret + %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) + %result.1 = extractvalue { <2 x float>, <2 x float> } %result, 1 + ret <2 x float> %result.1 +} + +define { double, double } @test_sincos_f64(double %a) { +; CHECK-LABEL: test_sincos_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #24 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldr d0, [sp, #24] +; CHECK-NEXT: ldr d1, [sp, #8] +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret + %result = call { double, double } @llvm.sincos.f64(double %a) + ret { double, double } %result +} + +define double @test_sincos_f64_only_use_sin(double %a) { +; CHECK-LABEL: test_sincos_f64_only_use_sin: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #24 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldr d0, [sp, #24] +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; 
CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret + %result = call { double, double } @llvm.sincos.f64(double %a) + %result.0 = extractvalue { double, double } %result, 0 + ret double %result.0 +} + +define double @test_sincos_f64_only_use_cos(double %a) { +; CHECK-LABEL: test_sincos_f64_only_use_cos: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #24 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldr d0, [sp, #8] +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret + %result = call { double, double } @llvm.sincos.f64(double %a) + %result.1 = extractvalue { double, double } %result, 1 + ret double %result.1 +} + +define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) { +; CHECK-LABEL: test_sincos_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: add x0, sp, #56 +; CHECK-NEXT: add x1, sp, #40 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #32 +; CHECK-NEXT: add x1, sp, #24 +; CHECK-NEXT: add x19, sp, #32 +; CHECK-NEXT: add x20, sp, #24 +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldr d0, [sp, #56] +; CHECK-NEXT: ldr d1, [sp, #40] +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ld1 { v0.d }[1], [x19] +; CHECK-NEXT: ld1 { v1.d }[1], [x20] +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: add sp, 
sp, #80 +; CHECK-NEXT: ret + %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) + ret { <2 x double>, <2 x double> } %result +} + +define <2 x double> @test_sincos_v2f64_only_use_sin(<2 x double> %a) { +; CHECK-LABEL: test_sincos_v2f64_only_use_sin: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #40 +; CHECK-NEXT: add x1, sp, #32 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #24 +; CHECK-NEXT: add x1, sp, #16 +; CHECK-NEXT: add x19, sp, #24 +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldr d0, [sp, #40] +; CHECK-NEXT: ld1 { v0.d }[1], [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret + %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) + %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0 + ret <2 x double> %result.0 +} + +define <2 x double> @test_sincos_v2f64_only_use_cos(<2 x double> %a) { +; CHECK-LABEL: test_sincos_v2f64_only_use_cos: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #40 +; CHECK-NEXT: add x1, sp, #32 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #24 +; CHECK-NEXT: add x1, sp, #16 +; CHECK-NEXT: add x19, sp, #16 +; CHECK-NEXT: mov d0, v0.d[1] +; 
CHECK-NEXT: bl sincos +; CHECK-NEXT: ldr d0, [sp, #32] +; CHECK-NEXT: ld1 { v0.d }[1], [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret + %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) + %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1 + ret <2 x double> %result.1 +} diff --git a/llvm/test/CodeGen/ARM/llvm.sincos.ll b/llvm/test/CodeGen/ARM/llvm.sincos.ll new file mode 100644 index 0000000000000..b08a642fd3717 --- /dev/null +++ b/llvm/test/CodeGen/ARM/llvm.sincos.ll @@ -0,0 +1,464 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=thumbv7-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s + +define { half, half } @test_sincos_f16(half %a) { +; CHECK-LABEL: test_sincos_f16: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr r0, [sp, #4] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: ldr r0, [sp] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: pop {r4, pc} + %result = call { half, half } @llvm.sincos.f16(half %a) + ret { half, half } %result +} + +define half @test_sincos_f16_only_use_sin(half %a) { +; CHECK-LABEL: test_sincos_f16_only_use_sin: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr r0, [sp, #4] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: pop {r7, pc} + %result = call { half, half } @llvm.sincos.f16(half %a) + %result.0 = extractvalue { half, half } %result, 0 + ret half %result.0 +} + +define half 
@test_sincos_f16_only_use_cos(half %a) { +; CHECK-LABEL: test_sincos_f16_only_use_cos: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr r0, [sp] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: pop {r7, pc} + %result = call { half, half } @llvm.sincos.f16(half %a) + %result.1 = extractvalue { half, half } %result, 1 + ret half %result.1 +} + +define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) { +; CHECK-LABEL: test_sincos_v2f16: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: vpush {d8} +; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #12 +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr r0, [sp, #12] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: ldr r1, [sp, #4] +; CHECK-NEXT: strh.w r0, [sp, #22] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: strh.w r0, [sp, #20] +; CHECK-NEXT: add r0, sp, #20 +; CHECK-NEXT: vld1.32 {d8[0]}, [r0:32] +; CHECK-NEXT: ldr r0, [sp, #8] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: ldr r1, [sp] +; CHECK-NEXT: strh.w r0, [sp, #18] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: strh.w r0, [sp, #16] +; CHECK-NEXT: add r0, sp, #16 +; CHECK-NEXT: vmovl.u16 q9, d8 +; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32] +; CHECK-NEXT: vmovl.u16 q8, d16 +; CHECK-NEXT: vmov.32 r0, d18[0] +; CHECK-NEXT: vmov.32 r1, d18[1] +; CHECK-NEXT: vmov.32 r2, d16[0] +; CHECK-NEXT: vmov.32 r3, d16[1] +; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: vpop {d8} +; CHECK-NEXT: pop {r4, pc} + %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> 
%a) + ret { <2 x half>, <2 x half> } %result +} + +define <2 x half> @test_sincos_v2f16_only_use_sin(<2 x half> %a) { +; CHECK-LABEL: test_sincos_v2f16_only_use_sin: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #16 +; CHECK-NEXT: add r2, sp, #12 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #8 +; CHECK-NEXT: add r2, sp, #4 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr r0, [sp, #16] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: ldr r1, [sp, #8] +; CHECK-NEXT: strh.w r0, [sp, #22] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: strh.w r0, [sp, #20] +; CHECK-NEXT: add r0, sp, #20 +; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32] +; CHECK-NEXT: vmovl.u16 q8, d16 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: vmov.32 r1, d16[1] +; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: pop {r4, pc} + %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) + %result.0 = extractvalue { <2 x half>, <2 x half> } %result, 0 + ret <2 x half> %result.0 +} + +define <2 x half> @test_sincos_v2f16_only_use_cos(<2 x half> %a) { +; CHECK-LABEL: test_sincos_v2f16_only_use_cos: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #16 +; CHECK-NEXT: add r2, sp, #12 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #8 +; CHECK-NEXT: add r2, sp, #4 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr r0, [sp, #12] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: ldr r1, [sp, #4] +; CHECK-NEXT: strh.w r0, [sp, #22] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: strh.w r0, [sp, #20] +; CHECK-NEXT: add r0, sp, #20 +; CHECK-NEXT: vld1.32 {d16[0]}, 
[r0:32] +; CHECK-NEXT: vmovl.u16 q8, d16 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: vmov.32 r1, d16[1] +; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: pop {r4, pc} + %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) + %result.1 = extractvalue { <2 x half>, <2 x half> } %result, 1 + ret <2 x half> %result.1 +} + +define { float, float } @test_sincos_f32(float %a) { +; CHECK-LABEL: test_sincos_f32: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldrd r1, r0, [sp], #8 +; CHECK-NEXT: pop {r7, pc} + %result = call { float, float } @llvm.sincos.f32(float %a) + ret { float, float } %result +} + +define float @test_sincos_f32_only_use_sin(float %a) { +; CHECK-LABEL: test_sincos_f32_only_use_sin: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr r0, [sp, #4] +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: pop {r7, pc} + %result = call { float, float } @llvm.sincos.f32(float %a) + %result.0 = extractvalue { float, float } %result, 0 + ret float %result.0 +} + +define float @test_sincos_f32_only_use_cos(float %a) { +; CHECK-LABEL: test_sincos_f32_only_use_cos: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr r0, [sp], #8 +; CHECK-NEXT: pop {r7, pc} + %result = call { float, float } @llvm.sincos.f32(float %a) + %result.1 = extractvalue { float, float } %result, 1 + ret float %result.1 +} + +define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) { +; CHECK-LABEL: test_sincos_v2f32: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vpush {d8} +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; 
CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: add r1, sp, #12 +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: vldr s1, [sp, #4] +; CHECK-NEXT: vldr s3, [sp] +; CHECK-NEXT: vldr s0, [sp, #12] +; CHECK-NEXT: vldr s2, [sp, #8] +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: vpop {d8} +; CHECK-NEXT: pop {r7, pc} + %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) + ret { <2 x float>, <2 x float> } %result +} + +define <2 x float> @test_sincos_v2f32_only_use_sin(<2 x float> %a) { +; CHECK-LABEL: test_sincos_v2f32_only_use_sin: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vpush {d8} +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: add r1, sp, #12 +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: vldr s1, [sp, #4] +; CHECK-NEXT: vldr s0, [sp, #12] +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: vpop {d8} +; CHECK-NEXT: pop {r7, pc} + %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) + %result.0 = extractvalue { <2 x float>, <2 x float> } %result, 0 + ret <2 x float> %result.0 +} + +define <2 x float> @test_sincos_v2f32_only_use_cos(<2 x float> %a) { +; CHECK-LABEL: test_sincos_v2f32_only_use_cos: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vpush {d8} +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: add r1, sp, #12 +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: vldr s1, [sp] +; CHECK-NEXT: vldr s0, [sp, #8] +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: add sp, #16 +; 
CHECK-NEXT: vpop {d8} +; CHECK-NEXT: pop {r7, pc} + %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) + %result.1 = extractvalue { <2 x float>, <2 x float> } %result, 1 + ret <2 x float> %result.1 +} + +define { double, double } @test_sincos_f64(double %a) { +; CHECK-LABEL: test_sincos_f64: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: mov r3, sp +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldrd r0, r1, [sp, #8] +; CHECK-NEXT: ldrd r2, r3, [sp], #16 +; CHECK-NEXT: pop {r7, pc} + %result = call { double, double } @llvm.sincos.f64(double %a) + ret { double, double } %result +} + +define double @test_sincos_f64_only_use_sin(double %a) { +; CHECK-LABEL: test_sincos_f64_only_use_sin: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: mov r3, sp +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldrd r0, r1, [sp, #8] +; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: pop {r7, pc} + %result = call { double, double } @llvm.sincos.f64(double %a) + %result.0 = extractvalue { double, double } %result, 0 + ret double %result.0 +} + +define double @test_sincos_f64_only_use_cos(double %a) { +; CHECK-LABEL: test_sincos_f64_only_use_cos: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: mov r3, sp +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldrd r0, r1, [sp], #16 +; CHECK-NEXT: pop {r7, pc} + %result = call { double, double } @llvm.sincos.f64(double %a) + %result.1 = extractvalue { double, double } %result, 1 + ret double %result.1 +} + +define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) { +; CHECK-LABEL: test_sincos_v2f64: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: mov r12, r2 +; CHECK-NEXT: add r2, sp, #24 +; CHECK-NEXT: add r3, sp, #16 +; CHECK-NEXT: mov r4, r0 +; 
CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldrd r0, r1, [sp, #40] +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: mov r3, sp +; CHECK-NEXT: bl sincos +; CHECK-NEXT: vldr d19, [sp, #8] +; CHECK-NEXT: vldr d18, [sp, #24] +; CHECK-NEXT: vldr d17, [sp] +; CHECK-NEXT: vldr d16, [sp, #16] +; CHECK-NEXT: vst1.64 {d18, d19}, [r4]! +; CHECK-NEXT: vst1.64 {d16, d17}, [r4] +; CHECK-NEXT: add sp, #32 +; CHECK-NEXT: pop {r4, pc} + %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) + ret { <2 x double>, <2 x double> } %result +} + +define <2 x double> @test_sincos_v2f64_only_use_sin(<2 x double> %a) { +; CHECK-LABEL: test_sincos_v2f64_only_use_sin: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: sub sp, #36 +; CHECK-NEXT: mov r6, r3 +; CHECK-NEXT: mov r4, r2 +; CHECK-NEXT: add r2, sp, #24 +; CHECK-NEXT: add r3, sp, #16 +; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: mov r3, sp +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: vldr d16, [sp, #24] +; CHECK-NEXT: ldrd r0, r1, [sp, #8] +; CHECK-NEXT: vmov r2, r3, d16 +; CHECK-NEXT: add sp, #36 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} + %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) + %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0 + ret <2 x double> %result.0 +} + +define <2 x double> @test_sincos_v2f64_only_use_cos(<2 x double> %a) { +; CHECK-LABEL: test_sincos_v2f64_only_use_cos: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: sub sp, #36 +; CHECK-NEXT: mov r6, r3 +; CHECK-NEXT: mov r4, r2 +; CHECK-NEXT: add r2, sp, #24 +; CHECK-NEXT: add r3, sp, #16 +; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: add r2, sp, #8 +; 
CHECK-NEXT: mov r3, sp +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: vldr d16, [sp, #16] +; CHECK-NEXT: vmov r2, r3, d16 +; CHECK-NEXT: ldrd r0, r1, [sp], #36 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} + %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) + %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1 + ret <2 x double> %result.1 +} + +define { fp128, fp128 } @test_sincos_f128(fp128 %a) { +; CHECK-LABEL: test_sincos_f128: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: sub sp, #40 +; CHECK-NEXT: mov r12, r3 +; CHECK-NEXT: ldr r3, [sp, #56] +; CHECK-NEXT: add.w lr, sp, #8 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: add r0, sp, #24 +; CHECK-NEXT: strd r0, lr, [sp] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov r1, r2 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: bl sincosl +; CHECK-NEXT: ldrd r2, r3, [sp, #16] +; CHECK-NEXT: ldrd r12, r1, [sp, #8] +; CHECK-NEXT: str r3, [r4, #28] +; CHECK-NEXT: ldrd r3, r5, [sp, #32] +; CHECK-NEXT: ldrd lr, r0, [sp, #24] +; CHECK-NEXT: strd r1, r2, [r4, #20] +; CHECK-NEXT: add.w r1, r4, #8 +; CHECK-NEXT: stm.w r1, {r3, r5, r12} +; CHECK-NEXT: strd lr, r0, [r4] +; CHECK-NEXT: add sp, #40 +; CHECK-NEXT: pop {r4, r5, r7, pc} + %result = call { fp128, fp128 } @llvm.sincos.f128(fp128 %a) + ret { fp128, fp128 } %result +} From 0f22f684f4e7c5825d3f44c85654597ec8605b7b Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Wed, 25 Sep 2024 11:48:57 +0000 Subject: [PATCH 2/9] Fixups --- llvm/docs/GlobalISel/GenericOpcode.rst | 4 +- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 +- .../AArch64/GlobalISel/irtranslator-sincos.ll | 106 ++++++++++++++++++ 3 files changed, 110 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index 1c4e00b956bc4..8920530dc3f1a 100644 --- 
a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -633,8 +633,8 @@ G_FCEIL, G_FSQRT, G_FFLOOR, G_FRINT, G_FNEARBYINT These correspond to the standard C functions of the same name. -G_FCOS, G_FSIN, G_FTAN, G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH, G_FTANH -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +G_FCOS, G_FSIN, G_FSINCOS, G_FTAN, G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH, G_FTANH +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ These correspond to the standard C trigonometry functions of the same name. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3534c1ca941a9..673a3b7affc53 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -5589,11 +5589,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FSINCOS: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FSINCOS, dl, {NVT, NVT}, Tmp1); + Tmp3 = DAG.getIntPtrConstant(0, dl, /*isTarget=*/true); for (unsigned ResNum = 0; ResNum < Node->getNumValues(); ResNum++) Results.push_back( - DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2.getValue(ResNum), - DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); + DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2.getValue(ResNum), Tmp3)); break; } case ISD::FFLOOR: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll new file mode 100644 index 0000000000000..3eeddae35b62b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -O0 -mtriple=aarch64-linux-gnu -global-isel -stop-after=irtranslator %s -o - | FileCheck %s + +define { half, half } 
@test_sincos_f16(half %a) { + ; CHECK-LABEL: name: test_sincos_f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $h0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s16), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]] + ; CHECK-NEXT: $h0 = COPY [[FSINCOS]](s16) + ; CHECK-NEXT: $h1 = COPY [[FSINCOS1]](s16) + ; CHECK-NEXT: RET_ReallyLR implicit $h0, implicit $h1 + %result = call { half, half } @llvm.sincos.f16(half %a) + ret { half, half } %result +} + +define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) { + ; CHECK-LABEL: name: test_sincos_v2f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(<2 x s16>), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[UV]] + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FSINCOS]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV2]](s16), [[UV3]](s16), [[DEF]](s16), [[DEF]](s16) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FSINCOS1]](<2 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[DEF]](s16), [[DEF]](s16) + ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: $d1 = COPY [[BUILD_VECTOR1]](<4 x s16>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1 + %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) + ret { <2 x half>, <2 x half> } %result +} + +define { float, float } @test_sincos_f32(float %a) { + ; CHECK-LABEL: name: test_sincos_f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s32), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]] + ; CHECK-NEXT: $s0 = COPY [[FSINCOS]](s32) + ; CHECK-NEXT: $s1 = COPY [[FSINCOS1]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $s0, implicit $s1 + %result = call { float, float } @llvm.sincos.f32(float %a) + ret { float, float } %result +} + +define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) { + ; CHECK-LABEL: name: test_sincos_v2f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(<2 x s32>), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]] + ; CHECK-NEXT: $d0 = COPY [[FSINCOS]](<2 x s32>) + ; CHECK-NEXT: $d1 = COPY [[FSINCOS1]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1 + %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) + ret { <2 x float>, <2 x float> } %result +} + +define { double, double } @test_sincos_f64(double %a) { + ; CHECK-LABEL: name: test_sincos_f64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s64), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]] + ; CHECK-NEXT: $d0 = COPY [[FSINCOS]](s64) + ; CHECK-NEXT: $d1 = COPY [[FSINCOS1]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1 + %result = call { double, double } @llvm.sincos.f64(double %a) + ret { double, double } %result +} + +define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) { + ; CHECK-LABEL: name: test_sincos_v2f64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(<2 x s64>), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]] + ; CHECK-NEXT: $q0 = COPY [[FSINCOS]](<2 x s64>) + ; CHECK-NEXT: $q1 = COPY 
[[FSINCOS1]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1 + %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) + ret { <2 x double>, <2 x double> } %result +} + +define { fp128, fp128 } @test_sincos_f128(fp128 %a) { + ; CHECK-LABEL: name: test_sincos_f128 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $q0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s128), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]] + ; CHECK-NEXT: $q0 = COPY [[FSINCOS]](s128) + ; CHECK-NEXT: $q1 = COPY [[FSINCOS1]](s128) + ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1 + %result = call { fp128, fp128 } @llvm.sincos.f128(fp128 %a) + ret { fp128, fp128 } %result +} From 6a120eb2d4ac3c8f93ebd9b4dca3defde67453d4 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Wed, 2 Oct 2024 09:21:00 +0000 Subject: [PATCH 3/9] Add note about afn --- llvm/docs/LangRef.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 7dfa394c8b81b..268905881128f 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15927,6 +15927,9 @@ and :ref:`llvm.cos ` on the argument. The first result is the sine of the argument and the second result is the cosine of the argument. +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation.
+ '``llvm.pow.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ From 2f036f2038dc203cffcce6206cf346be0f6a50fe Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 11 Oct 2024 16:56:36 +0000 Subject: [PATCH 4/9] Fixups - Flag propagation - Legalize to individual sin/cos calls when FSINCOS unavailable - More tests --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 15 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 3 +- .../AArch64/GlobalISel/irtranslator-sincos.ll | 14 + llvm/test/CodeGen/AArch64/llvm.sincos.ll | 420 ++++++++++++++++++ 4 files changed, 449 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 673a3b7affc53..47a9ae12248cc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3714,6 +3714,17 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; } + case ISD::FSINCOS: { + if (isSinCosLibcallAvailable(Node, TLI)) + break; + EVT VT = Node->getValueType(0); + SDValue Op = Node->getOperand(0); + SDNodeFlags Flags = Node->getFlags(); + Tmp1 = DAG.getNode(ISD::FSIN, dl, VT, Op, Flags); + Tmp2 = DAG.getNode(ISD::FCOS, dl, VT, Op, Flags); + Results.append({Tmp1, Tmp2}); + break; + } case ISD::FMAD: llvm_unreachable("Illegal fmad should never be formed"); @@ -5588,9 +5599,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } case ISD::FSINCOS: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); - Tmp2 = DAG.getNode(ISD::FSINCOS, dl, {NVT, NVT}, Tmp1); + Tmp2 = DAG.getNode(ISD::FSINCOS, dl, DAG.getVTList(NVT, NVT), Tmp1, + Node->getFlags()); Tmp3 = DAG.getIntPtrConstant(0, dl, /*isTarget=*/true); - for (unsigned ResNum = 0; ResNum < Node->getNumValues(); ResNum++) Results.push_back( DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2.getValue(ResNum), Tmp3)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index
8affa0eaaca78..203e80e36b46d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6952,7 +6952,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); SDVTList VTs = DAG.getVTList(ValueVTs); - setValue(&I, DAG.getNode(Opcode, sdl, VTs, getValue(I.getArgOperand(0)))); + setValue( + &I, DAG.getNode(Opcode, sdl, VTs, getValue(I.getArgOperand(0)), Flags)); return; } case Intrinsic::arithmetic_fence: { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll index 3eeddae35b62b..69cd6ce87b5c6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll @@ -104,3 +104,17 @@ define { fp128, fp128 } @test_sincos_f128(fp128 %a) { %result = call { fp128, fp128 } @llvm.sincos.f128(fp128 %a) ret { fp128, fp128 } %result } + +define { float, float } @test_sincos_f32_afn(float %a) { + ; CHECK-LABEL: name: test_sincos_f32_afn + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s32), [[FSINCOS1:%[0-9]+]]:_ = afn G_FSINCOS [[COPY]] + ; CHECK-NEXT: $s0 = COPY [[FSINCOS]](s32) + ; CHECK-NEXT: $s1 = COPY [[FSINCOS1]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $s0, implicit $s1 + %result = call afn { float, float } @llvm.sincos.f32(float %a) + ret { float, float } %result +} diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll index 841b0b2d66534..2e3c02877dfcf 100644 --- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll +++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc
-mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -mtriple=aarch64-none-linux < %s | FileCheck -check-prefixes=NO-LIBCALL %s define { half, half } @test_sincos_f16(half %a) { ; CHECK-LABEL: test_sincos_f16: @@ -16,6 +17,27 @@ define { half, half } @test_sincos_f16(half %a) { ; CHECK-NEXT: fcvt h1, s1 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f16: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 32 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: .cfi_offset b8, -24 +; NO-LIBCALL-NEXT: .cfi_offset b9, -32 +; NO-LIBCALL-NEXT: fcvt s8, h0 +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: fcvt h9, s0 +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fmov s1, s0 +; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: fmov s0, s9 +; NO-LIBCALL-NEXT: fcvt h1, s1 +; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ret %result = call { half, half } @llvm.sincos.f16(half %a) ret { half, half } %result } @@ -34,6 +56,17 @@ define half @test_sincos_f16_only_use_sin(half %a) { ; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f16_only_use_sin: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 16 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: fcvt s0, h0 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ret %result = call { half, half } @llvm.sincos.f16(half %a) %result.0 = extractvalue { half, half } %result, 0 ret half %result.0 @@ -53,6 +86,17 @@ define half @test_sincos_f16_only_use_cos(half %a) { ; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f16_only_use_cos: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 16 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: fcvt s0, h0 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ret %result = call { half, half } @llvm.sincos.f16(half %a) %result.1 = extractvalue { half, half } %result, 1 ret half %result.1 @@ -112,6 +156,83 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) { ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_v2f16: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: sub sp, sp, #80 +; NO-LIBCALL-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 80 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: .cfi_offset b8, -24 +; NO-LIBCALL-NEXT: .cfi_offset b9, -32 +; NO-LIBCALL-NEXT: .cfi_offset b10, -40 +; NO-LIBCALL-NEXT: .cfi_offset b11, -48 +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: mov h1, v0.h[1] +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded 
Spill +; NO-LIBCALL-NEXT: fcvt s8, h1 +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: fcvt s9, h1 +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s9 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: mov h1, v1.h[2] +; NO-LIBCALL-NEXT: fcvt s10, h1 +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0] +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s10 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: mov h1, v1.h[3] +; NO-LIBCALL-NEXT: fcvt s11, h1 +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0] +; NO-LIBCALL-NEXT: fmov s0, s11 +; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.h[3], v0.h[0] +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s9 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0] +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s10 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0] +; NO-LIBCALL-NEXT: fmov s0, s11 +; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill +; 
NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fmov s1, s0 +; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: fcvt h2, s1 +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.h[3], v2.h[0] +; NO-LIBCALL-NEXT: // kill: def $d1 killed $d1 killed $q1 +; NO-LIBCALL-NEXT: add sp, sp, #80 +; NO-LIBCALL-NEXT: ret %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) ret { <2 x half>, <2 x half> } %result } @@ -162,6 +283,47 @@ define <2 x half> @test_sincos_v2f16_only_use_sin(<2 x half> %a) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_v2f16_only_use_sin: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: sub sp, sp, #48 +; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: mov h1, v0.h[1] +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fcvt s0, h1 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: fcvt s1, h1 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s1 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: fcvt h2, s0 +; NO-LIBCALL-NEXT: mov h1, v1.h[2] +; NO-LIBCALL-NEXT: fcvt s0, h1 +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v2.h[1], v1.h[0] +; NO-LIBCALL-NEXT: str q2, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: bl 
sinf +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: fcvt h2, s0 +; NO-LIBCALL-NEXT: mov h1, v1.h[3] +; NO-LIBCALL-NEXT: fcvt s0, h1 +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.h[2], v2.h[0] +; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: fcvt h1, s0 +; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: mov v0.h[3], v1.h[0] +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: add sp, sp, #48 +; NO-LIBCALL-NEXT: ret %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) %result.0 = extractvalue { <2 x half>, <2 x half> } %result, 0 ret <2 x half> %result.0 @@ -213,6 +375,47 @@ define <2 x half> @test_sincos_v2f16_only_use_cos(<2 x half> %a) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_v2f16_only_use_cos: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: sub sp, sp, #48 +; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: mov h1, v0.h[1] +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fcvt s0, h1 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: fcvt s1, h1 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s1 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: fcvt h2, s0 +; NO-LIBCALL-NEXT: mov h1, v1.h[2] +; NO-LIBCALL-NEXT: fcvt s0, h1 +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v2.h[1], 
v1.h[0] +; NO-LIBCALL-NEXT: str q2, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: fcvt h2, s0 +; NO-LIBCALL-NEXT: mov h1, v1.h[3] +; NO-LIBCALL-NEXT: fcvt s0, h1 +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.h[2], v2.h[0] +; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fcvt h1, s0 +; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: mov v0.h[3], v1.h[0] +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: add sp, sp, #48 +; NO-LIBCALL-NEXT: ret %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) %result.1 = extractvalue { <2 x half>, <2 x half> } %result, 1 ret <2 x half> %result.1 @@ -230,6 +433,25 @@ define { float, float } @test_sincos_f32(float %a) { ; CHECK-NEXT: ldp s1, s0, [sp, #8] ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f32: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 32 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: .cfi_offset b8, -24 +; NO-LIBCALL-NEXT: .cfi_offset b9, -32 +; NO-LIBCALL-NEXT: fmov s8, s0 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: fmov s9, s0 +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: fmov s1, s0 +; NO-LIBCALL-NEXT: fmov s0, s9 +; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ret %result = call { float, float } @llvm.sincos.f32(float %a) ret { float, float } %result } @@ -246,6 +468,10 @@ define float @test_sincos_f32_only_use_sin(float %a) { ; CHECK-NEXT: ldr s0, [sp, #12] ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f32_only_use_sin: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: b sinf %result = call { float, float } @llvm.sincos.f32(float %a) %result.0 = extractvalue { float, float } %result, 0 ret float %result.0 @@ -263,6 +489,10 @@ define float @test_sincos_f32_only_use_cos(float %a) { ; CHECK-NEXT: ldr s0, [sp, #8] ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f32_only_use_cos: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: b cosf %result = call { float, float } @llvm.sincos.f32(float %a) %result.1 = extractvalue { float, float } %result, 1 ret float %result.1 @@ -300,6 +530,45 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) { ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_v2f32: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: sub sp, sp, #64 +; NO-LIBCALL-NEXT: str d8, [sp, #48] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #56] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: 
.cfi_def_cfa_offset 64 +; NO-LIBCALL-NEXT: .cfi_offset w30, -8 +; NO-LIBCALL-NEXT: .cfi_offset b8, -16 +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: mov s8, v0.s[1] +; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0] +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fmov s1, s0 +; NO-LIBCALL-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: mov v1.s[1], v2.s[0] +; NO-LIBCALL-NEXT: // kill: def $d1 killed $d1 killed $q1 +; NO-LIBCALL-NEXT: add sp, sp, #64 +; NO-LIBCALL-NEXT: ret %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) ret { <2 x float>, <2 x float> } %result } @@ -330,6 +599,29 @@ define <2 x float> @test_sincos_v2f32_only_use_sin(<2 x float> %a) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_v2f32_only_use_sin: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: sub sp, sp, #48 
+; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: mov s0, v0.s[1] +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0] +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: add sp, sp, #48 +; NO-LIBCALL-NEXT: ret %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) %result.0 = extractvalue { <2 x float>, <2 x float> } %result, 0 ret <2 x float> %result.0 @@ -361,6 +653,29 @@ define <2 x float> @test_sincos_v2f32_only_use_cos(<2 x float> %a) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: add sp, sp, #48 ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_v2f32_only_use_cos: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: sub sp, sp, #48 +; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: mov s0, v0.s[1] +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 +; NO-LIBCALL-NEXT: bl cosf +; 
NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0] +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: add sp, sp, #48 +; NO-LIBCALL-NEXT: ret %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) %result.1 = extractvalue { <2 x float>, <2 x float> } %result, 1 ret <2 x float> %result.1 @@ -381,6 +696,25 @@ define { double, double } @test_sincos_f64(double %a) { ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f64: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 32 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: .cfi_offset b8, -24 +; NO-LIBCALL-NEXT: .cfi_offset b9, -32 +; NO-LIBCALL-NEXT: fmov d8, d0 +; NO-LIBCALL-NEXT: bl sin +; NO-LIBCALL-NEXT: fmov d9, d0 +; NO-LIBCALL-NEXT: fmov d0, d8 +; NO-LIBCALL-NEXT: bl cos +; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: fmov d1, d0 +; NO-LIBCALL-NEXT: fmov d0, d9 +; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ret %result = call { double, double } @llvm.sincos.f64(double %a) ret { double, double } %result } @@ -399,6 +733,10 @@ define double @test_sincos_f64_only_use_sin(double %a) { ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f64_only_use_sin: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: b sin %result = call { double, double } @llvm.sincos.f64(double %a) %result.0 = extractvalue { double, double } %result, 0 ret double %result.0 @@ -418,6 +756,10 @@ define double 
@test_sincos_f64_only_use_cos(double %a) { ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f64_only_use_cos: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: b cos %result = call { double, double } @llvm.sincos.f64(double %a) %result.1 = extractvalue { double, double } %result, 1 ret double %result.1 @@ -453,6 +795,42 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) { ; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_v2f64: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: sub sp, sp, #64 +; NO-LIBCALL-NEXT: str d8, [sp, #48] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #56] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 64 +; NO-LIBCALL-NEXT: .cfi_offset w30, -8 +; NO-LIBCALL-NEXT: .cfi_offset b8, -16 +; NO-LIBCALL-NEXT: mov d8, v0.d[1] +; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov d0, d8 +; NO-LIBCALL-NEXT: bl sin +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: bl sin +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0] +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov d0, d8 +; NO-LIBCALL-NEXT: bl cos +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: bl cos +; NO-LIBCALL-NEXT: fmov d1, d0 +; NO-LIBCALL-NEXT: ldp q2, q0, [sp] // 32-byte 
Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.d[1], v2.d[0] +; NO-LIBCALL-NEXT: add sp, sp, #64 +; NO-LIBCALL-NEXT: ret %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) ret { <2 x double>, <2 x double> } %result } @@ -481,6 +859,27 @@ define <2 x double> @test_sincos_v2f64_only_use_sin(<2 x double> %a) { ; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_v2f64_only_use_sin: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: sub sp, sp, #48 +; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: mov d0, v0.d[1] +; NO-LIBCALL-NEXT: bl sin +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: bl sin +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0] +; NO-LIBCALL-NEXT: add sp, sp, #48 +; NO-LIBCALL-NEXT: ret %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0 ret <2 x double> %result.0 @@ -510,6 +909,27 @@ define <2 x double> @test_sincos_v2f64_only_use_cos(<2 x double> %a) { ; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_v2f64_only_use_cos: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: sub sp, sp, #48 +; 
NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: mov d0, v0.d[1] +; NO-LIBCALL-NEXT: bl cos +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: bl cos +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0] +; NO-LIBCALL-NEXT: add sp, sp, #48 +; NO-LIBCALL-NEXT: ret %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1 ret <2 x double> %result.1 From d44e4d90ecbc4b6561469ff29c9fb3762f92b2ef Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 11 Oct 2024 19:40:57 +0000 Subject: [PATCH 5/9] Add SDAG flags test --- llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll b/llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll new file mode 100644 index 0000000000000..456b7f98974a9 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=aarch64-gnu-linux -debug-only=isel -o /dev/null < %s 2>&1 | FileCheck %s + +; REQUIRES: asserts + +define { float, float } @test_sincos_f32_afn(float %a) { +; CHECK-LABEL: Initial selection DAG: %bb.0 'test_sincos_f32_afn:' +; CHECK-NEXT: SelectionDAG has 9 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: f32,ch = CopyFromReg t0, Register:f32 %0 +; CHECK-NEXT: t3: f32,f32 = fsincos afn t2 
+; CHECK-NEXT: t5: ch,glue = CopyToReg t0, Register:f32 $s0, t3 +; CHECK-NEXT: t7: ch,glue = CopyToReg t5, Register:f32 $s1, t3:1, t5:1 +; CHECK-NEXT: t8: ch = AArch64ISD::RET_GLUE t7, Register:f32 $s0, Register:f32 $s1, t7:1 + %result = call afn { float, float } @llvm.sincos.f32(float %a) + ret { float, float } %result +} From 5a1da2573d4a07ea37368ac5e0e98f7d23574673 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 18 Oct 2024 15:15:56 +0000 Subject: [PATCH 6/9] Fixups - Test <3 x float> case and fix unrolling - Fix langref nit - Remove redundant tests --- llvm/docs/LangRef.rst | 6 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 11 +- llvm/test/CodeGen/AArch64/llvm.sincos.ll | 561 +++--------------- llvm/test/CodeGen/ARM/llvm.sincos.ll | 241 -------- 4 files changed, 101 insertions(+), 718 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 268905881128f..076350af7ace0 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15914,9 +15914,9 @@ The '``llvm.sincos.*``' intrinsics returns the sine and cosine of the operand. Arguments: """""""""" -The argument is a :ref:`floating-point <t_floating>` or :ref:`vector <t_vector>` -of floating-point values. Returns two values matching the argument type in a -struct. +The argument is a :ref:`floating-point <t_floating>` value or +:ref:`vector <t_vector>` of floating-point values. Returns two values matching +the argument type in a struct. 
Semantics: """""""""" diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 55cebc28e4927..3fcf957adc5f0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12534,8 +12534,15 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Scalars1.push_back(EltOp.getValue(1)); } - SDValue Vec0 = getBuildVector(VT, dl, Scalars0); - SDValue Vec1 = getBuildVector(VT1, dl, Scalars1); + for (; i < ResNE; ++i) { + Scalars0.push_back(getUNDEF(EltVT)); + Scalars1.push_back(getUNDEF(EltVT1)); + } + + EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE); + EVT VecVT1 = EVT::getVectorVT(*getContext(), EltVT1, ResNE); + SDValue Vec0 = getBuildVector(VecVT, dl, Scalars0); + SDValue Vec1 = getBuildVector(VecVT1, dl, Scalars1); return getMergeValues({Vec0, Vec1}, dl); } diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll index 2e3c02877dfcf..c5efc796e7a3c 100644 --- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll +++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll @@ -237,190 +237,6 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) { ret { <2 x half>, <2 x half> } %result } -define <2 x half> @test_sincos_v2f16_only_use_sin(<2 x half> %a) { -; CHECK-LABEL: test_sincos_v2f16_only_use_sin: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: add x0, sp, #36 -; CHECK-NEXT: add x1, sp, #32 -; CHECK-NEXT: fcvt s0, h1 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add x0, sp, #28 -; CHECK-NEXT: add x1, sp, #24 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: 
ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add x0, sp, #44 -; CHECK-NEXT: add x1, sp, #40 -; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add x0, sp, #60 -; CHECK-NEXT: add x1, sp, #56 -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr s0, [sp, #36] -; CHECK-NEXT: ldr s1, [sp, #28] -; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload -; CHECK-NEXT: fcvt h2, s0 -; CHECK-NEXT: fcvt h0, s1 -; CHECK-NEXT: ldr s1, [sp, #44] -; CHECK-NEXT: fcvt h1, s1 -; CHECK-NEXT: mov v0.h[1], v2.h[0] -; CHECK-NEXT: ldr s2, [sp, #60] -; CHECK-NEXT: mov v0.h[2], v1.h[0] -; CHECK-NEXT: fcvt h1, s2 -; CHECK-NEXT: mov v0.h[3], v1.h[0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret -; -; NO-LIBCALL-LABEL: test_sincos_v2f16_only_use_sin: -; NO-LIBCALL: // %bb.0: -; NO-LIBCALL-NEXT: sub sp, sp, #48 -; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48 -; NO-LIBCALL-NEXT: .cfi_offset w30, -16 -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 -; NO-LIBCALL-NEXT: mov h1, v0.h[1] -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: fcvt s0, h1 -; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: fcvt h0, s0 -; NO-LIBCALL-NEXT: fcvt s1, h1 -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: fmov s0, s1 -; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: fcvt h2, s0 -; NO-LIBCALL-NEXT: mov h1, v1.h[2] -; NO-LIBCALL-NEXT: fcvt s0, h1 -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: mov v2.h[1], v1.h[0] -; NO-LIBCALL-NEXT: str q2, [sp] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] 
// 16-byte Folded Reload -; NO-LIBCALL-NEXT: fcvt h2, s0 -; NO-LIBCALL-NEXT: mov h1, v1.h[3] -; NO-LIBCALL-NEXT: fcvt s0, h1 -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: mov v1.h[2], v2.h[0] -; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: fcvt h1, s0 -; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; NO-LIBCALL-NEXT: mov v0.h[3], v1.h[0] -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 -; NO-LIBCALL-NEXT: add sp, sp, #48 -; NO-LIBCALL-NEXT: ret - %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) - %result.0 = extractvalue { <2 x half>, <2 x half> } %result, 0 - ret <2 x half> %result.0 -} - -define <2 x half> @test_sincos_v2f16_only_use_cos(<2 x half> %a) { -; CHECK-LABEL: test_sincos_v2f16_only_use_cos: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: add x0, sp, #36 -; CHECK-NEXT: add x1, sp, #32 -; CHECK-NEXT: fcvt s0, h1 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add x0, sp, #28 -; CHECK-NEXT: add x1, sp, #24 -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add x0, sp, #44 -; CHECK-NEXT: add x1, sp, #40 -; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add x0, sp, #60 -; CHECK-NEXT: add x1, sp, #56 -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fcvt s0, h0 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr s0, [sp, #32] -; CHECK-NEXT: ldr s1, [sp, #24] -; CHECK-NEXT: ldr x30, 
[sp, #48] // 8-byte Folded Reload -; CHECK-NEXT: fcvt h2, s0 -; CHECK-NEXT: fcvt h0, s1 -; CHECK-NEXT: ldr s1, [sp, #40] -; CHECK-NEXT: fcvt h1, s1 -; CHECK-NEXT: mov v0.h[1], v2.h[0] -; CHECK-NEXT: ldr s2, [sp, #56] -; CHECK-NEXT: mov v0.h[2], v1.h[0] -; CHECK-NEXT: fcvt h1, s2 -; CHECK-NEXT: mov v0.h[3], v1.h[0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret -; -; NO-LIBCALL-LABEL: test_sincos_v2f16_only_use_cos: -; NO-LIBCALL: // %bb.0: -; NO-LIBCALL-NEXT: sub sp, sp, #48 -; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48 -; NO-LIBCALL-NEXT: .cfi_offset w30, -16 -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 -; NO-LIBCALL-NEXT: mov h1, v0.h[1] -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: fcvt s0, h1 -; NO-LIBCALL-NEXT: bl cosf -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: fcvt h0, s0 -; NO-LIBCALL-NEXT: fcvt s1, h1 -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: fmov s0, s1 -; NO-LIBCALL-NEXT: bl cosf -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: fcvt h2, s0 -; NO-LIBCALL-NEXT: mov h1, v1.h[2] -; NO-LIBCALL-NEXT: fcvt s0, h1 -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: mov v2.h[1], v1.h[0] -; NO-LIBCALL-NEXT: str q2, [sp] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: bl cosf -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: fcvt h2, s0 -; NO-LIBCALL-NEXT: mov h1, v1.h[3] -; NO-LIBCALL-NEXT: fcvt s0, h1 -; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: mov v1.h[2], v2.h[0] -; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: bl cosf -; NO-LIBCALL-NEXT: fcvt h1, s0 -; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; 
NO-LIBCALL-NEXT: mov v0.h[3], v1.h[0] -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 -; NO-LIBCALL-NEXT: add sp, sp, #48 -; NO-LIBCALL-NEXT: ret - %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) - %result.1 = extractvalue { <2 x half>, <2 x half> } %result, 1 - ret <2 x half> %result.1 -} - define { float, float } @test_sincos_f32(float %a) { ; CHECK-LABEL: test_sincos_f32: ; CHECK: // %bb.0: @@ -456,46 +272,101 @@ define { float, float } @test_sincos_f32(float %a) { ret { float, float } %result } -define float @test_sincos_f32_only_use_sin(float %a) { -; CHECK-LABEL: test_sincos_f32_only_use_sin: +define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) { +; CHECK-LABEL: test_sincos_v3f32: ; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: add x0, sp, #12 -; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: add x0, sp, #20 +; CHECK-NEXT: add x1, sp, #16 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr s0, [sp, #12] -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret -; -; NO-LIBCALL-LABEL: test_sincos_f32_only_use_sin: -; NO-LIBCALL: // %bb.0: -; NO-LIBCALL-NEXT: b sinf - %result = call { float, float } @llvm.sincos.f32(float %a) - %result.0 = extractvalue { float, float } %result, 0 - ret float %result.0 -} - -define float @test_sincos_f32_only_use_cos(float 
%a) { -; CHECK-LABEL: test_sincos_f32_only_use_cos: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: add x0, sp, #12 -; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #28 +; CHECK-NEXT: add x1, sp, #24 +; CHECK-NEXT: add x19, sp, #28 +; CHECK-NEXT: add x20, sp, #24 +; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr s0, [sp, #8] -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: add x1, sp, #40 +; CHECK-NEXT: add x21, sp, #44 +; CHECK-NEXT: add x22, sp, #40 +; CHECK-NEXT: mov s0, v0.s[2] +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldp s1, s0, [sp, #16] +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ld1 { v0.s }[1], [x19] +; CHECK-NEXT: ld1 { v1.s }[1], [x20] +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ld1 { v0.s }[2], [x21] +; CHECK-NEXT: ld1 { v1.s }[2], [x22] +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret ; -; NO-LIBCALL-LABEL: test_sincos_f32_only_use_cos: +; NO-LIBCALL-LABEL: test_sincos_v3f32: ; NO-LIBCALL: // %bb.0: -; NO-LIBCALL-NEXT: b cosf - %result = call { float, float } @llvm.sincos.f32(float %a) - %result.1 = extractvalue { float, float } %result, 1 - ret float %result.1 +; NO-LIBCALL-NEXT: sub sp, sp, #80 +; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 80 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: .cfi_offset b8, -24 +; NO-LIBCALL-NEXT: .cfi_offset b9, -32 +; NO-LIBCALL-NEXT: mov s8, v0.s[1] +; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s8 +; 
NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0] +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov s9, v0.s[2] +; NO-LIBCALL-NEXT: fmov s0, s9 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: mov v1.s[2], v0.s[0] +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0] +; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s9 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fmov s2, s0 +; NO-LIBCALL-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload +; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.s[2], v2.s[0] +; NO-LIBCALL-NEXT: add sp, sp, #80 +; NO-LIBCALL-NEXT: ret + %result = call { <3 x float>, <3 x float> } @llvm.sincos.v3f32(<3 x float> %a) + ret { <3 x float>, <3 x float> } %result } define { <2 x float>, <2 x float> } 
@test_sincos_v2f32(<2 x float> %a) { @@ -573,114 +444,6 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) { ret { <2 x float>, <2 x float> } %result } -define <2 x float> @test_sincos_v2f32_only_use_sin(<2 x float> %a) { -; CHECK-LABEL: test_sincos_v2f32_only_use_sin: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: add x0, sp, #28 -; CHECK-NEXT: add x1, sp, #24 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add x0, sp, #20 -; CHECK-NEXT: add x1, sp, #16 -; CHECK-NEXT: add x19, sp, #20 -; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr s0, [sp, #28] -; CHECK-NEXT: ld1 { v0.s }[1], [x19] -; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret -; -; NO-LIBCALL-LABEL: test_sincos_v2f32_only_use_sin: -; NO-LIBCALL: // %bb.0: -; NO-LIBCALL-NEXT: sub sp, sp, #48 -; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48 -; NO-LIBCALL-NEXT: .cfi_offset w30, -16 -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: mov s0, v0.s[1] -; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 -; NO-LIBCALL-NEXT: bl sinf -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: // kill: 
def $s0 killed $s0 def $q0 -; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0] -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 -; NO-LIBCALL-NEXT: add sp, sp, #48 -; NO-LIBCALL-NEXT: ret - %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) - %result.0 = extractvalue { <2 x float>, <2 x float> } %result, 0 - ret <2 x float> %result.0 -} - -define <2 x float> @test_sincos_v2f32_only_use_cos(<2 x float> %a) { -; CHECK-LABEL: test_sincos_v2f32_only_use_cos: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: add x0, sp, #28 -; CHECK-NEXT: add x1, sp, #24 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add x0, sp, #20 -; CHECK-NEXT: add x1, sp, #16 -; CHECK-NEXT: add x19, sp, #16 -; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr s0, [sp, #24] -; CHECK-NEXT: ld1 { v0.s }[1], [x19] -; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret -; -; NO-LIBCALL-LABEL: test_sincos_v2f32_only_use_cos: -; NO-LIBCALL: // %bb.0: -; NO-LIBCALL-NEXT: sub sp, sp, #48 -; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48 -; NO-LIBCALL-NEXT: .cfi_offset w30, -16 -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: mov s0, v0.s[1] -; NO-LIBCALL-NEXT: bl cosf -; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 
16-byte Folded Spill -; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 -; NO-LIBCALL-NEXT: bl cosf -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 -; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0] -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 -; NO-LIBCALL-NEXT: add sp, sp, #48 -; NO-LIBCALL-NEXT: ret - %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) - %result.1 = extractvalue { <2 x float>, <2 x float> } %result, 1 - ret <2 x float> %result.1 -} - define { double, double } @test_sincos_f64(double %a) { ; CHECK-LABEL: test_sincos_f64: ; CHECK: // %bb.0: @@ -719,52 +482,6 @@ define { double, double } @test_sincos_f64(double %a) { ret { double, double } %result } -define double @test_sincos_f64_only_use_sin(double %a) { -; CHECK-LABEL: test_sincos_f64_only_use_sin: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: add x0, sp, #24 -; CHECK-NEXT: add x1, sp, #8 -; CHECK-NEXT: bl sincos -; CHECK-NEXT: ldr d0, [sp, #24] -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #32 -; CHECK-NEXT: ret -; -; NO-LIBCALL-LABEL: test_sincos_f64_only_use_sin: -; NO-LIBCALL: // %bb.0: -; NO-LIBCALL-NEXT: b sin - %result = call { double, double } @llvm.sincos.f64(double %a) - %result.0 = extractvalue { double, double } %result, 0 - ret double %result.0 -} - -define double @test_sincos_f64_only_use_cos(double %a) { -; CHECK-LABEL: test_sincos_f64_only_use_cos: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: add x0, sp, #24 -; 
CHECK-NEXT: add x1, sp, #8 -; CHECK-NEXT: bl sincos -; CHECK-NEXT: ldr d0, [sp, #8] -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #32 -; CHECK-NEXT: ret -; -; NO-LIBCALL-LABEL: test_sincos_f64_only_use_cos: -; NO-LIBCALL: // %bb.0: -; NO-LIBCALL-NEXT: b cos - %result = call { double, double } @llvm.sincos.f64(double %a) - %result.1 = extractvalue { double, double } %result, 1 - ret double %result.1 -} - define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) { ; CHECK-LABEL: test_sincos_v2f64: ; CHECK: // %bb.0: @@ -834,103 +551,3 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) { %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) ret { <2 x double>, <2 x double> } %result } - -define <2 x double> @test_sincos_v2f64_only_use_sin(<2 x double> %a) { -; CHECK-LABEL: test_sincos_v2f64_only_use_sin: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: add x0, sp, #40 -; CHECK-NEXT: add x1, sp, #32 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: bl sincos -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add x0, sp, #24 -; CHECK-NEXT: add x1, sp, #16 -; CHECK-NEXT: add x19, sp, #24 -; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: bl sincos -; CHECK-NEXT: ldr d0, [sp, #40] -; CHECK-NEXT: ld1 { v0.d }[1], [x19] -; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret -; -; NO-LIBCALL-LABEL: test_sincos_v2f64_only_use_sin: -; NO-LIBCALL: // %bb.0: -; NO-LIBCALL-NEXT: sub sp, sp, #48 -; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48 -; NO-LIBCALL-NEXT: .cfi_offset w30, -16 -; 
NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: mov d0, v0.d[1] -; NO-LIBCALL-NEXT: bl sin -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 -; NO-LIBCALL-NEXT: bl sin -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 -; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0] -; NO-LIBCALL-NEXT: add sp, sp, #48 -; NO-LIBCALL-NEXT: ret - %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) - %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0 - ret <2 x double> %result.0 -} - -define <2 x double> @test_sincos_v2f64_only_use_cos(<2 x double> %a) { -; CHECK-LABEL: test_sincos_v2f64_only_use_cos: -; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #64 -; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: add x0, sp, #40 -; CHECK-NEXT: add x1, sp, #32 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: bl sincos -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add x0, sp, #24 -; CHECK-NEXT: add x1, sp, #16 -; CHECK-NEXT: add x19, sp, #16 -; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: bl sincos -; CHECK-NEXT: ldr d0, [sp, #32] -; CHECK-NEXT: ld1 { v0.d }[1], [x19] -; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret -; -; NO-LIBCALL-LABEL: test_sincos_v2f64_only_use_cos: -; NO-LIBCALL: // %bb.0: -; NO-LIBCALL-NEXT: sub sp, sp, #48 -; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill -; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48 -; 
NO-LIBCALL-NEXT: .cfi_offset w30, -16 -; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: mov d0, v0.d[1] -; NO-LIBCALL-NEXT: bl cos -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 -; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 -; NO-LIBCALL-NEXT: bl cos -; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 -; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload -; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0] -; NO-LIBCALL-NEXT: add sp, sp, #48 -; NO-LIBCALL-NEXT: ret - %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) - %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1 - ret <2 x double> %result.1 -} diff --git a/llvm/test/CodeGen/ARM/llvm.sincos.ll b/llvm/test/CodeGen/ARM/llvm.sincos.ll index b08a642fd3717..9628405df6bcb 100644 --- a/llvm/test/CodeGen/ARM/llvm.sincos.ll +++ b/llvm/test/CodeGen/ARM/llvm.sincos.ll @@ -107,76 +107,6 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) { ret { <2 x half>, <2 x half> } %result } -define <2 x half> @test_sincos_v2f16_only_use_sin(<2 x half> %a) { -; CHECK-LABEL: test_sincos_v2f16_only_use_sin: -; CHECK: @ %bb.0: -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: sub sp, #24 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r1 -; CHECK-NEXT: bl __gnu_h2f_ieee -; CHECK-NEXT: add r1, sp, #16 -; CHECK-NEXT: add r2, sp, #12 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: bl __gnu_h2f_ieee -; CHECK-NEXT: add r1, sp, #8 -; CHECK-NEXT: add r2, sp, #4 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr r0, [sp, #16] -; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: ldr r1, [sp, #8] -; CHECK-NEXT: strh.w r0, [sp, #22] -; CHECK-NEXT: mov r0, r1 -; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: strh.w r0, [sp, #20] -; CHECK-NEXT: add r0, 
sp, #20 -; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32] -; CHECK-NEXT: vmovl.u16 q8, d16 -; CHECK-NEXT: vmov.32 r0, d16[0] -; CHECK-NEXT: vmov.32 r1, d16[1] -; CHECK-NEXT: add sp, #24 -; CHECK-NEXT: pop {r4, pc} - %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) - %result.0 = extractvalue { <2 x half>, <2 x half> } %result, 0 - ret <2 x half> %result.0 -} - -define <2 x half> @test_sincos_v2f16_only_use_cos(<2 x half> %a) { -; CHECK-LABEL: test_sincos_v2f16_only_use_cos: -; CHECK: @ %bb.0: -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: sub sp, #24 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r1 -; CHECK-NEXT: bl __gnu_h2f_ieee -; CHECK-NEXT: add r1, sp, #16 -; CHECK-NEXT: add r2, sp, #12 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: bl __gnu_h2f_ieee -; CHECK-NEXT: add r1, sp, #8 -; CHECK-NEXT: add r2, sp, #4 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr r0, [sp, #12] -; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: ldr r1, [sp, #4] -; CHECK-NEXT: strh.w r0, [sp, #22] -; CHECK-NEXT: mov r0, r1 -; CHECK-NEXT: bl __gnu_f2h_ieee -; CHECK-NEXT: strh.w r0, [sp, #20] -; CHECK-NEXT: add r0, sp, #20 -; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32] -; CHECK-NEXT: vmovl.u16 q8, d16 -; CHECK-NEXT: vmov.32 r0, d16[0] -; CHECK-NEXT: vmov.32 r1, d16[1] -; CHECK-NEXT: add sp, #24 -; CHECK-NEXT: pop {r4, pc} - %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) - %result.1 = extractvalue { <2 x half>, <2 x half> } %result, 1 - ret <2 x half> %result.1 -} - define { float, float } @test_sincos_f32(float %a) { ; CHECK-LABEL: test_sincos_f32: ; CHECK: @ %bb.0: @@ -191,37 +121,6 @@ define { float, float } @test_sincos_f32(float %a) { ret { float, float } %result } -define float @test_sincos_f32_only_use_sin(float %a) { -; CHECK-LABEL: test_sincos_f32_only_use_sin: -; CHECK: @ %bb.0: -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: add r1, sp, #4 -; CHECK-NEXT: mov r2, sp -; CHECK-NEXT: bl sincosf -; 
CHECK-NEXT: ldr r0, [sp, #4] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: pop {r7, pc} - %result = call { float, float } @llvm.sincos.f32(float %a) - %result.0 = extractvalue { float, float } %result, 0 - ret float %result.0 -} - -define float @test_sincos_f32_only_use_cos(float %a) { -; CHECK-LABEL: test_sincos_f32_only_use_cos: -; CHECK: @ %bb.0: -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: add r1, sp, #4 -; CHECK-NEXT: mov r2, sp -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: ldr r0, [sp], #8 -; CHECK-NEXT: pop {r7, pc} - %result = call { float, float } @llvm.sincos.f32(float %a) - %result.1 = extractvalue { float, float } %result, 1 - ret float %result.1 -} - define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) { ; CHECK-LABEL: test_sincos_v2f32: ; CHECK: @ %bb.0: @@ -250,58 +149,6 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) { ret { <2 x float>, <2 x float> } %result } -define <2 x float> @test_sincos_v2f32_only_use_sin(<2 x float> %a) { -; CHECK-LABEL: test_sincos_v2f32_only_use_sin: -; CHECK: @ %bb.0: -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: vmov d8, r0, r1 -; CHECK-NEXT: add r1, sp, #4 -; CHECK-NEXT: mov r2, sp -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: add r1, sp, #12 -; CHECK-NEXT: add r2, sp, #8 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: vldr s1, [sp, #4] -; CHECK-NEXT: vldr s0, [sp, #12] -; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r7, pc} - %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) - %result.0 = extractvalue { <2 x float>, <2 x float> } %result, 0 - ret <2 x float> %result.0 -} - -define <2 x float> @test_sincos_v2f32_only_use_cos(<2 x float> %a) { -; CHECK-LABEL: test_sincos_v2f32_only_use_cos: -; CHECK: @ %bb.0: -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: sub sp, 
#16 -; CHECK-NEXT: vmov d8, r0, r1 -; CHECK-NEXT: add r1, sp, #4 -; CHECK-NEXT: mov r2, sp -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: add r1, sp, #12 -; CHECK-NEXT: add r2, sp, #8 -; CHECK-NEXT: bl sincosf -; CHECK-NEXT: vldr s1, [sp] -; CHECK-NEXT: vldr s0, [sp, #8] -; CHECK-NEXT: vmov r0, r1, d0 -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r7, pc} - %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) - %result.1 = extractvalue { <2 x float>, <2 x float> } %result, 1 - ret <2 x float> %result.1 -} - define { double, double } @test_sincos_f64(double %a) { ; CHECK-LABEL: test_sincos_f64: ; CHECK: @ %bb.0: @@ -317,37 +164,6 @@ define { double, double } @test_sincos_f64(double %a) { ret { double, double } %result } -define double @test_sincos_f64_only_use_sin(double %a) { -; CHECK-LABEL: test_sincos_f64_only_use_sin: -; CHECK: @ %bb.0: -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: add r2, sp, #8 -; CHECK-NEXT: mov r3, sp -; CHECK-NEXT: bl sincos -; CHECK-NEXT: ldrd r0, r1, [sp, #8] -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: pop {r7, pc} - %result = call { double, double } @llvm.sincos.f64(double %a) - %result.0 = extractvalue { double, double } %result, 0 - ret double %result.0 -} - -define double @test_sincos_f64_only_use_cos(double %a) { -; CHECK-LABEL: test_sincos_f64_only_use_cos: -; CHECK: @ %bb.0: -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: add r2, sp, #8 -; CHECK-NEXT: mov r3, sp -; CHECK-NEXT: bl sincos -; CHECK-NEXT: ldrd r0, r1, [sp], #16 -; CHECK-NEXT: pop {r7, pc} - %result = call { double, double } @llvm.sincos.f64(double %a) - %result.1 = extractvalue { double, double } %result, 1 - ret double %result.1 -} - define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) { ; CHECK-LABEL: test_sincos_v2f64: ; CHECK: @ %bb.0: @@ -376,63 +192,6 @@ define { <2 x double>, <2 x double> } 
@test_sincos_v2f64(<2 x double> %a) { ret { <2 x double>, <2 x double> } %result } -define <2 x double> @test_sincos_v2f64_only_use_sin(<2 x double> %a) { -; CHECK-LABEL: test_sincos_v2f64_only_use_sin: -; CHECK: @ %bb.0: -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: sub sp, #36 -; CHECK-NEXT: mov r6, r3 -; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: add r2, sp, #24 -; CHECK-NEXT: add r3, sp, #16 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: bl sincos -; CHECK-NEXT: add r2, sp, #8 -; CHECK-NEXT: mov r3, sp -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl sincos -; CHECK-NEXT: vldr d16, [sp, #24] -; CHECK-NEXT: ldrd r0, r1, [sp, #8] -; CHECK-NEXT: vmov r2, r3, d16 -; CHECK-NEXT: add sp, #36 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} - %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) - %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0 - ret <2 x double> %result.0 -} - -define <2 x double> @test_sincos_v2f64_only_use_cos(<2 x double> %a) { -; CHECK-LABEL: test_sincos_v2f64_only_use_cos: -; CHECK: @ %bb.0: -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: sub sp, #36 -; CHECK-NEXT: mov r6, r3 -; CHECK-NEXT: mov r4, r2 -; CHECK-NEXT: add r2, sp, #24 -; CHECK-NEXT: add r3, sp, #16 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: bl sincos -; CHECK-NEXT: add r2, sp, #8 -; CHECK-NEXT: mov r3, sp -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl sincos -; CHECK-NEXT: vldr d16, [sp, #16] -; CHECK-NEXT: vmov r2, r3, d16 -; CHECK-NEXT: ldrd r0, r1, [sp], #36 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} - %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) - %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1 - ret <2 x double> %result.1 -} - define { fp128, fp128 } @test_sincos_f128(fp128 %a) { ; 
CHECK-LABEL: test_sincos_f128: ; CHECK: @ %bb.0: From d8ca29309b2f48bdfee65935cf6972b83c3ab472 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Mon, 21 Oct 2024 11:34:09 +0000 Subject: [PATCH 7/9] Add and use `WidenVecRes_UnaryOpWithTwoResults` for SINCOS + FREXP --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 4 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 46 ++++++-- llvm/test/CodeGen/AArch64/llvm.frexp.ll | 101 ++++++++++++++++++ 3 files changed, 143 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/llvm.frexp.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 33befb8d4ac0e..c7e0bd86795f6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1072,7 +1072,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_ExpOp(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); - SDValue WidenVecRes_FSINCOS(SDNode *N); + SDValue WidenVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo); + void ReplaceOtherWidenResult(SDNode *N, SDNode *WidenNode, + unsigned WidenResNo); // Widen Vector Operand.
bool WidenVectorOperand(SDNode *N, unsigned OpNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 65c9bb64b3cc1..18480327217e1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4435,6 +4435,22 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_HISTOGRAM(SDNode *N) { // Result Vector Widening //===----------------------------------------------------------------------===// +void DAGTypeLegalizer::ReplaceOtherWidenResult(SDNode *N, SDNode *WidenNode, + unsigned WidenResNo) { + assert(N->getNumValues() == 2 && "expected node with two results"); + unsigned OtherNo = 1 - WidenResNo; + EVT OtherVT = N->getValueType(OtherNo); + if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) { + SetWidenedVector(SDValue(N, OtherNo), SDValue(WidenNode, OtherNo)); + } else { + SDLoc DL(N); + SDValue OtherVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OtherVT, + SDValue(WidenNode, OtherNo), + DAG.getVectorIdxConstant(0, DL)); + ReplaceValueWith(SDValue(N, OtherNo), OtherVal); + } +} + void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG)); @@ -4454,6 +4470,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { if (!TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), WideVecVT) && TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); + if (N->getNumValues() == 2) + ReplaceOtherWidenResult(N, Res.getNode(), ResNo); return true; } return false; @@ -4758,12 +4776,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_FSHR: Res = WidenVecRes_Ternary(N); break; + case ISD::FFREXP: case ISD::FSINCOS: { if (!unrollExpandedOp()) - Res = WidenVecRes_FSINCOS(N); - for (unsigned ResNum = 0; ResNum < N->getNumValues(); 
ResNum++) - SetWidenedVector(SDValue(N, ResNum), Res.getValue(ResNum)); - Res = SDValue(); + Res = WidenVecRes_UnaryOpWithTwoResults(N, ResNo); break; } } @@ -5514,10 +5530,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { WidenVT, WidenLHS, DAG.getValueType(ExtVT)); } -SDValue DAGTypeLegalizer::WidenVecRes_FSINCOS(SDNode *N) { - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::WidenVecRes_UnaryOpWithTwoResults(SDNode *N, + unsigned ResNo) { + LLVMContext &Ctx = *DAG.getContext(); SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT, WidenVT}, InOp); + + EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(ResNo)); + ElementCount WidenEC = WidenVT.getVectorElementCount(); + + EVT VT0 = N->getValueType(0); + EVT VT1 = N->getValueType(1); + + EVT WidenVT0 = EVT::getVectorVT(Ctx, VT0.getVectorElementType(), WidenEC); + EVT WidenVT1 = EVT::getVectorVT(Ctx, VT1.getVectorElementType(), WidenEC); + + SDNode *WidenNode = + DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT0, WidenVT1}, InOp) + .getNode(); + + ReplaceOtherWidenResult(N, WidenNode, ResNo); + return SDValue(WidenNode, ResNo); } SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { diff --git a/llvm/test/CodeGen/AArch64/llvm.frexp.ll b/llvm/test/CodeGen/AArch64/llvm.frexp.ll new file mode 100644 index 0000000000000..e4cb8ed6eaf90 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/llvm.frexp.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s + +define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { +; CHECK-LABEL: test_frexp_v2f16_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; 
CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: add x0, sp, #36 +; CHECK-NEXT: add x19, sp, #36 +; CHECK-NEXT: fcvt s0, h1 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: add x0, sp, #32 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: fmov s0, s1 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: fcvt h2, s0 +; CHECK-NEXT: add x0, sp, #40 +; CHECK-NEXT: mov h1, v1.h[2] +; CHECK-NEXT: fcvt s0, h1 +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.h[1], v1.h[0] +; CHECK-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: fcvt h2, s0 +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: mov h1, v1.h[3] +; CHECK-NEXT: fcvt s0, h1 +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov v1.h[2], v2.h[0] +; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: fcvt h2, s0 +; CHECK-NEXT: ldr s1, [sp, #32] +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ld1 { v1.s }[1], [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: mov v0.h[3], v2.h[0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret + %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) + ret { <2 x half>, <2 x i32> } %result +} + +define { <3 x float>, <3 x i32> } @test_frexp_v3f32_v3i32(<3 x float> %a) { +; CHECK-LABEL: test_frexp_v3f32_v3i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte
Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: add x0, sp, #56 +; CHECK-NEXT: add x19, sp, #56 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: add x0, sp, #60 +; CHECK-NEXT: add x20, sp, #60 +; CHECK-NEXT: mov v0.s[1], v1.s[0] +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov s0, v0.s[2] +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: ldr s1, [sp, #44] +; CHECK-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ld1 { v1.s }[1], [x19] +; CHECK-NEXT: mov v2.s[2], v0.s[0] +; CHECK-NEXT: ld1 { v1.s }[2], [x20] +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret + %result = call { <3 x float>, <3 x i32> } @llvm.frexp.v3f32.v3i32(<3 x float> %a) + ret { <3 x float>, <3 x i32> } %result +} From 92037f8db2b70f4ad6e5648ed88d6f726086501b Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Mon, 28 Oct 2024 12:01:25 +0000 Subject: [PATCH 8/9] Fixups --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 4 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 43 +++++++++++-------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git
a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c7e0bd86795f6..8d3458aaab9f8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1073,8 +1073,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); SDValue WidenVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo); - void ReplaceOtherWidenResult(SDNode *N, SDNode *WidenNode, - unsigned WidenResNo); + void ReplaceOtherWidenResults(SDNode *N, SDNode *WidenNode, + unsigned WidenResNo); // Widen Vector Operand. bool WidenVectorOperand(SDNode *N, unsigned OpNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 18480327217e1..5409ae7d9671c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4435,19 +4435,22 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_HISTOGRAM(SDNode *N) { // Result Vector Widening //===----------------------------------------------------------------------===// -void DAGTypeLegalizer::ReplaceOtherWidenResult(SDNode *N, SDNode *WidenNode, - unsigned WidenResNo) { - assert(N->getNumValues() == 2 && "expected node with two results"); - unsigned OtherNo = 1 - WidenResNo; - EVT OtherVT = N->getValueType(OtherNo); - if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) { - SetWidenedVector(SDValue(N, OtherNo), SDValue(WidenNode, OtherNo)); - } else { - SDLoc DL(N); - SDValue OtherVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OtherVT, - SDValue(WidenNode, OtherNo), +void DAGTypeLegalizer::ReplaceOtherWidenResults(SDNode *N, SDNode *WidenNode, + unsigned WidenResNo) { + unsigned NumResults = N->getNumValues(); + for (unsigned ResNo = 0; ResNo < NumResults; ResNo++) { + if (ResNo == WidenResNo) + continue; + EVT ResVT = N->getValueType(ResNo); + if 
(getTypeAction(ResVT) == TargetLowering::TypeWidenVector) { + SetWidenedVector(SDValue(N, ResNo), SDValue(WidenNode, ResNo)); + } else { + SDLoc DL(N); + SDValue ResVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, + SDValue(WidenNode, ResNo), DAG.getVectorIdxConstant(0, DL)); - ReplaceValueWith(SDValue(N, OtherNo), OtherVal); + ReplaceValueWith(SDValue(N, ResNo), ResVal); + } } } @@ -4470,8 +4473,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { if (!TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), WideVecVT) && TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); - if (N->getNumValues() == 2) - ReplaceOtherWidenResult(N, Res.getNode(), ResNo); + if (N->getNumValues() > 1) + ReplaceOtherWidenResults(N, Res.getNode(), ResNo); return true; } return false; @@ -5532,15 +5535,19 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo) { + EVT VT0 = N->getValueType(0); + EVT VT1 = N->getValueType(1); + + assert(VT0.isVector() && VT1.isVector() && + VT0.getVectorElementCount() == VT1.getVectorElementCount() && + "expected both results to be vectors of matching element count"); + LLVMContext &Ctx = *DAG.getContext(); SDValue InOp = GetWidenedVector(N->getOperand(0)); EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(ResNo)); ElementCount WidenEC = WidenVT.getVectorElementCount(); - EVT VT0 = N->getValueType(0); - EVT VT1 = N->getValueType(1); - EVT WidenVT0 = EVT::getVectorVT(Ctx, VT0.getVectorElementType(), WidenEC); EVT WidenVT1 = EVT::getVectorVT(Ctx, VT1.getVectorElementType(), WidenEC); @@ -5548,7 +5555,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_UnaryOpWithTwoResults(SDNode *N, DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT0, WidenVT1}, InOp) .getNode(); - ReplaceOtherWidenResult(N, WidenNode, ResNo); + ReplaceOtherWidenResults(N, WidenNode, ResNo); return 
SDValue(WidenNode, ResNo); } From 87c96573c879497218abdf3ef28d98c37c6de419 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Mon, 28 Oct 2024 15:22:07 +0000 Subject: [PATCH 9/9] Style fixups --- .../SelectionDAG/LegalizeFloatTypes.cpp | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 9009c8ab4fc9a..fa2731ff7dbda 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -801,15 +801,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) { auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, DL, /*Chain=*/SDValue()); - unsigned ResNo = 0; - for (SDValue OutPtr : {StackSlotSin, StackSlotCos}) { - int FrameIdx = cast(OutPtr)->getIndex(); + + auto CreateStackLoad = [&, Chain = Chain](SDValue StackSlot) { + int FrameIdx = cast(StackSlot)->getIndex(); auto PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); - - SDValue LoadExp = DAG.getLoad(NVT, DL, Chain, OutPtr, PtrInfo); - SetSoftenedFloat(SDValue(N, ResNo++), LoadExp); - } + return DAG.getLoad(NVT, DL, Chain, StackSlot, PtrInfo); + }; + SetSoftenedFloat(SDValue(N, 0), CreateStackLoad(StackSlotSin)); + SetSoftenedFloat(SDValue(N, 1), CreateStackLoad(StackSlotCos)); return SDValue(); } @@ -2949,8 +2949,10 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FSINCOS(SDNode *N) { SDValue Op = GetPromotedFloat(N->getOperand(0)); SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), {NVT, NVT}, Op); - for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) + for (unsigned ResNum = 0, NumValues = N->getNumValues(); ResNum < NumValues; + ++ResNum) { SetPromotedFloat(SDValue(N, ResNum), Res.getValue(ResNum)); + } return SDValue(); } @@ -3376,7 +3378,8 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FSINCOS(SDNode *N) { // Convert back to FP16 as an 
integer. ISD::NodeType Truncate = GetPromotionOpcode(NVT, OVT); - for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) { + for (unsigned ResNum = 0, NumValues = N->getNumValues(); ResNum < NumValues; + ++ResNum) { SDValue Trunc = DAG.getNode(Truncate, dl, MVT::i16, Res.getValue(ResNum)); SetSoftPromotedHalf(SDValue(N, ResNum), Trunc); }