diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 051fd28c57759..e8ce113b0a97b 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -373,6 +373,10 @@ CODEGENOPT(VirtualFunctionElimination, 1, 0) ///< Whether to apply the dead /// virtual function elimination /// optimization. +/// Whether accuracy levels for math library functions are requested by the +/// user. These accuracy levels will then be expressed in terms of ULPs. +CODEGENOPT(FPAccuracy, 1, 0) + /// Whether to use public LTO visibility for entities in std and stdext /// namespaces. This is enabled by clang-cl's /MT and /MTd flags. CODEGENOPT(LTOVisibilityPublicStd, 1, 0) diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index eb1649cc238a5..1e2d7a1c83b33 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -301,6 +301,9 @@ def warn_stack_clash_protection_inline_asm : Warning< def warn_slh_does_not_support_asm_goto : Warning< "speculative load hardening does not protect functions with asm goto">, InGroup>; + +def err_drv_incompatible_options : Error< + "the combination of '%0' and '%1' is incompatible">; } // Sema && Serialization diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index e2c54549d495d..8f65523effeb5 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -63,6 +63,10 @@ def err_drv_no_cuda_libdevice : Error< "via '--cuda-path', or pass '-nocudalib' to build without linking with " "libdevice">; +def warn_function_fp_accuracy_already_set : Warning < + "floating point accuracy value of '%0' has already been assigned to " + "function '%1'">, + InGroup>; def err_drv_no_rocm_device_lib : Error< "cannot find ROCm 
device library%select{| for %1|for ABI version %1}0; provide its path via " "'--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build " @@ -141,8 +145,9 @@ def err_drv_invalid_unwindlib_name : Error< "invalid unwind library name in argument '%0'">; def err_drv_incompatible_unwindlib : Error< "--rtlib=libgcc requires --unwindlib=libgcc">; -def err_drv_incompatible_options : Error< - "the combination of '%0' and '%1' is incompatible">; +def err_drv_incompatible_fp_accuracy_options : Error< + "floating point accuracy requirements cannot be guaranteed when '-fmath-errno' " + "is enabled; use '-fno-math-errno' to enable floating point accuracy control">; def err_drv_invalid_stdlib_name : Error< "invalid library name in argument '%0'">; def err_drv_invalid_output_with_multiple_archs : Error< diff --git a/clang/include/clang/Basic/FPOptions.def b/clang/include/clang/Basic/FPOptions.def index 4517be6f178d2..29cf787b4f876 100644 --- a/clang/include/clang/Basic/FPOptions.def +++ b/clang/include/clang/Basic/FPOptions.def @@ -26,5 +26,6 @@ OPTION(AllowReciprocal, bool, 1, NoSignedZero) OPTION(AllowApproxFunc, bool, 1, AllowReciprocal) OPTION(FPEvalMethod, LangOptions::FPEvalMethodKind, 2, AllowApproxFunc) OPTION(Float16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, FPEvalMethod) -OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, FPEvalMethod) +OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, Float16ExcessPrecision) +OPTION(FPAccuracy, LangOptions::FPAccuracyKind, 3, BFloat16ExcessPrecision) #undef OPTION diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 6bdffb6abc32f..8d3b9a64eb126 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -338,6 +338,7 @@ BENIGN_ENUM_LANGOPT(FPExceptionMode, FPExceptionModeKind, 2, FPE_Default, "FP Ex BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "FP 
type used for floating point arithmetic") ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for Float16 arithmetic") ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for BFloat16 arithmetic") +BENIGN_ENUM_LANGOPT(FPAccuracy, FPAccuracyKind, 3, FPA_Default, "Accuracy for floating point operations and library functions") LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment") LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility") LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting") diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index e72dee31f7a0d..04ec54cc43973 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -303,6 +303,15 @@ class LangOptions : public LangOptionsBase { enum ExcessPrecisionKind { FPP_Standard, FPP_Fast, FPP_None }; + enum FPAccuracyKind { + FPA_Default, + FPA_High, + FPA_Medium, + FPA_Low, + FPA_Sycl, + FPA_Cuda, + }; + /// Possible exception handling behavior. enum class ExceptionHandlingKind { None, SjLj, WinEH, DwarfCFI, Wasm }; @@ -509,6 +518,10 @@ class LangOptions : public LangOptionsBase { /// records. 
std::string OptRecordFile; + std::string FPAccuracyVal; + using FPAccuracyFuncMapTy = std::map; + FPAccuracyFuncMapTy FPAccuracyFuncMap; + LangOptions(); /// Set language defaults for the given input language and diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 2b4546ec3b911..4eb31fc50e656 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1729,6 +1729,13 @@ def ffp_exception_behavior_EQ : Joined<["-"], "ffp-exception-behavior=">, Group< Values<"ignore,maytrap,strict">, NormalizedValuesScope<"LangOptions">, NormalizedValues<["FPE_Ignore", "FPE_MayTrap", "FPE_Strict"]>, MarshallingInfoEnum, "FPE_Default">; +def ffp_accuracy_EQ : Joined<["-"], "ffp-accuracy=">, Group, Flags<[CC1Option]>, + HelpText<"Specifies the required accuracy for floating-point operations and library calls.">, + Values<"default,high,medium,low,sycl,cuda">, NormalizedValuesScope<"LangOptions">, + NormalizedValues<["FPA_Default", "FPA_High", "FPA_Medium", "FPA_Low", "FPA_Sycl", "FPA_Cuda"]>, + MarshallingInfoEnum, "FPA_Default">; +def ffp_builtin_accuracy_EQ : Joined<["-"], "ffp-builtin-accuracy=">, Group, Flags<[CC1Option]>; + defm fast_math : BoolFOption<"fast-math", LangOpts<"FastMath">, DefaultFalse, PosFlag : Option<["/", "-"], name, // (We don't put any of these in cl_compile_Group as the options they alias are // already in the right group.) 
+// INTEL_CUSTOMIZATION +def _SLASH_Qfp_accuracy_EQ : CLJoined<"Qfp-accuracy=">, + Alias; +def _SLASH_Qfp_accuracy_COL : CLJoined<"Qfp-accuracy:">, + Alias,HelpText<"Specifies the required accuracy for " + "floating-point operations and library calls.">; +// END INTEL_CUSTOMIZATION + def _SLASH_Brepro : CLFlag<"Brepro">, HelpText<"Do not write current time into COFF output (breaks link.exe /incremental)">, Alias; diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h index 1dbd1eda62b3f..ee147022e5c62 100644 --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -276,6 +276,9 @@ class CompilerInvocation : public CompilerInvocationRefBase, std::vector &Includes, DiagnosticsEngine &Diags); + static void ParseFpAccuracyArgs(LangOptions &Opts, llvm::opt::ArgList &Args, + DiagnosticsEngine &Diags); + /// Generate command line options from LangOptions. static void GenerateLangArgs(const LangOptions &Opts, SmallVectorImpl &Args, diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 64e0751ac4f1b..38163c7946488 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -35,6 +35,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/FPAccuracy.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" @@ -504,18 +505,69 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) { return Store; } +static CallInst *CreateBuiltinCallWithAttr(CodeGenFunction &CGF, StringRef Name, + llvm::Function *FPBuiltinF, + ArrayRef Args, + unsigned ID) { + llvm::CallInst *CI = CGF.Builder.CreateCall(FPBuiltinF, Args); + // TODO: Replace AttrList with a single attribute. The call can only have a + // single FPAccuracy attribute. 
+ llvm::AttributeList AttrList; + // sincos() doesn't return a value, but it still has a type associated with + // it that corresponds to the operand type. + CGF.CGM.getFPAccuracyFuncAttributes( + Name, AttrList, ID, + Name == "sincos" ? Args[0]->getType() : FPBuiltinF->getReturnType()); + CI->setAttributes(AttrList); + return CI; +} + +static Function *getIntrinsic(CodeGenFunction &CGF, llvm::Value *Src0, + unsigned FPIntrinsicID, unsigned IntrinsicID, + bool HasAccuracyRequirement) { + return HasAccuracyRequirement + ? CGF.CGM.getIntrinsic(FPIntrinsicID, Src0->getType()) + : CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); +} + +static bool hasAccuracyRequirement(CodeGenFunction &CGF, StringRef Name) { + if (!CGF.getLangOpts().FPAccuracyVal.empty()) + return true; + auto FuncMapIt = CGF.getLangOpts().FPAccuracyFuncMap.find(Name.str()); + return FuncMapIt != CGF.getLangOpts().FPAccuracyFuncMap.end(); +} + // Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. Depending on mode, this may be a constrained -// floating-point intrinsic. -static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, - const CallExpr *E, unsigned IntrinsicID, - unsigned ConstrainedIntrinsicID) { +// or an fpbuiltin floating-point intrinsic. 
+static Value *emitUnaryMaybeConstrainedFPBuiltin( + CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, + unsigned ConstrainedIntrinsicID, + unsigned FPAccuracyIntrinsicID = Intrinsic::not_intrinsic) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); - + if (FPAccuracyIntrinsicID != Intrinsic::not_intrinsic) { + if (CGF.CGM.getCodeGenOpts().FPAccuracy) { + if (CGF.getLangOpts().MathErrno) { + DiagnosticsEngine &Diags = CGF.CGM.getDiags(); + Diags.Report(E->getBeginLoc(), diag::err_drv_incompatible_options) + << "-ffp-accuracy" + << "-fmath-errno"; + } else { + StringRef Name = + CGF.CGM.getContext().BuiltinInfo.getName(CGF.getCurrentBuiltinID()); + // Use fpbuiltin intrinsic only when needed. + Function *Func = + getIntrinsic(CGF, Src0, FPAccuracyIntrinsicID, IntrinsicID, + hasAccuracyRequirement(CGF, Name)); + return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0}, + FPAccuracyIntrinsicID); + } + } + } if (CGF.Builder.getIsFPConstrained()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); - return CGF.Builder.CreateConstrainedFPCall(F, { Src0 }); + return CGF.Builder.CreateConstrainedFPCall(F, {Src0}); } else { Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, Src0); @@ -524,12 +576,21 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, // Emit an intrinsic that has 2 operands of the same type as its result. // Depending on mode, this may be a constrained floating-point intrinsic. 
-static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, - const CallExpr *E, unsigned IntrinsicID, - unsigned ConstrainedIntrinsicID) { +static Value *emitBinaryMaybeConstrainedFPBuiltin( + CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, + unsigned ConstrainedIntrinsicID, + unsigned FPAccuracyIntrinsicID = Intrinsic::not_intrinsic) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); - + if (CGF.CGM.getCodeGenOpts().FPAccuracy) { + StringRef Name = + CGF.CGM.getContext().BuiltinInfo.getName(CGF.getCurrentBuiltinID()); + // Use fpbuiltin intrinsic only when needed. + Function *Func = getIntrinsic(CGF, Src0, FPAccuracyIntrinsicID, IntrinsicID, + hasAccuracyRequirement(CGF, Name)); + return CreateBuiltinCallWithAttr(CGF, Name, Func, {Src0, Src1}, + FPAccuracyIntrinsicID); + } if (CGF.Builder.getIsFPConstrained()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); @@ -2231,6 +2292,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Result.Val.getFloat())); } + CurrentBuiltinIDRAII CB(*this, BuiltinID); + // If current long-double semantics is IEEE 128-bit, replace math builtins // of long-double with f128 equivalent. 
// TODO: This mutation should also be applied to other targets other than PPC, @@ -2291,9 +2354,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_cosf16: case Builtin::BI__builtin_cosl: case Builtin::BI__builtin_cosf128: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, - Intrinsic::cos, - Intrinsic::experimental_constrained_cos)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::cos, Intrinsic::experimental_constrained_cos, + Intrinsic::fpbuiltin_cos)); case Builtin::BIexp: case Builtin::BIexpf: @@ -2303,9 +2366,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_expf16: case Builtin::BI__builtin_expl: case Builtin::BI__builtin_expf128: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, - Intrinsic::exp, - Intrinsic::experimental_constrained_exp)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::exp, Intrinsic::experimental_constrained_exp, + Intrinsic::fpbuiltin_exp)); case Builtin::BIexp2: case Builtin::BIexp2f: @@ -2315,9 +2378,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_exp2f16: case Builtin::BI__builtin_exp2l: case Builtin::BI__builtin_exp2f128: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, - Intrinsic::exp2, - Intrinsic::experimental_constrained_exp2)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::exp2, Intrinsic::experimental_constrained_exp2, + Intrinsic::fpbuiltin_exp2)); case Builtin::BIfabs: case Builtin::BIfabsf: @@ -2401,9 +2464,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_logf16: case Builtin::BI__builtin_logl: case Builtin::BI__builtin_logf128: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, - Intrinsic::log, - 
Intrinsic::experimental_constrained_log)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::log, Intrinsic::experimental_constrained_log, + Intrinsic::fpbuiltin_log)); case Builtin::BIlog10: case Builtin::BIlog10f: @@ -2413,9 +2476,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_log10f16: case Builtin::BI__builtin_log10l: case Builtin::BI__builtin_log10f128: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, - Intrinsic::log10, - Intrinsic::experimental_constrained_log10)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::log10, Intrinsic::experimental_constrained_log10, + Intrinsic::fpbuiltin_log10)); case Builtin::BIlog2: case Builtin::BIlog2f: @@ -2425,9 +2488,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_log2f16: case Builtin::BI__builtin_log2l: case Builtin::BI__builtin_log2f128: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, - Intrinsic::log2, - Intrinsic::experimental_constrained_log2)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::log2, Intrinsic::experimental_constrained_log2, + Intrinsic::fpbuiltin_log2)); case Builtin::BInearbyint: case Builtin::BInearbyintf: @@ -2448,9 +2511,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_powf16: case Builtin::BI__builtin_powl: case Builtin::BI__builtin_powf128: - return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, - Intrinsic::pow, - Intrinsic::experimental_constrained_pow)); + return RValue::get(emitBinaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::pow, Intrinsic::experimental_constrained_pow, + Intrinsic::fpbuiltin_pow)); case Builtin::BIrint: case Builtin::BIrintf: @@ -2496,9 +2559,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case 
Builtin::BI__builtin_sinf16: case Builtin::BI__builtin_sinl: case Builtin::BI__builtin_sinf128: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, - Intrinsic::sin, - Intrinsic::experimental_constrained_sin)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::sin, Intrinsic::experimental_constrained_sin, + Intrinsic::fpbuiltin_sin)); case Builtin::BIsqrt: case Builtin::BIsqrtf: @@ -2508,9 +2571,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_sqrtf16: case Builtin::BI__builtin_sqrtl: case Builtin::BI__builtin_sqrtf128: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, - Intrinsic::sqrt, - Intrinsic::experimental_constrained_sqrt)); + return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( + *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt, + Intrinsic::fpbuiltin_sqrt)); case Builtin::BItrunc: case Builtin::BItruncf: @@ -22062,6 +22125,115 @@ RValue CodeGenFunction::EmitIntelFPGAMemBuiltin(const CallExpr *E) { return RValue::get(Ann); } +llvm::CallInst *CodeGenFunction::EmitFPBuiltinIndirectCall( + llvm::FunctionType *IRFuncTy, const SmallVectorImpl &IRArgs, + llvm::Value *FnPtr, const FunctionDecl *FD) { + llvm::Function *Func; + unsigned FPAccuracyIntrinsicID = 0; + StringRef Name; + if (CurrentBuiltinID == 0) { + // Even if the current function doesn't have a clang builtin, create + // an 'fpbuiltin-max-error' attribute for it; unless it's marked with + // an NoBuiltin attribute. 
+ if (!FD->hasAttr()) { + Name = FD->getName(); + FPAccuracyIntrinsicID = + llvm::StringSwitch(Name) + .Case("fadd", llvm::Intrinsic::fpbuiltin_fadd) + .Case("fdiv", llvm::Intrinsic::fpbuiltin_fdiv) + .Case("fmul", llvm::Intrinsic::fpbuiltin_fmul) + .Case("fsub", llvm::Intrinsic::fpbuiltin_fsub) + .Case("frem", llvm::Intrinsic::fpbuiltin_frem) + .Case("sincos", llvm::Intrinsic::fpbuiltin_sincos) + .Case("exp10", llvm::Intrinsic::fpbuiltin_exp10) + .Case("rsqrt", llvm::Intrinsic::fpbuiltin_rsqrt); + } else { + return nullptr; + } + } else { + // The function has a clang builtin. Create an attribute for it + // only if it has an fpbuiltin intrinsic. + unsigned BuiltinID = getCurrentBuiltinID(); + Name = CGM.getContext().BuiltinInfo.getName(BuiltinID); + switch (BuiltinID) { + default: + // If the function has a clang builtin but doesn't have an + // fpbuiltin, it will be generated with no 'fpbuiltin-max-error' + // attribute. + return nullptr; + case Builtin::BItan: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_tan; + break; + case Builtin::BItanh: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_tanh; + break; + case Builtin::BIlog2: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_log2; + break; + case Builtin::BIlog1p: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_log1p; + break; + case Builtin::BIcos: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_cos; + break; + case Builtin::BIcosh: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_cosh; + break; + case Builtin::BIacos: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_acos; + break; + case Builtin::BIacosh: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_acosh; + break; + case Builtin::BIsin: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_sin; + break; + case Builtin::BIsinh: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_sinh; + break; + case Builtin::BIasin: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_asin; + break; + case Builtin::BIasinh: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_asinh; + break; + case 
Builtin::BIatan: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_atan; + break; + case Builtin::BIatanh: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_atanh; + break; + case Builtin::BIatan2: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_atan2; + break; + case Builtin::BIerf: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_erf; + break; + case Builtin::BIerfc: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_erfc; + break; + case Builtin::BIexp: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_exp; + break; + case Builtin::BIexp2: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_exp2; + break; + case Builtin::BIexpm1: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_expm1; + break; + case Builtin::BIhypot: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_hypot; + break; + case Builtin::BIldexp: + FPAccuracyIntrinsicID = Intrinsic::fpbuiltin_ldexp; + break; + } + } + Func = CGM.getIntrinsic(FPAccuracyIntrinsicID, IRArgs[0]->getType()); + return CreateBuiltinCallWithAttr(*this, Name, Func, ArrayRef(IRArgs), + FPAccuracyIntrinsicID); +} + Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 15f887081e85e..c0adfce62d233 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/FPAccuracy.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" @@ -1836,6 +1837,44 @@ static bool HasStrictReturn(const CodeGenModule &Module, QualType RetTy, Module.getLangOpts().Sanitize.has(SanitizerKind::Return); } +static llvm::fp::FPAccuracy convertFPAccuracy(StringRef FPAccuracyStr) { + return llvm::StringSwitch(FPAccuracyStr) + .Case("high", llvm::fp::FPAccuracy::High) + .Case("medium", llvm::fp::FPAccuracy::Medium) + .Case("low", llvm::fp::FPAccuracy::Low) + 
.Case("sycl", llvm::fp::FPAccuracy::SYCL) + .Case("cuda", llvm::fp::FPAccuracy::CUDA); +} + +void CodeGenModule::getDefaultFunctionFPAccuracyAttributes( + StringRef Name, llvm::AttrBuilder &FuncAttrs, unsigned ID, + const llvm::Type *FuncType) { + // Priority is given to to the accuracy specific to the function. + // So, if the command line is something like this: + // 'clang -fp-accuracy = high -fp-accuracy = low:[sin]'. + // This means, all library functions will have the accuracy 'high' + // except 'sin', which should have an accuracy value of 'low'. + // To ensure that, first check if Name has a required accuracy by visiting + // the 'FPAccuracyFuncMap'; if no accuracy is mapped to Name (FuncAttrs + // is empty), then set its accuracy from the TU's accuracy value. + if (!getLangOpts().FPAccuracyFuncMap.empty()) { + auto FuncMapIt = getLangOpts().FPAccuracyFuncMap.find(Name.str()); + if (FuncMapIt != getLangOpts().FPAccuracyFuncMap.end()) { + StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin( + ID, FuncType, convertFPAccuracy(FuncMapIt->second)); + assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected"); + FuncAttrs.addAttribute("fpbuiltin-max-error=", FPAccuracyVal); + } + } + if (FuncAttrs.attrs().size() == 0) + if (!getLangOpts().FPAccuracyVal.empty()) { + StringRef FPAccuracyVal = llvm::fp::getAccuracyForFPBuiltin( + ID, FuncType, convertFPAccuracy(getLangOpts().FPAccuracyVal)); + assert(!FPAccuracyVal.empty() && "A valid accuracy value is expected"); + FuncAttrs.addAttribute("fpbuiltin-max-error=", FPAccuracyVal); + } +} + /// Add denormal-fp-math and denormal-fp-math-f32 as appropriate for the /// requested denormal behavior, accounting for the overriding behavior of the /// -f32 case. @@ -5581,6 +5620,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Emit the actual call/invoke instruction. 
llvm::CallBase *CI; if (!InvokeDest) { + if (CGM.getCodeGenOpts().FPAccuracy) { + const auto *FD = dyn_cast_if_present(TargetDecl); + assert(FD && "expecting a function"); + CI = EmitFPBuiltinIndirectCall(IRFuncTy, IRCallArgs, CalleePtr, FD); + if (CI) + return RValue::get(CI); + } CI = Builder.CreateCall(IRFuncTy, CalleePtr, IRCallArgs, BundleList); } else { llvm::BasicBlock *Cont = createBasicBlock("invoke.cont"); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 2184a8401b21c..58709193b374a 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1597,6 +1597,28 @@ class CodeGenFunction : public CodeGenTypeCache { SourceLocation LastStopPoint; public: + /// Class to manage the BuiltinID for the current builtin expression during + /// processing in EmitBuiltinExpr. + class CurrentBuiltinIDRAII { + CodeGenFunction &CGF; + unsigned SavedBuiltinID; + + public: + CurrentBuiltinIDRAII(CodeGenFunction &CGF, unsigned BuiltinID) + : CGF(CGF), SavedBuiltinID(CGF.CurrentBuiltinID) { + CGF.CurrentBuiltinID = BuiltinID; + } + ~CurrentBuiltinIDRAII() { CGF.CurrentBuiltinID = SavedBuiltinID; } + }; + +private: + unsigned CurrentBuiltinID = /*NotBuiltin*/ 0; + +public: + unsigned getCurrentBuiltinID() const { + assert(CurrentBuiltinID != /*NotBuiltin*/ 0); + return CurrentBuiltinID; + } /// Source location information about the default argument or member /// initializer expression we're evaluating, if any. 
CurrentSourceLocExprScope CurSourceLocExprScope; @@ -4289,6 +4311,11 @@ class CodeGenFunction : public CodeGenTypeCache { ReturnValueSlot ReturnValue); RValue EmitIntelFPGAMemBuiltin(const CallExpr *E); + llvm::CallInst * + EmitFPBuiltinIndirectCall(llvm::FunctionType *IRFuncTy, + const SmallVectorImpl &IRArgs, + llvm::Value *FnPtr, const FunctionDecl *FD); + enum class MSVCIntrin; llvm::Value *EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 4751adeb529c4..6178ce2840f79 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -7881,3 +7881,13 @@ void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) { NewBuilder->ABI->MangleCtx = std::move(ABI->MangleCtx); } + +void CodeGenModule::getFPAccuracyFuncAttributes(StringRef Name, + llvm::AttributeList &AttrList, + unsigned ID, + const llvm::Type *FuncType) { + llvm::AttrBuilder FuncAttrs(getLLVMContext()); + getDefaultFunctionFPAccuracyAttributes(Name, FuncAttrs, ID, FuncType); + AttrList = llvm::AttributeList::get( + getLLVMContext(), llvm::AttributeList::FunctionIndex, FuncAttrs); +} diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 2ccacf40ac56b..5d1521da2da63 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1593,6 +1593,10 @@ class CodeGenModule : public CodeGenTypeCache { /// because we'll lose all important information after each repl. 
void moveLazyEmissionStates(CodeGenModule *NewBuilder); + void getFPAccuracyFuncAttributes(StringRef Name, + llvm::AttributeList &AttrList, unsigned ID, + const llvm::Type *FuncType); + private: llvm::Constant *GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl D, bool ForVTable, @@ -1787,6 +1791,11 @@ class CodeGenModule : public CodeGenTypeCache { bool AttrOnCallSite, llvm::AttrBuilder &FuncAttrs); + void getDefaultFunctionFPAccuracyAttributes(StringRef Name, + llvm::AttrBuilder &FuncAttrs, + unsigned ID, + const llvm::Type *FuncType); + llvm::Metadata *CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map, StringRef Suffix); }; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 1acb5cfe6c016..1cab8ab2ee7c6 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2886,6 +2886,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, bool StrictFPModel = false; StringRef Float16ExcessPrecision = ""; StringRef BFloat16ExcessPrecision = ""; + StringRef FPAccuracy = ""; if (const Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) { CmdArgs.push_back("-mlimit-float-precision"); @@ -2898,13 +2899,20 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, switch (optID) { default: break; + case options::OPT_ffp_accuracy_EQ: { + StringRef Val = A->getValue(); + FPAccuracy = Val; + break; + } case options::OPT_ffp_model_EQ: { // If -ffp-model= is seen, reset to fno-fast-math HonorINFs = true; HonorNaNs = true; ApproxFunc = false; - // Turning *off* -ffast-math restores the toolchain default. - MathErrno = TC.IsMathErrnoDefault(); + // Turning *off* -ffast-math restores the toolchain default, + // unless -fp-accuracy is used. 
+ if (FPAccuracy.empty()) + MathErrno = TC.IsMathErrnoDefault(); AssociativeMath = false; ReciprocalMath = false; SignedZeros = true; @@ -3173,8 +3181,9 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, HonorNaNs = true; // Turning on -ffast-math (with either flag) removes the need for // MathErrno. However, turning *off* -ffast-math merely restores the - // toolchain default (which may be false). - MathErrno = TC.IsMathErrnoDefault(); + // toolchain default (which may be false), unless -fp-accuracy is used. + if (FPAccuracy.empty()) + MathErrno = TC.IsMathErrnoDefault(); AssociativeMath = false; ReciprocalMath = false; ApproxFunc = false; @@ -6057,6 +6066,23 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, << A->getAsString(Args) << TripleStr; } + std::string FpAccuracyAttr; + auto RenderFPAccuracyOptions = [&FpAccuracyAttr](const Twine &OptStr) { + // In case the value is 'default' don't add the -ffp-builtin-accuracy + // attribute. + if (OptStr.str() != "default") { + if (FpAccuracyAttr.empty()) + FpAccuracyAttr = "-ffp-builtin-accuracy="; + else + FpAccuracyAttr += " "; + FpAccuracyAttr += OptStr.str(); + } + }; + for (StringRef A : Args.getAllArgValues(options::OPT_ffp_accuracy_EQ)) + RenderFPAccuracyOptions(A); + if (!FpAccuracyAttr.empty()) + CmdArgs.push_back(Args.MakeArgString(FpAccuracyAttr)); + // Decide whether to use verbose asm. Verbose assembly is the default on // toolchains which have the integrated assembler on by default. 
bool IsIntegratedAssemblerDefault = TC.IsIntegratedAssemblerDefault(); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 330169ca22efd..03874178ca357 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2034,6 +2034,9 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, } } + if (Args.getLastArg(options::OPT_ffp_builtin_accuracy_EQ)) + Opts.FPAccuracy = 1; + if (auto *arg = Args.getLastArg(options::OPT_fdiagnostics_misexpect_tolerance_EQ)) { auto ResultOrErr = parseToleranceOption(arg->getValue()); @@ -3328,6 +3331,13 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, #include "clang/Driver/Options.inc" #undef LANG_OPTION_WITH_MARSHALLING + if (!Opts.FPAccuracyVal.empty()) + GenerateArg(Args, OPT_ffp_builtin_accuracy_EQ, Opts.FPAccuracyVal, SA); + + for (const auto &F : Opts.FPAccuracyFuncMap) + GenerateArg(Args, OPT_ffp_builtin_accuracy_EQ, (F.second + ":" + F.first), + SA); + // The '-fcf-protection=' option is generated by CodeGenOpts generator. 
if (Opts.ObjC) { @@ -3570,6 +3580,89 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, GenerateArg(Args, OPT_fno_gpu_rdc, SA); }
+/// Reports err_drv_unsupported_option_argument unless \p ValElement is one of
+/// the accepted accuracy levels: "default", "high", "low", "medium", "sycl"
+/// or "cuda".
+static void checkFPAccuracyIsValid(StringRef ValElement,
+                                   DiagnosticsEngine &Diags) {
+  if (!llvm::StringSwitch<bool>(ValElement)
+           .Case("default", true)
+           .Case("high", true)
+           .Case("low", true)
+           .Case("medium", true)
+           .Case("sycl", true)
+           .Case("cuda", true)
+           .Default(false))
+    Diags.Report(diag::err_drv_unsupported_option_argument)
+        << "-ffp-accuracy" << ValElement;
+}
+
+/// Parses all -ffp-builtin-accuracy= values from \p Args into \p Opts.
+///
+/// Each value is a space-separated list of elements. An element "level" sets
+/// Opts.FPAccuracyVal; an element "level:[f1,...,fn]" (brackets optional)
+/// records "level" for each listed function in Opts.FPAccuracyFuncMap, except
+/// for "default", which is never stored. Every value is rejected with an error
+/// when Opts.MathErrno is set.
+void CompilerInvocation::ParseFpAccuracyArgs(LangOptions &Opts, ArgList &Args,
+                                             DiagnosticsEngine &Diags) {
+  for (StringRef Values : Args.getAllArgValues(OPT_ffp_builtin_accuracy_EQ)) {
+    if (Opts.MathErrno) {
+      Diags.Report(diag::err_drv_incompatible_fp_accuracy_options);
+      continue;
+    }
+    SmallVector<StringRef> ValuesArr;
+    Values.split(ValuesArr, ' ');
+    for (StringRef Val : ValuesArr) {
+      SmallVector<StringRef> ValElement;
+      Val.split(ValElement, ':');
+      if (ValElement.size() == 1) {
+        // The option is of the form -ffp-accuracy=value.
+        checkFPAccuracyIsValid(ValElement[0], Diags);
+        Opts.FPAccuracyVal = ValElement[0].str();
+        continue;
+      }
+      if (ValElement.size() != 2) {
+        // More than one ':' is not a recognized form; diagnose it instead of
+        // silently dropping the element.
+        Diags.Report(diag::err_drv_unsupported_option_argument)
+            << "-ffp-accuracy" << Val;
+        continue;
+      }
+      // The option is of the form -ffp-accuracy=value:[f1, ... fn].
+      SmallVector<StringRef> FuncList;
+      ValElement[1].split(FuncList, ',');
+      for (StringRef FuncName : FuncList) {
+        // consume_front/consume_back are no-ops on an empty name, whereas
+        // front()/back() assert on one (e.g. "high:" or "high:[sin,,cos]").
+        FuncName.consume_front("[");
+        FuncName.consume_back("]");
+        if (FuncName.empty()) {
+          Diags.Report(diag::err_drv_unsupported_option_argument)
+              << "-ffp-accuracy" << Val;
+          continue;
+        }
+        auto FuncMap = Opts.FPAccuracyFuncMap.find(FuncName.str());
+        if (FuncMap != Opts.FPAccuracyFuncMap.end()) {
+          if (!FuncMap->second.empty())
+            Diags.Report(diag::warn_function_fp_accuracy_already_set)
+                << FuncMap->second << FuncName.str();
+          continue;
+        }
+        checkFPAccuracyIsValid(ValElement[0], Diags);
+        if (!Opts.FPAccuracyVal.empty())
+          Diags.Report(diag::warn_function_fp_accuracy_already_set)
+              << Opts.FPAccuracyVal << FuncName.str();
+        // No need to fill the map if the FPaccuracy is 'default'.
+        // The default builtin will be generated.
+        if (ValElement[0] != "default")
+          Opts.FPAccuracyFuncMap.insert(
+              {FuncName.str(), ValElement[0].str()});
+      }
+    }
+  }
+}
+
bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, const llvm::Triple &T, std::vector &Includes, @@ -3726,6 +3819,8 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, #include "clang/Driver/Options.inc" #undef LANG_OPTION_WITH_MARSHALLING + ParseFpAccuracyArgs(Opts, Args, Diags); + if (const Arg *A = Args.getLastArg(OPT_fcf_protection_EQ)) { StringRef Name = A->getValue(); if (Name == "full" || Name == "branch") { diff --git a/clang/test/CodeGen/fp-accuracy.c b/clang/test/CodeGen/fp-accuracy.c new file mode 100644 index 0000000000000..7cc5296089adc --- /dev/null +++ b/clang/test/CodeGen/fp-accuracy.c @@ -0,0 +1,389 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -ffp-builtin-accuracy=high \ +// RUN: -Wno-return-type -Wno-implicit-function-declaration -emit-llvm -o - %s \ +// RUN: | FileCheck --check-prefixes=CHECK %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: "-ffp-builtin-accuracy=high:[acosf,cos,pow] low:[tan] medium:[sincos,log10]" \ +// RUN: -Wno-return-type -Wno-implicit-function-declaration -emit-llvm -o - %s \ +// RUN: | FileCheck --check-prefix=CHECK-F1 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: "-ffp-builtin-accuracy=medium high:[tan] cuda:[cos]" \ +// RUN: -Wno-return-type -Wno-implicit-function-declaration -emit-llvm -o - %s \ +// RUN: | FileCheck --check-prefix=CHECK-F2 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: "-ffp-builtin-accuracy=high low:[tan] medium:[sincos,log10]" \ +// RUN: -Wno-return-type -Wno-implicit-function-declaration -emit-llvm -o - %s \ +// RUN: | FileCheck --check-prefix=CHECK-F3 %s + +// RUN: %clang_cc1 -triple spir64-unknown-unknown -ffp-builtin-accuracy=sycl \ +// RUN: -D SPIR -Wno-implicit-function-declaration
-emit-llvm -o - %s \ +// RUN: | FileCheck --check-prefix=CHECK-SPIR %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: "-ffp-builtin-accuracy=default:[acosf,cos,pow]" \ +// RUN: -Wno-return-type -Wno-implicit-function-declaration -emit-llvm -o - %s \ +// RUN: | FileCheck --check-prefixes=CHECK-DEFAULT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -Wno-return-type -Wno-implicit-function-declaration -emit-llvm -o - %s \ +// RUN: | FileCheck --check-prefixes=CHECK-DEFAULT %s + +#ifdef SPIR +// This is a declaration when compiling with -fsycl to avoid +// the compilation error "function with no prototype cannot use +// the spir_function calling convention". +void sincos(float, float *, float *); +double exp10(double); +double fadd(double, double); +float fdiv(float, float); +float fmul(float, float); +float frem(float, float); +float fsub(float, float); +double rsqrt(double); +#endif + + +// CHECK-LABEL: define dso_local void @f1 +// CHECK: call double @llvm.fpbuiltin.acos.f64(double {{.*}}) #[[ATTR_HIGH:[0-9]+]] +// CHECK: call double @llvm.fpbuiltin.acosh.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.asin.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.asinh.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.atan.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.atan2.f64(double {{.*}}, double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.atanh.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.cos.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.cosh.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.erf.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.erfc.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.exp.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.exp10.f64(double {{.*}}) 
#[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.exp2.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.expm1.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.fadd.f64(double {{.*}}, double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.fdiv.f64(double {{.*}}, double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.fmul.f64(double {{.*}}, double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.frem.f64(double {{.*}}, double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.fsub.f64(double {{.*}}, double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.hypot.f64(double {{.*}}, double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.ldexp.f64(double {{.*}}, i32 {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.log.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.log10.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.log1p.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.log2.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.pow.f64(double {{.*}}, double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.rsqrt.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.sin.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call void @llvm.fpbuiltin.sincos.f64(double {{.*}}, ptr {{.*}}, ptr {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.sinh.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.sqrt.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.tanh.f64(double {{.*}}) #[[ATTR_HIGH]] + +// CHECK-F1-LABEL: define dso_local void @f1 +// CHECK-F1: call double @llvm.fpbuiltin.acos.f64(double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.acosh.f64(double {{.*}}) +// CHECK-F1: call 
double @llvm.fpbuiltin.asin.f64(double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.asinh.f64(double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.atan.f64(double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.atan2.f64(double {{.*}}, double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.atanh.f64(double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.cos.f64(double {{.*}}) #[[ATTR_F1_HIGH:[0-9]+]] +// CHECK-F1: call double @llvm.fpbuiltin.cosh.f64(double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.erf.f64(double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.erfc.f64(double {{.*}}) +// CHECK-F1: call double @llvm.exp.f64(double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.exp10.f64(double {{.*}}) +// CHECK-F1: call double @llvm.exp2.f64(double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.expm1.f64(double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.fadd.f64(double {{.*}}, double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.fdiv.f64(double {{.*}}, double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.fmul.f64(double {{.*}}, double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.frem.f64(double {{.*}}, double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.fsub.f64(double {{.*}}, double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.hypot.f64(double {{.*}}, double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.ldexp.f64(double {{.*}}, i32 {{.*}}) +// CHECK-F1: call double @llvm.log.f64(double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.log10.f64(double {{.*}}) #[[ATTR_F1_MEDIUM:[0-9]+]] +// CHECK-F1: call double @llvm.fpbuiltin.log1p.f64(double {{.*}}) +// CHECK-F1: call double @llvm.log2.f64(double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.pow.f64(double {{.*}}, double {{.*}}) #[[ATTR_F1_HIGH]] +// CHECK-F1: call double @llvm.fpbuiltin.rsqrt.f64(double {{.*}}) +// CHECK-F1: call double @llvm.sin.f64(double {{.*}}) +// CHECK-F1: call void @llvm.fpbuiltin.sincos.f64(double {{.*}}, ptr {{.*}}, 
ptr {{.*}}) #[[ATTR_F1_MEDIUM]] +// CHECK-F1: call double @llvm.fpbuiltin.sinh.f64(double {{.*}}) +// CHECK-F1: call double @llvm.sqrt.f64(double {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR_F1_LOW:[0-9]+]] +// CHECK-F1: call double @llvm.fpbuiltin.tanh.f64(double {{.*}}) +// +// CHECK-F2-LABEL: define dso_local void @f1 +// CHECK-F2: call double @llvm.fpbuiltin.acos.f64(double {{.*}}) #[[ATTR_F2_MEDIUM:[0-9]+]] +// CHECK-F2: call double @llvm.fpbuiltin.acosh.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.asin.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.asinh.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.atan.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.atan2.f64(double {{.*}}, double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.atanh.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.cos.f64(double {{.*}}) #[[ATTR_F2_CUDA:[0-9]+]] +// CHECK-F2: call double @llvm.fpbuiltin.cosh.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.erf.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.erfc.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.exp.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.exp10.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.exp2.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.expm1.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.fadd.f64(double {{.*}}, double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.fdiv.f64(double {{.*}}, double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.fmul.f64(double {{.*}}, double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: 
call double @llvm.fpbuiltin.frem.f64(double {{.*}}, double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.fsub.f64(double {{.*}}, double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.hypot.f64(double {{.*}}, double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.ldexp.f64(double {{.*}}, i32 {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.log.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.log10.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.log1p.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.log2.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.pow.f64(double {{.*}}, double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.rsqrt.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.sin.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call void @llvm.fpbuiltin.sincos.f64(double {{.*}}, ptr {{.*}}, ptr {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.sinh.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.sqrt.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR_F2_HIGH:[0-9]+]] +// CHECK-F2: call double @llvm.fpbuiltin.tanh.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// +// CHECK-F3-LABEL: define dso_local void @f1 +// CHECK-F3: call double @llvm.fpbuiltin.acos.f64(double %conv) #[[ATTR_F3_HIGH:[0-9]+]] +// CHECK-F3: call double @llvm.fpbuiltin.acosh.f64(double %conv2) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.asin.f64(double %conv4) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.asinh.f64(double %conv6) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.atan.f64(double %conv8) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double 
@llvm.fpbuiltin.atan2.f64(double %conv10, double %conv11) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.atanh.f64(double %conv13) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.cos.f64(double %conv15) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.cosh.f64(double %conv17) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.erf.f64(double %conv19) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.erfc.f64(double %conv21) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.exp.f64(double %conv23) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.exp10.f64(double %conv25) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.exp2.f64(double %conv27) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.expm1.f64(double %conv29) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.fadd.f64(double %conv31, double %conv32) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.fdiv.f64(double %conv34, double %conv35) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.fmul.f64(double %conv37, double %conv38) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.frem.f64(double %conv40, double %conv41) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.fsub.f64(double %conv43, double %conv44) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.hypot.f64(double %conv46, double %conv47) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.ldexp.f64(double %conv49, i32 %conv50) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.log.f64(double %conv52) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.log10.f64(double %conv54) #[[ATTR_F3_MEDIUM:[0-9]+]] +// CHECK-F3: call double @llvm.fpbuiltin.log1p.f64(double %conv56) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.log2.f64(double %conv58) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.pow.f64(double %conv60, double %conv61) #[[ATTR_F3_HIGH]]
+// CHECK-F3: call double @llvm.fpbuiltin.rsqrt.f64(double %conv63) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.sin.f64(double %conv65) #[[ATTR_F3_HIGH]] +// CHECK-F3: call void @llvm.fpbuiltin.sincos.f64(double %conv67, ptr %p1, ptr %p2) #[[ATTR_F3_MEDIUM]] +// CHECK-F3: call double @llvm.fpbuiltin.sinh.f64(double %conv68) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.sqrt.f64(double %conv70) #[[ATTR_F3_HIGH]] +// CHECK-F3: call double @llvm.fpbuiltin.tan.f64(double %conv72) #[[ATTR_F3_LOW:[0-9]+]] +// CHECK-F3: call double @llvm.fpbuiltin.tanh.f64(double %conv74) #[[ATTR_F3_HIGH]] + +// CHECK-F3: attributes #[[ATTR_F3_HIGH]] = {{.*}}"fpbuiltin-max-error="="1.0f" +// CHECK-F3: attributes #[[ATTR_F3_MEDIUM]] = {{.*}}"fpbuiltin-max-error="="4.0f" +// CHECK-F3: attributes #[[ATTR_F3_LOW]] = {{.*}}"fpbuiltin-max-error="="67108864.0f" +// +// CHECK-SPIR-LABEL: define dso_local spir_func void @f1 +// CHECK-SPIR: call double @llvm.fpbuiltin.acos.f64(double {{.*}}) #[[ATTR_SYCL1:[0-9]+]] +// CHECK-SPIR: call double @llvm.fpbuiltin.acosh.f64(double {{.*}}) #[[ATTR_SYCL1]] +// CHECK-SPIR: call double @llvm.fpbuiltin.asin.f64(double {{.*}}) #[[ATTR_SYCL1]] +// CHECK-SPIR: call double @llvm.fpbuiltin.asinh.f64(double {{.*}}) #[[ATTR_SYCL1]] +// CHECK-SPIR: call double @llvm.fpbuiltin.atan.f64(double {{.*}}) #[[ATTR_SYCL2:[0-9]+]] +// CHECK-SPIR: call double @llvm.fpbuiltin.atan2.f64(double {{.*}}, double {{.*}}) #[[ATTR_SYCL3:[0-9]+]] +// CHECK-SPIR: call double @llvm.fpbuiltin.atanh.f64(double {{.*}}) #[[ATTR_SYCL2]] +// CHECK-SPIR: call double @llvm.fpbuiltin.cos.f64(double {{.*}}) #[[ATTR_SYCL1]] +// CHECK-SPIR: call double @llvm.fpbuiltin.cosh.f64(double {{.*}}) #[[ATTR_SYCL1]] +// CHECK-SPIR: call double @llvm.fpbuiltin.erf.f64(double {{.*}}) #[[ATTR_SYCL4:[0-9]+]] +// CHECK-SPIR: call double @llvm.fpbuiltin.erfc.f64(double {{.*}}) #[[ATTR_SYCL4]] +// CHECK-SPIR: call double @llvm.fpbuiltin.exp.f64(double {{.*}}) #[[ATTR_SYCL5:[0-9]+]] +// 
CHECK-SPIR: call double @llvm.fpbuiltin.exp10.f64(double {{.*}}) #[[ATTR_SYCL5]] +// CHECK-SPIR: call double @llvm.fpbuiltin.exp2.f64(double {{.*}}) #[[ATTR_SYCL5]] +// CHECK-SPIR: call double @llvm.fpbuiltin.expm1.f64(double {{.*}}) #[[ATTR_SYCL5]] +// CHECK-SPIR: call double @llvm.fpbuiltin.fadd.f64(double {{.*}}, double {{.*}}) #[[ATTR_SYCL6:[0-9]+]] +// CHECK-SPIR: call float @llvm.fpbuiltin.fdiv.f32(float {{.*}}, float {{.*}}) #[[ATTR_SYCL7:[0-9]+]] +// CHECK-SPIR: call float @llvm.fpbuiltin.fmul.f32(float {{.*}}, float {{.*}}) #[[ATTR_SYCL6]] +// CHECK-SPIR: call float @llvm.fpbuiltin.frem.f32(float {{.*}}, float {{.*}}) #[[ATTR_SYCL6]] +// CHECK-SPIR: call float @llvm.fpbuiltin.fsub.f32(float {{.*}}, float {{.*}}) #[[ATTR_SYCL6]] +// CHECK-SPIR: call double @llvm.fpbuiltin.hypot.f64(double {{.*}}, double {{.*}}) #[[ATTR_SYCL1]] +// CHECK-SPIR: call double @llvm.fpbuiltin.ldexp.f64(double {{.*}}, i32 {{.*}}) #[[ATTR_SYCL6]] +// CHECK-SPIR: call double @llvm.fpbuiltin.log.f64(double {{.*}}) #[[ATTR_SYCL5]] +// CHECK-SPIR: call double @llvm.fpbuiltin.log10.f64(double {{.*}}) #[[ATTR_SYCL5]] +// CHECK-SPIR: call double @llvm.fpbuiltin.log1p.f64(double {{.*}}) #[[ATTR_SYCL8:[0-9]+]] +// CHECK-SPIR: call double @llvm.fpbuiltin.log2.f64(double {{.*}}) #[[ATTR_SYCL5]] +// CHECK-SPIR: call double @llvm.fpbuiltin.pow.f64(double {{.*}}, double {{.*}}) #[[ATTR_SYCL4]] +// CHECK-SPIR: call double @llvm.fpbuiltin.rsqrt.f64(double {{.*}}) #[[ATTR_SYCL8]] +// CHECK-SPIR: call double @llvm.fpbuiltin.sin.f64(double {{.*}}) #[[ATTR_SYCL1]] +// CHECK-SPIR: call void @llvm.fpbuiltin.sincos.f32(float {{.*}}, ptr {{.*}}, ptr {{.*}}) #[[ATTR_SYCL1]] +// CHECK-SPIR: call double @llvm.fpbuiltin.sinh.f64(double {{.*}}) #[[ATTR_SYCL1]] +// CHECK-SPIR: call double @llvm.fpbuiltin.sqrt.f64(double {{.*}}) #[[ATTR_SYCL6]] +// CHECK-SPIR: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR_SYCL2]] +// CHECK-SPIR: call double @llvm.fpbuiltin.tanh.f64(double {{.*}}) #[[ATTR_SYCL2]] +// 
+void f1(float a, float b) { + float p1 = 0.f, p2 = 0.f; + + b = acos(b); + b = acosh(b); + b = asin(b); + b = asinh(b); + b = atan(b); + b = atan2(b,b); + b = atanh(b); + b = cos(b); + b = cosh(b); + b = erf(b); + b = erfc(b); + b = exp(b); + b = exp10(b); + b = exp2(b); + b = expm1(b); + b = fadd(b,b); + b = fdiv(b,b); + b = fmul(b,b); + b = frem(b,b); + b = fsub(b,b); + b = hypot(b,b); + b = ldexp(b,b); + b = log(b); + b = log10(b); + b = log1p(b); + b = log2(b); + b = pow(b,b); + b = rsqrt(b); + b = sin(b); + sincos(b,&p1,&p2); + b = sinh(b); + b = sqrt(b); + b =tan(b); + b = tanh(b); +} +// CHECK-LABEL: define dso_local void @f2 +// CHECK: call float @llvm.fpbuiltin.cos.f32(float {{.*}}) #[[ATTR_HIGH]] +// CHECK: call float @llvm.fpbuiltin.sin.f32(float {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call double @llvm.fpbuiltin.log10.f64(double {{.*}}) #[[ATTR_HIGH]] +// CHECK: call void @llvm.fpbuiltin.sincos.f64(double {{.*}}, ptr {{.*}}, ptr {{.*}}) #[[ATTR_HIGH]] +// CHECK: call float @tanf(float noundef {{.*}}) +// +// CHECK-F1-LABEL: define dso_local void @f2 +// CHECK-F1: call float @llvm.cos.f32(float {{.*}}) +// CHECK-F1: call float @llvm.sin.f32(float {{.*}}) +// CHECK-F1: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR_F1_LOW]] +// CHECK-F1: call double @llvm.fpbuiltin.log10.f64(double {{.*}}) #[[ATTR_F1_MEDIUM]] +// CHECK-F1: call void @llvm.fpbuiltin.sincos.f64(double {{.*}}, ptr {{.*}}, ptr {{.*}}) #[[ATTR_F1_MEDIUM]] +// CHECK-F1: call float @tanf(float noundef {{.*}}) +// +// CHECK-F2-LABEL: define dso_local void @f2 +// CHECK-F2: call float @llvm.fpbuiltin.cos.f32(float {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call float @llvm.fpbuiltin.sin.f32(float {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR_F2_HIGH]] +// CHECK-F2: call double @llvm.fpbuiltin.log10.f64(double {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call void 
@llvm.fpbuiltin.sincos.f64(double {{.*}}, ptr {{.*}}, ptr {{.*}}) #[[ATTR_F2_MEDIUM]] +// CHECK-F2: call float @tanf(float noundef {{.*}}) +// +// CHECK-SPIR-LABEL: define dso_local spir_func void @f2 +// CHECK-SPIR: call float @llvm.fpbuiltin.cos.f32(float {{.*}}) #[[ATTR_SYCL1]] +// CHECK-SPIR: call float @llvm.fpbuiltin.sin.f32(float {{.*}}) #[[ATTR_SYCL1]] +// CHECK-SPIR: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR_SYCL2]] +// CHECK-SPIR: call double @llvm.fpbuiltin.log10.f64(double {{.*}}) #[[ATTR_SYCL5]] +// CHECK-SPIR: call void @llvm.fpbuiltin.sincos.f32(float {{.*}}, ptr {{.*}}, ptr {{.*}}) #[[ATTR_SYCL1]] +// CHECK-SPIR: call spir_func float @tanf(float noundef {{.*}}) + +// CHECK-LABEL: define dso_local void @f3 +// CHECK: call float @fake_exp10(float {{.*}}) +// CHECK-F1: call float @fake_exp10(float {{.*}}) +// CHECK-F2: call float @fake_exp10(float {{.*}}) +// CHECK-SPIR-LABEL: define dso_local spir_func void @f3 +// CHECK-SPIR: call spir_func float @fake_exp10(float {{.*}}) + +// CHECK: attributes #[[ATTR_HIGH]] = {{.*}}"fpbuiltin-max-error="="1.0f" + +// CHECK-F1: attributes #[[ATTR_F1_HIGH]] = {{.*}}"fpbuiltin-max-error="="1.0f" +// CHECK-F1: attributes #[[ATTR_F1_MEDIUM]] = {{.*}}"fpbuiltin-max-error="="4.0f" +// CHECK-F1: attributes #[[ATTR_F1_LOW]] = {{.*}}"fpbuiltin-max-error="="67108864.0f" + +// CHECK-F2: attributes #[[ATTR_F2_MEDIUM]] = {{.*}}"fpbuiltin-max-error="="4.0f" +// CHECK-F2: attributes #[[ATTR_F2_CUDA]] = {{.*}}"fpbuiltin-max-error="="2.0f" +// CHECK-F2: attributes #[[ATTR_F2_HIGH]] = {{.*}}"fpbuiltin-max-error="="1.0f" + +// CHECK-SPIR: attributes #[[ATTR_SYCL1]] = {{.*}}"fpbuiltin-max-error="="4.0f" +// CHECK-SPIR: attributes #[[ATTR_SYCL2]] = {{.*}}"fpbuiltin-max-error="="5.0f" +// CHECK-SPIR: attributes #[[ATTR_SYCL3]] = {{.*}}"fpbuiltin-max-error="="6.0f" +// CHECK-SPIR: attributes #[[ATTR_SYCL4]] = {{.*}}"fpbuiltin-max-error="="16.0f" +// CHECK-SPIR: attributes #[[ATTR_SYCL5]] = 
{{.*}}"fpbuiltin-max-error="="3.0f" +// CHECK-SPIR: attributes #[[ATTR_SYCL6]] = {{.*}}"fpbuiltin-max-error="="0.0f" +// CHECK-SPIR: attributes #[[ATTR_SYCL7]] = {{.*}}"fpbuiltin-max-error="="2.5f" +// CHECK-SPIR: attributes #[[ATTR_SYCL8]] = {{.*}}"fpbuiltin-max-error="="2.0f" + +// CHECK-DEFAULT-LABEL: define dso_local void @f1 +// CHECK-DEFAULT: call double @acos(double noundef {{.*}}) +// CHECK-DEFAULT: call double @acosh(double noundef {{.*}}) +// CHECK-DEFAULT: call double @asin(double noundef {{.*}}) +// CHECK-DEFAULT: call double @asinh(double noundef {{.*}}) +// CHECK-DEFAULT: call double @atan(double noundef {{.*}}) +// CHECK-DEFAULT: call double @atan2(double noundef {{.*}}, double noundef {{.*}}) +// CHECK-DEFAULT: call double @atanh(double noundef {{.*}}) +// CHECK-DEFAULT: call double @llvm.cos.f64(double {{.*}}) +// CHECK-DEFAULT: call double @cosh(double noundef {{.*}}) +// CHECK-DEFAULT: call double @erf(double noundef {{.*}}) +// CHECK-DEFAULT: call double @erfc(double noundef {{.*}}) +// CHECK-DEFAULT: call double @llvm.exp.f64(double {{.*}}) +// CHECK-DEFAULT: call i32 (double, ...) @exp10(double noundef {{.*}}) +// CHECK-DEFAULT: call double @llvm.exp2.f64(double {{.*}}) +// CHECK-DEFAULT: call double @expm1(double noundef {{.*}}) +// CHECK-DEFAULT: call i32 (double, double, ...) @fadd(double noundef {{.*}}, double noundef {{.*}}) +// CHECK-DEFAULT: call i32 (double, double, ...) @fdiv(double noundef {{.*}}, double noundef {{.*}}) +// CHECK-DEFAULT: call i32 (double, double, ...) @fmul(double noundef {{.*}}, double noundef {{.*}}) +// CHECK-DEFAULT: call i32 (double, double, ...) @frem(double noundef {{.*}}, double noundef {{.*}}) +// CHECK-DEFAULT: call i32 (double, double, ...) 
@fsub(double noundef {{.*}}, double noundef {{.*}}) +// CHECK-DEFAULT: call double @hypot(double noundef {{.*}}, double noundef {{.*}}) +// CHECK-DEFAULT: call double @ldexp(double noundef {{.*}}, i32 noundef {{.*}}) +// CHECK-DEFAULT: call double @llvm.log.f64(double {{.*}}) +// CHECK-DEFAULT: call double @llvm.log10.f64(double {{.*}}) +// CHECK-DEFAULT: call double @log1p(double noundef {{.*}}) +// CHECK-DEFAULT: call double @llvm.log2.f64(double {{.*}}) +// CHECK-DEFAULT: call double @llvm.pow.f64(double {{.*}}, double {{.*}}) +// CHECK-DEFAULT: call i32 (double, ...) @rsqrt(double noundef {{.*}}) +// CHECK-DEFAULT: call double @llvm.sin.f64(double {{.*}}) +// CHECK-DEFAULT: call i32 (double, ptr, ptr, ...) @sincos(double noundef {{.*}}, ptr noundef {{.*}}, ptr noundef {{.*}}) +// CHECK-DEFAULT: call double @sinh(double noundef {{.*}}) +// CHECK-DEFAULT: call double @llvm.sqrt.f64(double {{.*}}) +// CHECK-DEFAULT: call double @tan(double noundef {{.*}}) +// CHECK-DEFAULT: call double @tanh(double noundef {{.*}}) +// +// CHECK-DEFAULT-LABEL: define dso_local void @f2 +// CHECK-DEFAULT: call float @llvm.cos.f32(float {{.*}}) +// CHECK-DEFAULT: call float @llvm.sin.f32(float {{.*}}) +// CHECK-DEFAULT: call double @tan(double noundef {{.*}}) +// CHECK-DEFAULT: call double @llvm.log10.f64(double {{.*}}) +// CHECK-DEFAULT: call i32 (double, ptr, ptr, ...) 
@sincos(double noundef {{.*}}, ptr noundef {{.*}}, ptr noundef {{.*}}) +// CHECK-DEFAULT: call float @tanf(float noundef {{.*}}) + +// CHECK-DEFAULT-LABEL: define dso_local void @f3 +// CHECK-DEFAULT: call float @fake_exp10(float {{.*}}) + +void f2(float a, float b) { + float sin = 0.f, cos = 0.f; + + b = cosf(b); + b = sinf(b); + b = tan(b); + b = log10(b); + sincos(b, &sin, &cos); + b = tanf(b); +} + +float fake_exp10(float a) __attribute__((no_builtin)){} +void f3(float a, float b) { + a = fake_exp10(b); +} diff --git a/clang/test/Driver/fp-accuracy.c b/clang/test/Driver/fp-accuracy.c new file mode 100644 index 0000000000000..e13c2dfc657f1 --- /dev/null +++ b/clang/test/Driver/fp-accuracy.c @@ -0,0 +1,64 @@ +// RUN: %clang -### -target x86_64 -ffp-accuracy=high -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=HIGH %s + +// RUN: %clang -### -target x86_64 -ffp-accuracy=low -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=LOW %s + +// RUN: %clang -### -target x86_64 -ffp-accuracy=medium -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=MEDIUM %s + +// RUN: %clang -### -target x86_64 -ffp-accuracy=sycl -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=SYCL %s + +// RUN: %clang -### -target x86_64 -ffp-accuracy=cuda -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CUDA %s + +// RUN: %clang -### -target x86_64 -ffp-accuracy=low:sin,cos -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=FUNC-1 %s + +// RUN: %clang -### -target x86_64 -ffp-accuracy=low:sin,cos -ffp-accuracy=high:tan -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=FUNC-2 %s + +// RUN: not %clang -Xclang -verify -fno-math-errno -ffp-accuracy=foo %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=ERR + +// RUN: not %clang -Xclang -verify -fno-math-errno -ffp-accuracy=foo:[sin,cos] %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=ERR + +// RUN: not %clang -Xclang -verify -fno-math-errno -ffp-accuracy=foo:[sin,cos] \ +// RUN: -ffp-accuracy=goo %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=ERR + +// RUN: 
not %clang -Xclang -verify -fno-math-errno -ffp-accuracy=foo:[sin,cos] \ +// RUN: -ffp-accuracy=goo:[tan] %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=ERR-1 + +// RUN: not %clang -Xclang -verify -fno-math-errno -ffp-accuracy=high=[sin] %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=ERR-2 + +// RUN: not %clang -Xclang -verify -fno-math-errno -ffp-accuracy=low:[sin,cos] \ +// RUN: -ffp-accuracy=high %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=WARN + +// RUN: not %clang -Xclang -verify -ffp-accuracy=low:[sin,cos] \ +// RUN: -ffp-accuracy=high -fmath-errno %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=ERR-3 + +// RUN: not %clang -Xclang -verify -ffp-accuracy=high \ +// RUN: -fmath-errno %s 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=ERR-3 + + +// HIGH: "-ffp-builtin-accuracy=high" +// LOW: "-ffp-builtin-accuracy=low" +// MEDIUM: "-ffp-builtin-accuracy=medium" +// SYCL: "-ffp-builtin-accuracy=sycl" +// CUDA: "-ffp-builtin-accuracy=cuda" +// FUNC-1: "-ffp-builtin-accuracy=low:sin,cos" +// FUNC-2: "-ffp-builtin-accuracy=low:sin,cos high:tan" +// ERR: (frontend): unsupported argument 'foo' to option '-ffp-accuracy' +// ERR-1: (frontend): unsupported argument 'foo' to option '-ffp-accuracy' +// ERR-2: (frontend): unsupported argument 'high=[sin]' to option '-ffp-accuracy' +// WARN: (frontend): floating point accuracy value of 'high' has already been assigned to function 'cos' +// WARN: (frontend): floating point accuracy value of 'high' has already been assigned to function 'sin' +// ERR-3: (frontend): floating point accuracy requirements cannot be guaranteed when '-fmath-errno' is enabled; use '-fno-math-errno' to enable floating point accuracy control