diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt index cb4603c79c79c..feb106cc2cb63 100644 --- a/libc/config/darwin/arm/entrypoints.txt +++ b/libc/config/darwin/arm/entrypoints.txt @@ -234,6 +234,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.sqrt libc.src.math.sqrtf libc.src.math.sqrtl + libc.src.math.tan libc.src.math.tanf libc.src.math.tanhf libc.src.math.trunc diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index ff35e8fffec19..2ec44357c84c7 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -489,6 +489,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.sqrt libc.src.math.sqrtf libc.src.math.sqrtl + libc.src.math.tan libc.src.math.tanf libc.src.math.tanhf libc.src.math.trunc diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index a27a494153480..a24514e29334d 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -366,6 +366,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.sqrt libc.src.math.sqrtf libc.src.math.sqrtl + libc.src.math.tan libc.src.math.tanf libc.src.math.tanhf libc.src.math.trunc diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 51d85eed9ff16..5b0d591557944 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -497,6 +497,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.sqrt libc.src.math.sqrtf libc.src.math.sqrtl + libc.src.math.tan libc.src.math.tanf libc.src.math.tanhf libc.src.math.trunc diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index e4da3d42baf7a..b07aff5913846 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -338,7 +338,7 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | sqrt | |check| | |check| | |check| | | |check| | 7.12.7.10 | F.10.4.10 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| tan | |check| | | | | | 7.12.4.7 | F.10.1.7 | +| tan | |check| | |check| | | | | 7.12.4.7 | F.10.1.7 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | tanh | |check| | | | | | 7.12.5.6 | F.10.2.6 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/src/__support/FPUtil/double_double.h b/libc/src/__support/FPUtil/double_double.h index 3d16a3cce3a99..6867026953f25 100644 --- a/libc/src/__support/FPUtil/double_double.h +++ b/libc/src/__support/FPUtil/double_double.h @@ -129,6 +129,42 @@ LIBC_INLINE DoubleDouble multiply_add(const DoubleDouble &a, return add(c, quick_mult(a, b)); } +// Accurate double-double division, following Karp-Markstein's trick for +// division, implemented in the CORE-MATH project at: +// https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/tan/tan.c#L1855 +// +// Error bounds: +// Let a = ah + al, b = bh + bl. +// Let r = rh + rl be the approximation of (ah + al) / (bh + bl). +// Then: +// (ah + al) / (bh + bl) - rh = +// = ((ah - bh * rh) + (al - bl * rh)) / (bh + bl) +// = (1 + O(bl/bh)) * ((ah - bh * rh) + (al - bl * rh)) / bh +// Let q = round(1/bh), then the above expressions are approximately: +// = (1 + O(bl / bh)) * (1 + O(2^-52)) * q * ((ah - bh * rh) + (al - bl * rh)) +// So we can compute: +// rl = q * (ah - bh * rh) + q * (al - bl * rh) +// as accurate as possible, then the error is bounded by: +// |(ah + al) / (bh + bl) - (rh + rl)| < O(bl/bh) * (2^-52 + al/ah + bl/bh) +LIBC_INLINE DoubleDouble div(const DoubleDouble &a, const DoubleDouble &b) { + DoubleDouble r; + double q = 1.0 / b.hi; + r.hi = a.hi * q; + +#ifdef LIBC_TARGET_CPU_HAS_FMA + double e_hi = fputil::multiply_add(b.hi, -r.hi, a.hi); + double e_lo = fputil::multiply_add(b.lo, -r.hi, a.lo); +#else + DoubleDouble b_hi_r_hi = fputil::exact_mult(b.hi, -r.hi); + DoubleDouble b_lo_r_hi = fputil::exact_mult(b.lo, -r.hi); + double e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo; + double e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo; +#endif // LIBC_TARGET_CPU_HAS_FMA + + r.lo = q * (e_hi + e_lo); + return r; +} + } // namespace LIBC_NAMESPACE::fputil #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index d6ea8c54174b6..9c8cf84ffe6d7 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -323,6 +323,27 @@ add_entrypoint_object( -O3 ) +add_entrypoint_object( + tan + SRCS + tan.cpp + HDRS + ../tan.h + DEPENDS + .range_reduction_double + libc.hdr.errno_macros + libc.src.errno.errno + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.dyadic_float + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.macros.optimization + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( tanf SRCS diff --git a/libc/src/math/generic/tan.cpp b/libc/src/math/generic/tan.cpp new file mode 100644 index 0000000000000..e6230e9c1cd69 --- /dev/null +++ b/libc/src/math/generic/tan.cpp @@ -0,0 +1,318 @@ +//===-- Double-precision tan function -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/tan.h" +#include "hdr/errno_macros.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/dyadic_float.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/common.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA + +#ifdef LIBC_TARGET_CPU_HAS_FMA +#include "range_reduction_double_fma.h" + +// With FMA, we limit the maxmimum exponent to be 2^16, so that the error bound +// from the fma::range_reduction_small is bounded by 2^-88 instead of 2^-72. +#define FAST_PASS_EXPONENT 16 +using LIBC_NAMESPACE::fma::ONE_TWENTY_EIGHT_OVER_PI; +using LIBC_NAMESPACE::fma::range_reduction_small; +using LIBC_NAMESPACE::fma::SIN_K_PI_OVER_128; + +LIBC_INLINE constexpr bool NO_FMA = false; +#else +#include "range_reduction_double_nofma.h" + +using LIBC_NAMESPACE::nofma::FAST_PASS_EXPONENT; +using LIBC_NAMESPACE::nofma::ONE_TWENTY_EIGHT_OVER_PI; +using LIBC_NAMESPACE::nofma::range_reduction_small; +using LIBC_NAMESPACE::nofma::SIN_K_PI_OVER_128; + +LIBC_INLINE constexpr bool NO_FMA = true; +#endif // LIBC_TARGET_CPU_HAS_FMA + +// TODO: We might be able to improve the performance of large range reduction of +// non-FMA targets further by operating directly on 25-bit chunks of 128/pi and +// pre-split SIN_K_PI_OVER_128, but that might double the memory footprint of +// those lookup table. +#include "range_reduction_double_common.h" + +#if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0) +#define LIBC_MATH_TAN_SKIP_ACCURATE_PASS +#endif + +namespace LIBC_NAMESPACE { + +using DoubleDouble = fputil::DoubleDouble; +using Float128 = typename fputil::DyadicFloat<128>; + +namespace { + +LIBC_INLINE DoubleDouble tan_eval(const DoubleDouble &u) { + // Evaluate tan(y) = tan(x - k * (pi/128)) + // We use the degree-9 Taylor approximation: + // tan(y) ~ P(y) = y + y^3/3 + 2*y^5/15 + 17*y^7/315 + 62*y^9/2835 + // Then the error is bounded by: + // |tan(y) - P(y)| < 2^-6 * |y|^11 < 2^-6 * 2^-66 = 2^-72. + // For y ~ u_hi + u_lo, fully expanding the polynomial and drop any terms + // < ulp(u_hi^3) gives us: + // P(y) = y + y^3/3 + 2*y^5/15 + 17*y^7/315 + 62*y^9/2835 = ... + // ~ u_hi + u_hi^3 * (1/3 + u_hi^2 * (2/15 + u_hi^2 * (17/315 + + // + u_hi^2 * 62/2835))) + + // + u_lo (1 + u_hi^2 * (1 + u_hi^2 * 2/3)) + double u_hi_sq = u.hi * u.hi; // Error < ulp(u_hi^2) < 2^(-6 - 52) = 2^-58. + // p1 ~ 17/315 + u_hi^2 62 / 2835. + double p1 = + fputil::multiply_add(u_hi_sq, 0x1.664f4882c10fap-6, 0x1.ba1ba1ba1ba1cp-5); + // p2 ~ 1/3 + u_hi^2 2 / 15. + double p2 = + fputil::multiply_add(u_hi_sq, 0x1.1111111111111p-3, 0x1.5555555555555p-2); + // q1 ~ 1 + u_hi^2 * 2/3. + double q1 = fputil::multiply_add(u_hi_sq, 0x1.5555555555555p-1, 1.0); + double u_hi_3 = u_hi_sq * u.hi; + double u_hi_4 = u_hi_sq * u_hi_sq; + // p3 ~ 1/3 + u_hi^2 * (2/15 + u_hi^2 * (17/315 + u_hi^2 * 62/2835)) + double p3 = fputil::multiply_add(u_hi_4, p1, p2); + // q2 ~ 1 + u_hi^2 * (1 + u_hi^2 * 2/3) + double q2 = fputil::multiply_add(u_hi_sq, q1, 1.0); + double tan_lo = fputil::multiply_add(u_hi_3, p3, u.lo * q2); + // Overall, |tan(y) - (u_hi + tan_lo)| < ulp(u_hi^3) <= 2^-71. + // And the relative errors is: + // |(tan(y) - (u_hi + tan_lo)) / tan(y) | <= 2*ulp(u_hi^2) < 2^-64 + + return fputil::exact_add(u.hi, tan_lo); +} + +// Accurate evaluation of tan for small u. +Float128 tan_eval(const Float128 &u) { + Float128 u_sq = fputil::quick_mul(u, u); + + // tan(x) ~ x + x^3/3 + x^5 * 2/15 + x^7 * 17/315 + x^9 * 62/2835 + + // + x^11 * 1382/155925 + x^13 * 21844/6081075 + + // + x^15 * 929569/638512875 + x^17 * 6404582/10854718875 + // Relative errors < 2^-127 for |u| < pi/256. + constexpr Float128 TAN_COEFFS[] = { + {Sign::POS, -127, 0x80000000'00000000'00000000'00000000_u128}, // 1 + {Sign::POS, -129, 0xaaaaaaaa'aaaaaaaa'aaaaaaaa'aaaaaaab_u128}, // 1 + {Sign::POS, -130, 0x88888888'88888888'88888888'88888889_u128}, // 2/15 + {Sign::POS, -132, 0xdd0dd0dd'0dd0dd0d'd0dd0dd0'dd0dd0dd_u128}, // 17/315 + {Sign::POS, -133, 0xb327a441'6087cf99'6b5dd24e'ec0b327a_u128}, // 62/2835 + {Sign::POS, -134, + 0x91371aaf'3611e47a'da8e1cba'7d900eca_u128}, // 1382/155925 + {Sign::POS, -136, + 0xeb69e870'abeefdaf'e606d2e4'd1e65fbc_u128}, // 21844/6081075 + {Sign::POS, -137, + 0xbed1b229'5baf15b5'0ec9af45'a2619971_u128}, // 929569/638512875 + {Sign::POS, -138, + 0x9aac1240'1b3a2291'1b2ac7e3'e4627d0a_u128}, // 6404582/10854718875 + }; + + return fputil::quick_mul( + u, fputil::polyeval(u_sq, TAN_COEFFS[0], TAN_COEFFS[1], TAN_COEFFS[2], + TAN_COEFFS[3], TAN_COEFFS[4], TAN_COEFFS[5], + TAN_COEFFS[6], TAN_COEFFS[7], TAN_COEFFS[8])); +} + +// Calculation a / b = a * (1/b) for Float128. +// Using the initial approximation of q ~ (1/b), then apply 2 Newton-Raphson +// iterations, before multiplying by a. +Float128 newton_raphson_div(const Float128 &a, Float128 b, double q) { + Float128 q0(q); + constexpr Float128 TWO(2.0); + b.sign = (b.sign == Sign::POS) ? Sign::NEG : Sign::POS; + Float128 q1 = + fputil::quick_mul(q0, fputil::quick_add(TWO, fputil::quick_mul(b, q0))); + Float128 q2 = + fputil::quick_mul(q1, fputil::quick_add(TWO, fputil::quick_mul(b, q1))); + return fputil::quick_mul(a, q2); +} + +} // anonymous namespace + +LLVM_LIBC_FUNCTION(double, tan, (double x)) { + using FPBits = typename fputil::FPBits; + FPBits xbits(x); + + uint16_t x_e = xbits.get_biased_exponent(); + + DoubleDouble y; + unsigned k; + generic::LargeRangeReduction range_reduction_large; + + // |x| < 2^32 (with FMA) or |x| < 2^23 (w/o FMA) + if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) { + // |x| < 2^-27 + if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 27)) { + // Signed zeros. + if (LIBC_UNLIKELY(x == 0.0)) + return x; + + // For |x| < 2^-27, |tan(x) - x| < ulp(x)/2. +#ifdef LIBC_TARGET_CPU_HAS_FMA + return fputil::multiply_add(x, 0x1.0p-54, x); +#else + if (LIBC_UNLIKELY(x_e < 4)) { + int rounding_mode = fputil::quick_get_round(); + if (rounding_mode == FE_TOWARDZERO || + (xbits.sign() == Sign::POS && rounding_mode == FE_DOWNWARD) || + (xbits.sign() == Sign::NEG && rounding_mode == FE_UPWARD)) + return FPBits(xbits.uintval() + 1).get_val(); + } + return fputil::multiply_add(x, 0x1.0p-54, x); +#endif // LIBC_TARGET_CPU_HAS_FMA + } + + // // Small range reduction. + k = range_reduction_small(x, y); + } else { + // Inf or NaN + if (LIBC_UNLIKELY(x_e > 2 * FPBits::EXP_BIAS)) { + // tan(+-Inf) = NaN + if (xbits.get_mantissa() == 0) { + fputil::set_errno_if_required(EDOM); + fputil::raise_except_if_required(FE_INVALID); + } + return x + FPBits::quiet_nan().get_val(); + } + + // Large range reduction. + k = range_reduction_large.compute_high_part(x); + y = range_reduction_large.fast(); + } + + DoubleDouble tan_y = tan_eval(y); + + // Look up sin(k * pi/128) and cos(k * pi/128) + // Memory saving versions: + + // Use 128-entry table instead: + // DoubleDouble sin_k = SIN_K_PI_OVER_128[k & 127]; + // uint64_t sin_s = static_cast(k & 128) << (63 - 7); + // sin_k.hi = FPBits(FPBits(sin_k.hi).uintval() ^ sin_s).get_val(); + // sin_k.lo = FPBits(FPBits(sin_k.hi).uintval() ^ sin_s).get_val(); + // DoubleDouble cos_k = SIN_K_PI_OVER_128[(k + 64) & 127]; + // uint64_t cos_s = static_cast((k + 64) & 128) << (63 - 7); + // cos_k.hi = FPBits(FPBits(cos_k.hi).uintval() ^ cos_s).get_val(); + // cos_k.lo = FPBits(FPBits(cos_k.hi).uintval() ^ cos_s).get_val(); + + // Use 64-entry table instead: + // auto get_idx_dd = [](unsigned kk) -> DoubleDouble { + // unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); + // DoubleDouble ans = SIN_K_PI_OVER_128[idx]; + // if (kk & 128) { + // ans.hi = -ans.hi; + // ans.lo = -ans.lo; + // } + // return ans; + // }; + // DoubleDouble msin_k = get_idx_dd(k + 128); + // DoubleDouble cos_k = get_idx_dd(k + 64); + + // Fast look up version, but needs 256-entry table. + // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128). + DoubleDouble msin_k = SIN_K_PI_OVER_128[(k + 128) & 255]; + DoubleDouble cos_k = SIN_K_PI_OVER_128[(k + 64) & 255]; + + // After range reduction, k = round(x * 128 / pi) and y = x - k * (pi / 128). + // So k is an integer and -pi / 256 <= y <= pi / 256. + // Then tan(x) = sin(x) / cos(x) + // = sin((k * pi/128 + y) / cos((k * pi/128 + y) + // = (cos(y) * sin(k*pi/128) + sin(y) * cos(k*pi/128)) / + // / (cos(y) * cos(k*pi/128) - sin(y) * sin(k*pi/128)) + // = (sin(k*pi/128) + tan(y) * cos(k*pi/128)) / + // / (cos(k*pi/128) - tan(y) * sin(k*pi/128)) + DoubleDouble cos_k_tan_y = fputil::quick_mult(tan_y, cos_k); + DoubleDouble msin_k_tan_y = fputil::quick_mult(tan_y, msin_k); + + // num_dd = sin(k*pi/128) + tan(y) * cos(k*pi/128) + DoubleDouble num_dd = fputil::exact_add(cos_k_tan_y.hi, -msin_k.hi); + // den_dd = cos(k*pi/128) - tan(y) * sin(k*pi/128) + DoubleDouble den_dd = fputil::exact_add(msin_k_tan_y.hi, cos_k.hi); + num_dd.lo += cos_k_tan_y.lo - msin_k.lo; + den_dd.lo += msin_k_tan_y.lo + cos_k.lo; + +#ifdef LIBC_MATH_TAN_SKIP_ACCURATE_PASS + double tan_x = (num_dd.hi + num_dd.lo) / (den_dd.hi + den_dd.lo); + return tan_x; +#else + // Accurate test and pass for correctly rounded implementation. + + // Accurate double-double division + DoubleDouble tan_x = fputil::div(num_dd, den_dd); + + // Relative errors for k != 0 mod 64 is: + // absolute errors / min(sin(k*pi/128), cos(k*pi/128)) <= 2^-71 / 2^-7 + // = 2^-64. + // For k = 0 mod 64, the relative errors is bounded by: + // 2^-71 / 2^(exponent of x). + constexpr int ERR = 64; + + int y_exp = 7 + FPBits(y.hi).get_exponent(); + int rel_err_exp = ERR + static_cast((k & 63) == 0) * y_exp; + int64_t tan_x_err = static_cast(FPBits(tan_x.hi).uintval()) - + (static_cast(rel_err_exp) << 52); + double tan_err = FPBits(static_cast(tan_x_err)).get_val(); + + double err_higher = tan_x.lo + tan_err; + double err_lower = tan_x.lo - tan_err; + + double tan_upper = tan_x.hi + err_higher; + double tan_lower = tan_x.hi + err_lower; + + // Ziv's rounding test. + if (LIBC_LIKELY(tan_upper == tan_lower)) + return tan_upper; + + Float128 u_f128; + if (LIBC_LIKELY(x_e < FPBits::EXP_BIAS + FAST_PASS_EXPONENT)) + u_f128 = generic::range_reduction_small_f128(x); + else + u_f128 = range_reduction_large.accurate(); + + Float128 tan_u = tan_eval(u_f128); + + auto get_sin_k = [](unsigned kk) -> Float128 { + unsigned idx = (kk & 64) ? 64 - (kk & 63) : (kk & 63); + Float128 ans = generic::SIN_K_PI_OVER_128_F128[idx]; + if (kk & 128) + ans.sign = Sign::NEG; + return ans; + }; + + // cos(k * pi/128) = sin(k * pi/128 + pi/2) = sin((k + 64) * pi/128). + Float128 sin_k_f128 = get_sin_k(k); + Float128 cos_k_f128 = get_sin_k(k + 64); + Float128 msin_k_f128 = get_sin_k(k + 128); + + // num_f128 = sin(k*pi/128) + tan(y) * cos(k*pi/128) + Float128 num_f128 = + fputil::quick_add(sin_k_f128, fputil::quick_mul(cos_k_f128, tan_u)); + // den_f128 = cos(k*pi/128) - tan(y) * sin(k*pi/128) + Float128 den_f128 = + fputil::quick_add(cos_k_f128, fputil::quick_mul(msin_k_f128, tan_u)); + + // tan(x) = (sin(k*pi/128) + tan(y) * cos(k*pi/128)) / + // / (cos(k*pi/128) - tan(y) * sin(k*pi/128)) + // TODO: The initial seed 1.0/den_dd.hi for Newton-Raphson reciprocal can be + // reused from DoubleDouble fputil::div in the fast pass. + Float128 result = newton_raphson_div(num_f128, den_f128, 1.0 / den_dd.hi); + + // TODO: Add assertion if Ziv's accuracy tests fail in debug mode. + // https://github.com/llvm/llvm-project/issues/96452. + return static_cast(result); + +#endif // !LIBC_MATH_TAN_SKIP_ACCURATE_PASS +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/x86_64/CMakeLists.txt b/libc/src/math/x86_64/CMakeLists.txt deleted file mode 100644 index 3cfc422e56d49..0000000000000 --- a/libc/src/math/x86_64/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_entrypoint_object( - tan - SRCS - tan.cpp - HDRS - ../tan.h - COMPILE_OPTIONS - -O2 -) diff --git a/libc/src/math/x86_64/tan.cpp b/libc/src/math/x86_64/tan.cpp deleted file mode 100644 index bc0e0fc7d1ffa..0000000000000 --- a/libc/src/math/x86_64/tan.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===-- Implementation of the tan function for x86_64 ---------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/math/tan.h" -#include "src/__support/common.h" - -namespace LIBC_NAMESPACE { - -LLVM_LIBC_FUNCTION(double, tan, (double x)) { - double result; - // The fptan instruction pushes the number 1 on to the FP stack after - // computing tan. So, we read out the one before popping the actual result. - __asm__ __volatile__("fptan" : "+t"(x)); - __asm__ __volatile__("fstpl %0" : "=m"(result)); - return result; -} - -} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 1b269edaa2477..9ff62a868bf93 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -3853,3 +3853,13 @@ add_fp_unittest( DEPENDS libc.src.math.sincos ) + +add_fp_unittest( + tan_test + SUITE + libc-math-smoke-tests + SRCS + tan_test.cpp + DEPENDS + libc.src.math.tan +) diff --git a/libc/test/src/math/smoke/tan_test.cpp b/libc/test/src/math/smoke/tan_test.cpp new file mode 100644 index 0000000000000..498dba76b6e71 --- /dev/null +++ b/libc/test/src/math/smoke/tan_test.cpp @@ -0,0 +1,26 @@ +//===-- Unittests for tan -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/tan.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcTanTest = LIBC_NAMESPACE::testing::FPTest; + +using LIBC_NAMESPACE::testing::tlog; + +TEST_F(LlvmLibcTanTest, SpecialNumbers) { + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::tan(aNaN)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::tan(inf)); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::tan(neg_inf)); + EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::tan(zero)); + EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, LIBC_NAMESPACE::tan(neg_zero)); + EXPECT_FP_EQ(0x1.0p-50, LIBC_NAMESPACE::tan(0x1.0p-50)); + EXPECT_FP_EQ(min_normal, LIBC_NAMESPACE::tan(min_normal)); + EXPECT_FP_EQ(min_denormal, LIBC_NAMESPACE::tan(min_denormal)); +} diff --git a/libc/test/src/math/tan_test.cpp b/libc/test/src/math/tan_test.cpp index d813dccc38369..e9e3e59f4d12d 100644 --- a/libc/test/src/math/tan_test.cpp +++ b/libc/test/src/math/tan_test.cpp @@ -6,27 +6,116 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/FPUtil/FPBits.h" #include "src/math/tan.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include "hdr/math_macros.h" - using LlvmLibcTanTest = LIBC_NAMESPACE::testing::FPTest; namespace mpfr = LIBC_NAMESPACE::testing::mpfr; -TEST_F(LlvmLibcTanTest, Range) { - static constexpr double _2pi = 6.283185307179586; - constexpr StorageType COUNT = 100'000; - constexpr StorageType STEP = STORAGE_MAX / COUNT; - for (StorageType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { - double x = FPBits(v).get_val(); - // TODO: Expand the range of testing after range reduction is implemented. - if (isnan(x) || isinf(x) || x > _2pi || x < -_2pi) - continue; - - ASSERT_MPFR_MATCH(mpfr::Operation::Tan, x, LIBC_NAMESPACE::tan(x), 1.0); +using LIBC_NAMESPACE::testing::tlog; + +TEST_F(LlvmLibcTanTest, TrickyInputs) { + constexpr double INPUTS[] = { + 0x1.d130383d17321p-27, 0x1.8000000000009p-23, 0x1.8000000000024p-22, + 0x1.800000000009p-21, 0x1.20000000000f3p-20, 0x1.800000000024p-20, + 0x1.e0000000001c2p-20, 0x1.0da8cc189b47dp-10, 0x1.00a33764a0a83p-7, + 0x1.911a18779813fp-7, 0x1.940c877fb7dacp-7, 0x1.f42fb19b5b9b2p-6, + 0x1.0285070f9f1bcp-5, 0x1.6ca9ef729af76p-1, 0x1.23f40dccdef72p+0, + 0x1.43cf16358c9d7p+0, 0x1.addf3b9722265p+0, 0x1.ae78d360afa15p+0, + 0x1.fe81868fc47fep+1, 0x1.e31b55306f22cp+2, 0x1.e639103a05997p+2, + 0x1.f7898d5a756ddp+2, 0x1.1685973506319p+3, 0x1.5f09cad750ab1p+3, + 0x1.aaf85537ea4c7p+3, 0x1.4f2b874135d27p+4, 0x1.13114266f9764p+4, + 0x1.a211877de55dbp+4, 0x1.a5eece87e8606p+4, 0x1.a65d441ea6dcep+4, + 0x1.1ffb509f3db15p+5, 0x1.2345d1e090529p+5, 0x1.c96e28eb679f8p+5, + 0x1.da1838053b866p+5, 0x1.be886d9c2324dp+6, 0x1.ab514bfc61c76p+7, + 0x1.14823229799c2p+7, 0x1.48ff1782ca91dp+8, 0x1.dcbfda0c7559ep+8, + 0x1.dcbfda0c7559ep+8, 0x1.2e566149bf5fdp+9, 0x1.cb996c60f437ep+9, + 0x1.119471e9216cdp+10, 0x1.ae945054939c2p+10, 0x1.fffffffffdb6p+24, + 0x1.fd4da4ef37075p+29, 0x1.55202aefde314p+31, 0x1.b951f1572eba5p+31, + 0x1.76e86a7485a46p59, 0x1.7776c2343ba4ep+101, 0x1.85fc0f04c0128p+101, + 0x1.678309fa50d58p+110, 0x1.524489232dc4ap+178, 0x1.fffffffffef4ep+199, + 0x1.6deb37da81129p+205, 0x1.3eec5912ea7cdp+331, 0x1.08087e9aad90bp+887, + 0x1.6ac5b262ca1ffp+843, 0x1.8bb5847d49973p+845, 0x1.6ac5b262ca1ffp+849, + 0x1.f08b14e1c4d0fp+890, 0x1.2b5fe88a9d8d5p+903, 0x1.a880417b7b119p+1023, + 0x1.f6d7518808571p+1023, + }; + constexpr int N = sizeof(INPUTS) / sizeof(INPUTS[0]); + + for (int i = 0; i < N; ++i) { + double x = INPUTS[i]; + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tan, x, + LIBC_NAMESPACE::tan(x), 0.5); + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Tan, -x, + LIBC_NAMESPACE::tan(-x), 0.5); } } + +TEST_F(LlvmLibcTanTest, InDoubleRange) { + constexpr uint64_t COUNT = 1'234'51; + uint64_t START = LIBC_NAMESPACE::fputil::FPBits(0x1.0p-50).uintval(); + uint64_t STOP = LIBC_NAMESPACE::fputil::FPBits(0x1.0p200).uintval(); + uint64_t STEP = (STOP - START) / COUNT; + + auto test = [&](mpfr::RoundingMode rounding_mode) { + mpfr::ForceRoundingMode force_rounding(rounding_mode); + if (!force_rounding.success) + return; + + uint64_t fails = 0; + uint64_t tested = 0; + uint64_t total = 0; + double worst_input, worst_output = 0.0; + double ulp = 0.5; + + for (uint64_t i = 0, v = START; i <= COUNT; ++i, v += STEP) { + double x = FPBits(v).get_val(); + if (isnan(x) || isinf(x)) + continue; + + double result = LIBC_NAMESPACE::tan(x); + ++total; + if (isnan(result) || isinf(result)) + continue; + + ++tested; + + if (!TEST_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Tan, x, result, + 0.5, rounding_mode)) { + ++fails; + while (!TEST_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Tan, x, + result, ulp, rounding_mode)) { + worst_input = x; + worst_output = result; + + if (ulp > 1000.0) + break; + + ulp *= 2.0; + } + } + } + if (fails) { + tlog << " Tan failed: " << fails << "/" << tested << "/" << total + << " tests.\n"; + tlog << " Max ULPs is at most: " << static_cast(ulp) << ".\n"; + EXPECT_MPFR_MATCH(mpfr::Operation::Tan, worst_input, worst_output, 0.5, + rounding_mode); + } + }; + + tlog << " Test Rounding To Nearest...\n"; + test(mpfr::RoundingMode::Nearest); + + tlog << " Test Rounding Downward...\n"; + test(mpfr::RoundingMode::Downward); + + tlog << " Test Rounding Upward...\n"; + test(mpfr::RoundingMode::Upward); + + tlog << " Test Rounding Toward Zero...\n"; + test(mpfr::RoundingMode::TowardZero); +}