[libc][math][c23] Add hypotf16 function (#131991)

meltq · web-flow · commit 8078665bca1e · 2025-03-31T10:06:28.000-04:00
Implement hypot for Float16 along with tests.
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
@@ -706,6 +706,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.fromfpf16
     libc.src.math.fromfpxf16
     libc.src.math.getpayloadf16
+    libc.src.math.hypotf16
     libc.src.math.ilogbf16
     libc.src.math.iscanonicalf16
     libc.src.math.issignalingf16
diff --git a/libc/docs/headers/math/index.rst b/libc/docs/headers/math/index.rst
@@ -305,7 +305,7 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | fsqrt     | N/A              | |check|         |  |check|               | N/A                  | |check|\*              | 7.12.14.6              | F.10.11                    |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| hypot     | |check|          | |check|         |                        |                      |                        | 7.12.7.4               | F.10.4.4                   |
+| hypot     | |check|          | |check|         |                        | |check|              |                        | 7.12.7.4               | F.10.4.4                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | lgamma    |                  |                 |                        |                      |                        | 7.12.8.3               | F.10.5.3                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/include/math.yaml b/libc/include/math.yaml
@@ -1395,6 +1395,14 @@ functions:
     arguments:
       - type: float
       - type: float
+  - name: hypotf16
+    standards:
+      - stdc
+    return_type: _Float16
+    arguments:
+      - type: _Float16
+      - type: _Float16
+    guard: LIBC_TYPES_HAS_FLOAT16
   - name: ilogb
     standards:
       - stdc
diff --git a/libc/src/__support/FPUtil/Hypot.h b/libc/src/__support/FPUtil/Hypot.h
@@ -30,7 +30,7 @@ LIBC_INLINE T find_leading_one(T mant, int &shift_length) {
   if (mant > 0) {
     shift_length = (sizeof(mant) * 8) - 1 - cpp::countl_zero(mant);
   }
-  return T(1) << shift_length;
+  return static_cast<T>((T(1) << shift_length));
 }
 
 } // namespace internal
@@ -207,8 +207,10 @@ LIBC_INLINE T hypot(T x, T y) {
 
   for (StorageType current_bit = leading_one >> 1; current_bit;
        current_bit >>= 1) {
-    r = (r << 1) + ((tail_bits & current_bit) ? 1 : 0);
-    StorageType tmp = (y_new << 1) + current_bit; // 2*y_new(n - 1) + 2^(-n)
+    r = static_cast<StorageType>((r << 1)) +
+        ((tail_bits & current_bit) ? 1 : 0);
+    StorageType tmp = static_cast<StorageType>((y_new << 1)) +
+                      current_bit; // 2*y_new(n - 1) + 2^(-n)
     if (r >= tmp) {
       r -= tmp;
       y_new += current_bit;
diff --git a/libc/src/__support/FPUtil/cast.h b/libc/src/__support/FPUtil/cast.h
@@ -18,6 +18,9 @@
 
 namespace LIBC_NAMESPACE::fputil {
 
+// TODO: Add optimization for known good targets with fast
+// float to float16 conversion:
+// https://github.com/llvm/llvm-project/issues/133517
 template <typename OutType, typename InType>
 LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_floating_point_v<OutType> &&
                                            cpp::is_floating_point_v<InType>,
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
@@ -313,6 +313,7 @@ add_math_entrypoint_object(getpayloadf128)
 
 add_math_entrypoint_object(hypot)
 add_math_entrypoint_object(hypotf)
+add_math_entrypoint_object(hypotf16)
 
 add_math_entrypoint_object(ilogb)
 add_math_entrypoint_object(ilogbf)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
@@ -3105,6 +3105,22 @@ add_entrypoint_object(
     libc.src.__support.macros.optimization
 )
 
+add_entrypoint_object(
+  hypotf16
+  SRCS
+    hypotf16.cpp
+  HDRS
+    ../hypotf16.h
+  DEPENDS
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.sqrt
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.types
+)
+
 add_entrypoint_object(
   fdim
   SRCS
diff --git a/libc/src/math/generic/hypotf16.cpp b/libc/src/math/generic/hypotf16.cpp
@@ -0,0 +1,89 @@
+//===-- Implementation of hypotf16 function -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/hypotf16.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// For targets where conversion from float to float16 has to be
+// emulated, fputil::hypot<float16> is faster
+LLVM_LIBC_FUNCTION(float16, hypotf16, (float16 x, float16 y)) {
+  using FloatBits = fputil::FPBits<float>;
+  using FPBits = fputil::FPBits<float16>;
+
+  FPBits x_abs = FPBits(x).abs();
+  FPBits y_abs = FPBits(y).abs();
+
+  bool x_abs_larger = x_abs.uintval() >= y_abs.uintval();
+
+  FPBits a_bits = x_abs_larger ? x_abs : y_abs;
+  FPBits b_bits = x_abs_larger ? y_abs : x_abs;
+
+  uint16_t a_u = a_bits.uintval();
+  uint16_t b_u = b_bits.uintval();
+
+  // Note: replacing `a_u >= FPBits::EXP_MASK` with `a_bits.is_inf_or_nan()`
+  // generates extra exponent bit masking instructions on x86-64.
+  if (LIBC_UNLIKELY(a_u >= FPBits::EXP_MASK)) {
+    // x or y is inf or nan
+    if (a_bits.is_signaling_nan() || b_bits.is_signaling_nan()) {
+      fputil::raise_except_if_required(FE_INVALID);
+      return FPBits::quiet_nan().get_val();
+    }
+    if (a_bits.is_inf() || b_bits.is_inf())
+      return FPBits::inf().get_val();
+    return a_bits.get_val();
+  }
+
+  if (LIBC_UNLIKELY(a_u - b_u >=
+                    static_cast<uint16_t>((FPBits::FRACTION_LEN + 2)
+                                          << FPBits::FRACTION_LEN)))
+    return x_abs.get_val() + y_abs.get_val();
+
+  float af = fputil::cast<float>(a_bits.get_val());
+  float bf = fputil::cast<float>(b_bits.get_val());
+
+  // These squares are exact.
+  float a_sq = af * af;
+  float sum_sq = fputil::multiply_add(bf, bf, a_sq);
+
+  FloatBits result(fputil::sqrt<float>(sum_sq));
+  uint32_t r_u = result.uintval();
+
+  // If any of the sticky bits of the result are non-zero, except the LSB, then
+  // the rounded result is correct.
+  if (LIBC_UNLIKELY(((r_u + 1) & 0x0000'0FFE) == 0)) {
+    float r_d = result.get_val();
+
+    // Perform rounding correction.
+    float sum_sq_lo = fputil::multiply_add(bf, bf, a_sq - sum_sq);
+    float err = sum_sq_lo - fputil::multiply_add(r_d, r_d, -sum_sq);
+
+    if (err > 0) {
+      r_u |= 1;
+    } else if ((err < 0) && (r_u & 1) == 0) {
+      r_u -= 1;
+    } else if ((r_u & 0x0000'1FFF) == 0) {
+      // The rounded result is exact.
+      fputil::clear_except_if_required(FE_INEXACT);
+    }
+    return fputil::cast<float16>(FloatBits(r_u).get_val());
+  }
+
+  return fputil::cast<float16>(result.get_val());
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/hypotf16.h b/libc/src/math/hypotf16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for hypotf16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_HYPOTF16_H
+#define LLVM_LIBC_SRC_MATH_HYPOTF16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+float16 hypotf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_HYPOTF16_H
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
@@ -1701,6 +1701,17 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fp_bits
 )
 
+add_fp_unittest(
+  hypotf16_test
+  NEED_MPFR
+  SUITE
+    libc-math-unittests
+  SRCS
+    hypotf16_test.cpp
+  DEPENDS
+    libc.src.math.hypotf16
+)
+
 add_fp_unittest(
   nextafter_test
   SUITE
diff --git a/libc/test/src/math/HypotTest.h b/libc/test/src/math/HypotTest.h
@@ -73,7 +73,7 @@ class HypotTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest {
     constexpr StorageType COUNT = 10'001;
     for (unsigned scale = 0; scale < 4; ++scale) {
       StorageType max_value = MAX_SUBNORMAL << scale;
-      StorageType step = (max_value - MIN_SUBNORMAL) / COUNT;
+      StorageType step = (max_value - MIN_SUBNORMAL) / COUNT + 1;
       for (int signs = 0; signs < 4; ++signs) {
         for (StorageType v = MIN_SUBNORMAL, w = max_value;
              v <= max_value && w >= MIN_SUBNORMAL; v += step, w -= step) {
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -314,6 +314,24 @@ add_fp_unittest(
     -lpthread
 )
 
+add_fp_unittest(
+  hypotf16_test
+  NO_RUN_POSTBUILD
+  NEED_MPFR
+  SUITE
+    libc_math_exhaustive_tests
+  SRCS
+    hypotf16_test.cpp
+  COMPILE_OPTIONS
+    ${libc_opt_high_flag}
+  DEPENDS
+    .exhaustive_test
+    libc.src.math.hypotf16
+    libc.src.__support.FPUtil.fp_bits
+  LINK_LIBRARIES
+    -lpthread
+)
+
 add_fp_unittest(
   fmod_generic_impl_test
   NO_RUN_POSTBUILD
diff --git a/libc/test/src/math/exhaustive/hypotf16_test.cpp b/libc/test/src/math/exhaustive/hypotf16_test.cpp
@@ -0,0 +1,67 @@
+//===-- Exhaustive test for hypotf16 --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/Hypot.h"
+#include "src/math/hypotf16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+struct Hypotf16Checker : public virtual LIBC_NAMESPACE::testing::Test {
+  using FloatType = float16;
+  using FPBits = LIBC_NAMESPACE::fputil::FPBits<float16>;
+  using StorageType = typename FPBits::StorageType;
+
+  uint64_t check(uint16_t x_start, uint16_t x_stop, uint16_t y_start,
+                 uint16_t y_stop, mpfr::RoundingMode rounding) {
+    mpfr::ForceRoundingMode r(rounding);
+    if (!r.success)
+      return true;
+    uint16_t xbits = x_start;
+    uint64_t failed = 0;
+    do {
+      float16 x = FPBits(xbits).get_val();
+      uint16_t ybits = xbits;
+      do {
+        float16 y = FPBits(ybits).get_val();
+        bool correct = TEST_FP_EQ(LIBC_NAMESPACE::fputil::hypot<float16>(x, y),
+                                  LIBC_NAMESPACE::hypotf16(x, y));
+        // Using MPFR will be much slower.
+        // mpfr::BinaryInput<float16> input{x, y};
+        // bool correct = TEST_MPFR_MATCH_ROUNDING_SILENTLY(
+        //  mpfr::Operation::Hypot, input, LIBC_NAMESPACE::hypotf16(x, y),
+        // 0.5,
+        //  rounding);
+        failed += (!correct);
+      } while (ybits++ < y_stop);
+    } while (xbits++ < x_stop);
+    return failed;
+  }
+};
+
+using LlvmLibcHypotf16ExhaustiveTest =
+    LlvmLibcExhaustiveMathTest<Hypotf16Checker, 1 << 2>;
+
+// Range of both inputs: [0, inf]
+static constexpr uint16_t POS_START = 0x0000U;
+static constexpr uint16_t POS_STOP = 0x7C00U;
+
+TEST_F(LlvmLibcHypotf16ExhaustiveTest, PositiveRange) {
+  test_full_range_all_roundings(POS_START, POS_STOP, POS_START, POS_STOP);
+}
+
+// Range of both inputs: [-0, -inf]
+static constexpr uint16_t NEG_START = 0x8000U;
+static constexpr uint16_t NEG_STOP = 0xFC00U;
+
+TEST_F(LlvmLibcHypotf16ExhaustiveTest, NegativeRange) {
+  test_full_range_all_roundings(NEG_START, NEG_STOP, NEG_START, NEG_STOP);
+}
diff --git a/libc/test/src/math/hypotf16_test.cpp b/libc/test/src/math/hypotf16_test.cpp
@@ -0,0 +1,21 @@
+//===-- Unittests for hypotf16 --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "HypotTest.h"
+
+#include "src/math/hypotf16.h"
+
+using LlvmLibcHypotf16Test = HypotTestTemplate<float16>;
+
+TEST_F(LlvmLibcHypotf16Test, SubnormalRange) {
+  test_subnormal_range(&LIBC_NAMESPACE::hypotf16);
+}
+
+TEST_F(LlvmLibcHypotf16Test, NormalRange) {
+  test_normal_range(&LIBC_NAMESPACE::hypotf16);
+}
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -340,6 +340,18 @@ add_perf_binary(
     -fno-builtin
 )
 
+add_perf_binary(
+  hypotf16_perf
+  SRCS
+    hypotf16_perf.cpp
+  DEPENDS
+    .binary_op_single_output_diff
+    libc.src.math.hypotf16
+    libc.src.__support.FPUtil.fp_bits
+  COMPILE_OPTIONS
+    -fno-builtin
+)
+
 add_perf_binary(
   hypotf_perf
   SRCS
diff --git a/libc/test/src/math/performance_testing/hypotf16_perf.cpp b/libc/test/src/math/performance_testing/hypotf16_perf.cpp
@@ -0,0 +1,16 @@
+//===-- Differential test for hypotf16 ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BinaryOpSingleOutputPerf.h"
+
+#include "src/__support/FPUtil/Hypot.h"
+#include "src/math/hypotf16.h"
+
+BINARY_OP_SINGLE_OUTPUT_PERF(float16, float16, LIBC_NAMESPACE::hypotf16,
+                             LIBC_NAMESPACE::fputil::hypot<float16>,
+                             "hypotf16_perf.log")
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
diff --git a/libc/test/src/math/smoke/hypotf16_test.cpp b/libc/test/src/math/smoke/hypotf16_test.cpp