From 6362d4e13ca18535adcafd0bf66e5d56a77c478d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 22 Dec 2024 11:47:44 +0000
Subject: [PATCH 1/3] Add extensive and exhaustive tests

Add a generator that will test all inputs for input spaces `u32::MAX` or
smaller (e.g. single-argument `f32` routines).  For anything larger,
still run approximately `u32::MAX` tests, but distribute inputs evenly
across the function domain.

Since we often only want to run one of these tests at a time, this
implementation parallelizes within each test using `rayon`. A custom
test runner is used so a progress bar is possible.

Specific tests must be enabled by setting the `LIBM_EXTENSIVE_TESTS`
environment variable, e.g.

    LIBM_EXTENSIVE_TESTS=all_f16,cos,cosf cargo run ...

Testing on a recent machine, most tests take about two minutes or less.
The Bessel functions are quite slow and take closer to 10 minutes, and
FMA is increased to run for about the same.
---
 crates/libm-test/Cargo.toml                |   9 +
 crates/libm-test/src/gen.rs                |   1 +
 crates/libm-test/src/gen/extensive.rs      | 153 ++++++++++++++
 crates/libm-test/src/gen/random.rs         |   4 +-
 crates/libm-test/src/lib.rs                |   4 +-
 crates/libm-test/src/num.rs                |   2 +-
 crates/libm-test/src/run_cfg.rs            |  37 +++-
 crates/libm-test/tests/z_extensive/main.rs |  14 ++
 crates/libm-test/tests/z_extensive/run.rs  | 234 +++++++++++++++++++++
 9 files changed, 450 insertions(+), 8 deletions(-)
 create mode 100644 crates/libm-test/src/gen/extensive.rs
 create mode 100644 crates/libm-test/tests/z_extensive/main.rs
 create mode 100644 crates/libm-test/tests/z_extensive/run.rs

diff --git a/crates/libm-test/Cargo.toml b/crates/libm-test/Cargo.toml
index 9b3ab5c53..69e96034e 100644
--- a/crates/libm-test/Cargo.toml
+++ b/crates/libm-test/Cargo.toml
@@ -26,12 +26,14 @@ short-benchmarks = []
 [dependencies]
 anyhow = "1.0.90"
 az = { version = "1.2.1", optional = true }
+indicatif = { version = "0.17.9", default-features = false }
 libm = { path = "../..", features = ["unstable-public-internals"] }
 libm-macros = { path = "../libm-macros" }
 musl-math-sys = { path = "../musl-math-sys", optional = true }
 paste = "1.0.15"
 rand = "0.8.5"
 rand_chacha = "0.3.1"
+rayon = "1.10.0"
 rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "std"] }
 
 [target.'cfg(target_family = "wasm")'.dependencies]
@@ -43,11 +45,18 @@ rand = { version = "0.8.5", optional = true }
 
 [dev-dependencies]
 criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
+libtest-mimic = "0.8.1"
 
 [[bench]]
 name = "random"
 harness = false
 
+[[test]]
+# No harness so that we can skip tests at runtime based on env. Prefixed with
+# `z` so these tests get run last.
+name = "z_extensive"
+harness = false
+
 [lints.rust]
 # Values from the chared config.rs used by `libm` but not the test crate
 unexpected_cfgs = { level = "warn", check-cfg = [
diff --git a/crates/libm-test/src/gen.rs b/crates/libm-test/src/gen.rs
index 83e00f31d..e2bfcdf34 100644
--- a/crates/libm-test/src/gen.rs
+++ b/crates/libm-test/src/gen.rs
@@ -2,6 +2,7 @@
 
 pub mod domain_logspace;
 pub mod edge_cases;
+pub mod extensive;
 pub mod random;
 
 /// A wrapper to turn any iterator into an `ExactSizeIterator`. Asserts the final result to ensure
diff --git a/crates/libm-test/src/gen/extensive.rs b/crates/libm-test/src/gen/extensive.rs
new file mode 100644
index 000000000..d8b991b2a
--- /dev/null
+++ b/crates/libm-test/src/gen/extensive.rs
@@ -0,0 +1,153 @@
+use std::fmt;
+use std::ops::RangeInclusive;
+
+use libm::support::MinInt;
+
+use crate::domain::HasDomain;
+use crate::gen::KnownSize;
+use crate::op::OpITy;
+use crate::run_cfg::{int_range, iteration_count};
+use crate::{CheckCtx, GeneratorKind, MathOp, logspace};
+
+/// Generate a sequence of inputs that either cover the domain in completeness (for smaller float
+/// types and single argument functions) or provide evenly spaced inputs across the domain with
+/// approximately `u32::MAX` total iterations.
+pub trait ExtensiveInput<Op> {
+    fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> + Send;
+}
+
+/// Construct an iterator from `logspace` and also calculate the total number of steps expected
+/// for that iterator.
+fn logspace_steps<Op>(
+    start: Op::FTy,
+    end: Op::FTy,
+    ctx: &CheckCtx,
+    argnum: usize,
+) -> (impl Iterator<Item = Op::FTy> + Clone, u64)
+where
+    Op: MathOp,
+    OpITy<Op>: TryFrom<u64, Error: fmt::Debug>,
+    RangeInclusive<OpITy<Op>>: Iterator,
+{
+    let max_steps = iteration_count(ctx, GeneratorKind::Extensive, argnum);
+    let max_steps = OpITy::<Op>::try_from(max_steps).unwrap_or(OpITy::<Op>::MAX);
+    let iter = logspace(start, end, max_steps);
+
+    // `logspace` can't implement `ExactSizeIterator` because of the range, but its size hint
+    // should be accurate (assuming <= usize::MAX iterations).
+    let size_hint = iter.size_hint();
+    assert_eq!(size_hint.0, size_hint.1.unwrap());
+
+    (iter, size_hint.0.try_into().unwrap())
+}
+
+macro_rules! impl_extensive_input {
+    ($fty:ty) => {
+        impl<Op> ExtensiveInput<Op> for ($fty,)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+            Op: HasDomain<Op::FTy>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let start = Op::DOMAIN.range_start();
+                let end = Op::DOMAIN.range_end();
+                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
+                let iter0 = iter0.map(|v| (v,));
+                KnownSize::new(iter0, steps0)
+            }
+        }
+
+        impl<Op> ExtensiveInput<Op> for ($fty, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let start = <$fty>::NEG_INFINITY;
+                let end = <$fty>::INFINITY;
+                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
+                let (iter1, steps1) = logspace_steps::<Op>(start, end, ctx, 1);
+                let iter =
+                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
+                let count = steps0.checked_mul(steps1).unwrap();
+                KnownSize::new(iter, count)
+            }
+        }
+
+        impl<Op> ExtensiveInput<Op> for ($fty, $fty, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let start = <$fty>::NEG_INFINITY;
+                let end = <$fty>::INFINITY;
+
+                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
+                let (iter1, steps1) = logspace_steps::<Op>(start, end, ctx, 1);
+                let (iter2, steps2) = logspace_steps::<Op>(start, end, ctx, 2);
+
+                let iter = iter0
+                    .flat_map(move |first| iter1.clone().map(move |second| (first, second)))
+                    .flat_map(move |(first, second)| {
+                        iter2.clone().map(move |third| (first, second, third))
+                    });
+                let count = steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap();
+
+                KnownSize::new(iter, count)
+            }
+        }
+
+        impl<Op> ExtensiveInput<Op> for (i32, $fty)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let start = <$fty>::NEG_INFINITY;
+                let end = <$fty>::INFINITY;
+
+                let iter0 = int_range(ctx, GeneratorKind::Extensive, 0);
+                let steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
+                let (iter1, steps1) = logspace_steps::<Op>(start, end, ctx, 1);
+
+                let iter =
+                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
+                let count = steps0.checked_mul(steps1).unwrap();
+
+                KnownSize::new(iter, count)
+            }
+        }
+
+        impl<Op> ExtensiveInput<Op> for ($fty, i32)
+        where
+            Op: MathOp<RustArgs = Self, FTy = $fty>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
+                let start = <$fty>::NEG_INFINITY;
+                let end = <$fty>::INFINITY;
+                // The integer is argument 1 here, so its range and step count use that index.
+                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
+                let iter1 = int_range(ctx, GeneratorKind::Extensive, 1);
+                let steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1);
+
+                let iter =
+                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
+                let count = steps0.checked_mul(steps1).unwrap();
+
+                KnownSize::new(iter, count)
+            }
+        }
+    };
+}
+
+impl_extensive_input!(f32);
+impl_extensive_input!(f64);
+
+/// Create a test case iterator for extensive inputs.
+pub fn get_test_cases<Op>(
+    ctx: &CheckCtx,
+) -> impl ExactSizeIterator<Item = Op::RustArgs> + Send + use<'_, Op>
+where
+    Op: MathOp,
+    Op::RustArgs: ExtensiveInput<Op>,
+{
+    Op::RustArgs::get_cases(ctx)
+}
diff --git a/crates/libm-test/src/gen/random.rs b/crates/libm-test/src/gen/random.rs
index 6df944317..29a9dcd2b 100644
--- a/crates/libm-test/src/gen/random.rs
+++ b/crates/libm-test/src/gen/random.rs
@@ -86,7 +86,7 @@ macro_rules! impl_random_input {
             fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
                 let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
                 let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
-                let range0 = int_range(ctx, 0);
+                let range0 = int_range(ctx, GeneratorKind::Random, 0);
                 let iter = random_ints(count0, range0)
                     .flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2)));
                 KnownSize::new(iter, count0 * count1)
@@ -97,7 +97,7 @@ macro_rules! impl_random_input {
             fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
                 let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
                 let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
-                let range1 = int_range(ctx, 1);
+                let range1 = int_range(ctx, GeneratorKind::Random, 1);
                 let iter = random_floats(count0).flat_map(move |f1: $fty| {
                     random_ints(count1, range1.clone()).map(move |f2: i32| (f1, f2))
                 });
diff --git a/crates/libm-test/src/lib.rs b/crates/libm-test/src/lib.rs
index 8a4e782df..a940db1d2 100644
--- a/crates/libm-test/src/lib.rs
+++ b/crates/libm-test/src/lib.rs
@@ -25,7 +25,8 @@ pub use libm::support::{Float, Int, IntTy, MinInt};
 pub use num::{FloatExt, logspace};
 pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty};
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
-pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind};
+use run_cfg::EXTENSIVE_MAX_ITERATIONS;
+pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, skip_extensive_test};
 pub use test_traits::{CheckOutput, Hex, TupleCall};
 
 /// Result type for tests is usually from `anyhow`. Most times there is no success value to
@@ -85,6 +86,7 @@ pub fn test_log(s: &str) {
         writeln!(f, "cargo features: {}", env!("CFG_CARGO_FEATURES")).unwrap();
         writeln!(f, "opt level: {}", env!("CFG_OPT_LEVEL")).unwrap();
         writeln!(f, "target features: {}", env!("CFG_TARGET_FEATURES")).unwrap();
+        writeln!(f, "extensive iterations {}", *EXTENSIVE_MAX_ITERATIONS).unwrap();
 
         Some(f)
     });
diff --git a/crates/libm-test/src/num.rs b/crates/libm-test/src/num.rs
index eff2fbc1f..f693ef02f 100644
--- a/crates/libm-test/src/num.rs
+++ b/crates/libm-test/src/num.rs
@@ -215,7 +215,7 @@ fn as_ulp_steps<F: Float>(x: F) -> Option<F::SignedInt> {
 /// to logarithmic spacing of their values.
 ///
 /// Note that this tends to skip negative zero, so that needs to be checked explicitly.
-pub fn logspace<F: FloatExt>(start: F, end: F, steps: F::Int) -> impl Iterator<Item = F>
+pub fn logspace<F: FloatExt>(start: F, end: F, steps: F::Int) -> impl Iterator<Item = F> + Clone
 where
     RangeInclusive<F::Int>: Iterator,
 {
diff --git a/crates/libm-test/src/run_cfg.rs b/crates/libm-test/src/run_cfg.rs
index 9cede0cc7..48a654caa 100644
--- a/crates/libm-test/src/run_cfg.rs
+++ b/crates/libm-test/src/run_cfg.rs
@@ -10,6 +10,22 @@ use crate::{BaseName, FloatTy, Identifier, test_log};
 /// The environment variable indicating which extensive tests should be run.
 pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS";
 
+/// Specify the number of iterations via this environment variable, rather than using the default.
+pub const EXTENSIVE_ITER_ENV: &str = "LIBM_EXTENSIVE_ITERATIONS";
+
+/// Maximum number of iterations to run for a single routine.
+///
+/// The default value of one greater than `u32::MAX` allows testing single-argument `f32` routines
+/// and single- or double-argument `f16` routines exhaustively. `f64` and `f128` can't feasibly
+/// be tested exhaustively; however, [`EXTENSIVE_ITER_ENV`] can be set to run tests for multiple
+/// hours.
+pub static EXTENSIVE_MAX_ITERATIONS: LazyLock<u64> = LazyLock::new(|| {
+    let default = 1 << 32;
+    env::var(EXTENSIVE_ITER_ENV)
+        .map(|v| v.parse().expect("failed to parse iteration count"))
+        .unwrap_or(default)
+});
+
 /// Context passed to [`CheckOutput`].
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct CheckCtx {
@@ -54,6 +70,7 @@ pub enum CheckBasis {
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum GeneratorKind {
     Domain,
+    Extensive,
     Random,
 }
 
@@ -171,8 +188,14 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
     let mut total_iterations = match gen_kind {
         GeneratorKind::Domain => domain_iter_count,
         GeneratorKind::Random => random_iter_count,
+        GeneratorKind::Extensive => *EXTENSIVE_MAX_ITERATIONS,
     };
 
+    // FMA has a huge domain but is reasonably fast to run, so increase iterations.
+    if ctx.base_name == BaseName::Fma {
+        total_iterations *= 4;
+    }
+
     if cfg!(optimizations_enabled) {
         // Always run at least 10,000 tests.
         total_iterations = total_iterations.max(10_000);
@@ -191,7 +214,7 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
     let total = ntests.pow(t_env.input_count.try_into().unwrap());
 
     let seed_msg = match gen_kind {
-        GeneratorKind::Domain => String::new(),
+        GeneratorKind::Domain | GeneratorKind::Extensive => String::new(),
         GeneratorKind::Random => {
             format!(" using `{SEED_ENV}={}`", str::from_utf8(SEED.as_slice()).unwrap())
         }
@@ -210,7 +233,7 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
 }
 
 /// Some tests require that an integer be kept within reasonable limits; generate that here.
-pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive<i32> {
+pub fn int_range(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -> RangeInclusive<i32> {
     let t_env = TestEnv::from_env(ctx);
 
     if !matches!(ctx.base_name, BaseName::Jn | BaseName::Yn) {
@@ -221,10 +244,17 @@ pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive<i32> {
 
     // The integer argument to `jn` is an iteration count. Limit this to ensure tests can be
     // completed in a reasonable amount of time.
-    if t_env.slow_platform || !cfg!(optimizations_enabled) {
+    let non_extensive_range = if t_env.slow_platform || !cfg!(optimizations_enabled) {
         (-0xf)..=0xff
     } else {
         (-0xff)..=0xffff
+    };
+
+    let extensive_range = (-0xfff)..=0xfffff;
+
+    match gen_kind {
+        GeneratorKind::Extensive => extensive_range,
+        GeneratorKind::Domain | GeneratorKind::Random => non_extensive_range,
     }
 }
 
@@ -241,7 +271,6 @@ pub fn check_near_count(_ctx: &CheckCtx) -> u64 {
 }
 
 /// Check whether extensive actions should be run or skipped.
-#[expect(dead_code, reason = "extensive tests have not yet been added")]
 pub fn skip_extensive_test(ctx: &CheckCtx) -> bool {
     let t_env = TestEnv::from_env(ctx);
     !t_env.should_run_extensive
diff --git a/crates/libm-test/tests/z_extensive/main.rs b/crates/libm-test/tests/z_extensive/main.rs
new file mode 100644
index 000000000..3a2af88bd
--- /dev/null
+++ b/crates/libm-test/tests/z_extensive/main.rs
@@ -0,0 +1,14 @@
+//! `main` is just a wrapper to handle configuration.
+
+#[cfg(not(feature = "test-multiprecision"))]
+fn main() {
+    eprintln!("multiprecision not enabled; skipping extensive tests");
+}
+
+#[cfg(feature = "test-multiprecision")]
+mod run;
+
+#[cfg(feature = "test-multiprecision")]
+fn main() {
+    run::run();
+}
diff --git a/crates/libm-test/tests/z_extensive/run.rs b/crates/libm-test/tests/z_extensive/run.rs
new file mode 100644
index 000000000..7acff5324
--- /dev/null
+++ b/crates/libm-test/tests/z_extensive/run.rs
@@ -0,0 +1,234 @@
+//! Exhaustive tests for `f16` and `f32`, high-iteration for `f64` and `f128`.
+
+use std::fmt;
+use std::io::{self, IsTerminal};
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::time::Duration;
+
+use indicatif::{ProgressBar, ProgressStyle};
+use libm_test::gen::extensive::{self, ExtensiveInput};
+use libm_test::mpfloat::MpOp;
+use libm_test::{
+    CheckBasis, CheckCtx, CheckOutput, MathOp, TestResult, TupleCall, skip_extensive_test,
+};
+use libtest_mimic::{Arguments, Trial};
+use rayon::prelude::*;
+
+/// Run the extensive test suite.
+pub fn run() {
+    let mut args = Arguments::from_args();
+    // Prevent multiple tests from running in parallel; each test gets parallelized internally.
+    args.test_threads = Some(1);
+    let tests = register_all_tests();
+
+    // With default parallelism, the CPU doesn't saturate. We don't need to be nice to
+    // other processes, so do 1.5x to make sure we use all available resources.
+    let threads = std::thread::available_parallelism().map(Into::into).unwrap_or(0) * 3 / 2;
+    rayon::ThreadPoolBuilder::new().num_threads(threads).build_global().unwrap();
+
+    libtest_mimic::run(&args, tests).exit();
+}
+
+macro_rules! mp_extensive_tests {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($attr:meta),*],
+        extra: [$push_to:ident],
+    ) => {
+        $(#[$attr])*
+        register_single_test::<libm_test::op::$fn_name::Routine>(&mut $push_to);
+    };
+}
+
+/// Create a list of tests for consumption by `libtest_mimic`.
+fn register_all_tests() -> Vec<Trial> {
+    let mut all_tests = Vec::new();
+
+    libm_macros::for_each_function! {
+        callback: mp_extensive_tests,
+        extra: [all_tests],
+        skip: [
+            // FIXME: MPFR tests needed
+            remquo,
+            remquof,
+
+            // FIXME: test needed, see
+            // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392
+            nextafter,
+            nextafterf,
+        ],
+    }
+
+    all_tests
+}
+
+/// Add a single test to the list.
+fn register_single_test<Op>(all: &mut Vec<Trial>)
+where
+    Op: MathOp + MpOp,
+    Op::RustArgs: ExtensiveInput<Op> + Send,
+{
+    let test_name = format!("mp_extensive_{}", Op::NAME);
+    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
+    let skip = skip_extensive_test(&ctx);
+
+    let runner = move || {
+        if !cfg!(optimizations_enabled) {
+            panic!("extensive tests should be run with --release");
+        }
+
+        let res = run_single_test::<Op>();
+        let e = match res {
+            Ok(()) => return Ok(()),
+            Err(e) => e,
+        };
+
+        // Format with the `Debug` implementation so we get the error cause chain, and print it
+        // here so we see the result immediately (rather than waiting for all tests to conclude).
+        let e = format!("{e:?}");
+        eprintln!("failure testing {}:{e}\n", Op::IDENTIFIER);
+
+        Err(e.into())
+    };
+
+    all.push(Trial::test(test_name, runner).with_ignored_flag(skip));
+}
+
+/// Test runner for a single routine.
+fn run_single_test<Op>() -> TestResult
+where
+    Op: MathOp + MpOp,
+    Op::RustArgs: ExtensiveInput<Op> + Send,
+{
+    // Small delay before printing anything so other output from the runner has a chance to flush.
+    std::thread::sleep(Duration::from_millis(500));
+    eprintln!();
+
+    let completed = AtomicU64::new(0);
+    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
+    let cases = &mut extensive::get_test_cases::<Op>(&ctx);
+    let total: u64 = cases.len().try_into().unwrap();
+    let pb = Progress::new(Op::NAME, total);
+
+    let test_single_chunk = |mp_vals: &mut Op::MpTy, input_vec: Vec<Op::RustArgs>| -> TestResult {
+        for input in input_vec {
+            // Test the input.
+            let mp_res = Op::run(mp_vals, input);
+            let crate_res = input.call(Op::ROUTINE);
+            crate_res.validate(mp_res, input, &ctx)?;
+
+            let completed = completed.fetch_add(1, Ordering::Relaxed) + 1;
+            pb.update(completed, input);
+        }
+
+        Ok(())
+    };
+
+    // Chunk the cases so Rayon doesn't switch threads between each iterator item. 50k seems near
+    // a performance sweet spot. Ideally we would reuse these allocations rather than discarding,
+    // but that is difficult with Rayon's API.
+    let chunk_size = 50_000;
+    let chunks = std::iter::from_fn(move || {
+        let mut v = Vec::with_capacity(chunk_size);
+        v.extend(cases.take(chunk_size));
+        (!v.is_empty()).then_some(v)
+    });
+
+    // Run the actual tests
+    let res = chunks.par_bridge().try_for_each_init(Op::new_mp, test_single_chunk);
+
+    let real_total = completed.load(Ordering::Relaxed);
+    pb.complete(real_total);
+
+    if res.is_ok() && real_total != total {
+        // Provide a warning if our estimate needs to be updated.
+        panic!("total run {real_total} does not match expected {total}");
+    }
+
+    res
+}
+
+/// Wrapper around a `ProgressBar` that handles styles and non-TTY messages.
+struct Progress {
+    pb: ProgressBar,
+    name_padded: String,
+    final_style: ProgressStyle,
+    is_tty: bool,
+}
+
+impl Progress {
+    const PB_TEMPLATE: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \
+        {human_pos:>13}/{human_len:13} {per_sec:18} eta {eta:8} {msg}";
+    const PB_TEMPLATE_FINAL: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \
+        {human_pos:>13}/{human_len:13} {per_sec:18} done in {elapsed_precise}";
+
+    fn new(name: &str, total: u64) -> Self {
+        eprintln!("starting extensive tests for `{name}`");
+        let name_padded = format!("{name:9}");
+        let is_tty = io::stderr().is_terminal();
+
+        let initial_style =
+            ProgressStyle::with_template(&Self::PB_TEMPLATE.replace("NAME", &name_padded))
+                .unwrap()
+                .progress_chars("##-");
+
+        let final_style =
+            ProgressStyle::with_template(&Self::PB_TEMPLATE_FINAL.replace("NAME", &name_padded))
+                .unwrap()
+                .progress_chars("##-");
+
+        let pb = ProgressBar::new(total);
+        pb.set_style(initial_style);
+
+        Self { pb, final_style, name_padded, is_tty }
+    }
+
+    fn update(&self, completed: u64, input: impl fmt::Debug) {
+        // Infrequently update the progress bar.
+        if completed % 20_000 == 0 {
+            self.pb.set_position(completed);
+        }
+
+        if completed % 500_000 == 0 {
+            self.pb.set_message(format!("input: {input:<24?}"));
+        }
+
+        if !self.is_tty && completed % 5_000_000 == 0 {
+            let len = self.pb.length().unwrap_or_default();
+            eprintln!(
+                "[{elapsed:3?}s {percent:3.0}%] {name} \
+                {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s eta {eta:4}s {input:<24?}",
+                elapsed = self.pb.elapsed().as_secs(),
+                percent = completed as f32 * 100.0 / len as f32,
+                name = self.name_padded,
+                human_pos = completed,
+                human_len = len,
+                per_sec = self.pb.per_sec(),
+                eta = self.pb.eta().as_secs()
+            );
+        }
+    }
+
+    fn complete(self, real_total: u64) {
+        self.pb.set_style(self.final_style);
+        self.pb.set_position(real_total);
+        self.pb.abandon();
+
+        if !self.is_tty {
+            let len = self.pb.length().unwrap_or_default();
+            eprintln!(
+                "[{elapsed:3}s {percent:3.0}%] {name} \
+                {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s done in {elapsed_precise}",
+                elapsed = self.pb.elapsed().as_secs(),
+                percent = real_total as f32 * 100.0 / len as f32,
+                name = self.name_padded,
+                human_pos = real_total,
+                human_len = len,
+                per_sec = self.pb.per_sec(),
+                elapsed_precise = self.pb.elapsed().as_secs(),
+            );
+        }
+
+        eprintln!();
+    }
+}

From f2397e5e9b2e8e7590e126ccdb800374e730dba1 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Sun, 22 Dec 2024 11:47:53 +0000
Subject: [PATCH 2/3] Update precision based on failures from extensive tests

---
 crates/libm-test/src/precision.rs | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/crates/libm-test/src/precision.rs b/crates/libm-test/src/precision.rs
index 4a6ca8af7..696bb3735 100644
--- a/crates/libm-test/src/precision.rs
+++ b/crates/libm-test/src/precision.rs
@@ -55,7 +55,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         Bn::Asin => 1,
         Bn::Asinh => 2,
         Bn::Atan => 1,
-        Bn::Atan2 => 1,
+        Bn::Atan2 => 2,
         Bn::Atanh => 2,
         Bn::Cbrt => 1,
         Bn::Cos => 1,
@@ -187,6 +187,20 @@ impl MaybeOverride<(f32,)> for SpecialCase {
             return XFAIL;
         }
 
+        if (ctx.base_name == BaseName::Lgamma || ctx.base_name == BaseName::LgammaR)
+            && input.0 > 4e36
+            && expected.is_infinite()
+            && !actual.is_infinite()
+        {
+            // This result should saturate but we return a finite value.
+            return XFAIL;
+        }
+
+        if ctx.base_name == BaseName::J0 && input.0 < -1e34 {
+            // Errors get huge close to -inf
+            return XFAIL;
+        }
+
         maybe_check_nan_bits(actual, expected, ctx)
     }
 
@@ -248,6 +262,11 @@ impl MaybeOverride<(f64,)> for SpecialCase {
             return XFAIL;
         }
 
+        if ctx.base_name == BaseName::J0 && input.0 < -1e300 {
+            // Errors get huge close to -inf
+            return XFAIL;
+        }
+
         maybe_check_nan_bits(actual, expected, ctx)
     }
 
@@ -364,6 +383,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase {
         }
     }
 }
+
 impl MaybeOverride<(i32, f64)> for SpecialCase {
     fn check_float<F: Float>(
         input: (i32, f64),

From 9b08ee52b09c8717deb7c3c1e9f3c8fff0039c77 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 2 Jan 2025 10:25:27 +0000
Subject: [PATCH 3/3] Run extensive tests in CI when relevant files change

Add a CI job with a dynamically calculated matrix that runs extensive
jobs on changed files. This makes use of the new
`function-definitions.json` file to determine which changed files
require full tests for a routine to run.
---
 .github/workflows/main.yml        |  59 +++++++++++-
 ci/calculate-exhaustive-matrix.py | 148 ++++++++++++++++++++++++++++++
 2 files changed, 206 insertions(+), 1 deletion(-)
 create mode 100755 ci/calculate-exhaustive-matrix.py

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 023ec58c0..1b2fd12ba 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -14,7 +14,7 @@ env:
 jobs:
   test:
     name: Build and test
-    timeout-minutes: 20
+    timeout-minutes: 25
     strategy:
       fail-fast: false
       matrix:
@@ -186,6 +186,62 @@ jobs:
         rustup component add rustfmt
     - run: cargo fmt -- --check
 
+  # Determine which extensive tests should be run based on changed files.
+  calculate_extensive_matrix:
+    name: Calculate job matrix
+    runs-on: ubuntu-24.04
+    outputs:
+      matrix: ${{ steps.script.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 100
+      - name: Fetch pull request ref
+        run: git fetch origin "$GITHUB_REF:$GITHUB_REF"
+      - run: python3 ci/calculate-exhaustive-matrix.py >> "$GITHUB_OUTPUT"
+        id: script
+
+  extensive:
+    name: Extensive tests for ${{ matrix.ty }}
+    needs:
+      # Wait on `clippy` so we have some confidence that the crate will build
+      - clippy
+      - calculate_extensive_matrix
+    runs-on: ubuntu-24.04
+    timeout-minutes: 80
+    strategy:
+      matrix:
+        # Use the output from `calculate_extensive_matrix` to calculate the matrix
+        # FIXME: it would be better to run all jobs (i.e. all types) but mark those that
+        # didn't change as skipped, rather than completely excluding the job. However,
+        # this is not currently possible https://github.com/actions/runner/issues/1985.
+        include: ${{ fromJSON(needs.calculate_extensive_matrix.outputs.matrix).matrix }}
+    env:
+      CHANGED: ${{ matrix.changed }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Rust
+        run: |
+          rustup update nightly --no-self-update
+          rustup default nightly
+      - uses: Swatinem/rust-cache@v2
+      - name: Download musl source
+        run: ./ci/download-musl.sh
+      - name: Run extensive tests
+        run: |
+          echo "Changed: '$CHANGED'"
+          if [ -z "$CHANGED" ]; then
+            echo "No tests to run, exiting."
+            exit
+          fi
+
+          LIBM_EXTENSIVE_TESTS="$CHANGED" cargo t \
+            --features test-multiprecision,unstable \
+            --release -- extensive
+      - name: Print test logs if available
+        run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
+        shell: bash
+
   success:
     needs:
       - test
@@ -193,6 +249,7 @@ jobs:
       - benchmarks
       - msrv
       - rustfmt
+      - extensive
     runs-on: ubuntu-24.04
     # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency
     # failed" as success. So we have to do some contortions to ensure the job fails if any of its
diff --git a/ci/calculate-exhaustive-matrix.py b/ci/calculate-exhaustive-matrix.py
new file mode 100755
index 000000000..8b42f9389
--- /dev/null
+++ b/ci/calculate-exhaustive-matrix.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+"""Calculate which exhaustive tests should be run as part of CI.
+
+This dynamically prepares a list of routines that had a source file change based on
+git history.
+"""
+
+import subprocess as sp
+import sys
+import json
+from dataclasses import dataclass
+from os import getenv
+from pathlib import Path
+from typing import TypedDict
+
+
+REPO_ROOT = Path(__file__).parent.parent
+GIT = ["git", "-C", REPO_ROOT]
+
+# Don't run exhaustive tests if these files change, even if they contain a function
+# definition.
+IGNORE_FILES = [
+    "src/math/support/",
+    "src/libm_helper.rs",
+    "src/math/arch/intrinsics.rs",
+]
+
+TYPES = ["f16", "f32", "f64", "f128"]
+
+
+class FunctionDef(TypedDict):
+    """Type for an entry in `function-definitions.json`"""
+
+    sources: list[str]
+    type: str
+
+
+@dataclass
+class Context:
+    gh_ref: str | None
+    changed: list[Path]
+    defs: dict[str, FunctionDef]
+
+    def __init__(self) -> None:
+        self.gh_ref = getenv("GITHUB_REF")
+        self.changed = []
+        self._init_change_list()
+
+        with open(REPO_ROOT.joinpath("etc/function-definitions.json")) as f:
+            defs = json.load(f)
+
+        defs.pop("__comment", None)
+        self.defs = defs
+
+    def _init_change_list(self):
+        """Create a list of files that have been changed. This uses GITHUB_REF when it
+        names a PR merge ref; otherwise no diff is taken and the list stays empty.
+        """
+
+        # For pull requests, GitHub creates a ref `refs/pull/1234/merge` (1234 being
+        # the PR number), and sets this as `GITHUB_REF`.
+        ref = self.gh_ref
+        eprint(f"using ref `{ref}`")
+        if ref is None or "merge" not in ref:
+            # If the ref is not for `merge` then we are not in PR CI
+            eprint("No diff available for ref")
+            return
+
+        # The ref is for a dummy merge commit. We can extract the merge base by
+        # inspecting all parents (`^@`).
+        merge_sha = sp.check_output(
+            GIT + ["show-ref", "--hash", ref], text=True
+        ).strip()
+        merge_log = sp.check_output(GIT + ["log", "-1", merge_sha], text=True)
+        eprint(f"Merge:\n{merge_log}\n")
+
+        parents = (
+            sp.check_output(GIT + ["rev-parse", f"{merge_sha}^@"], text=True)
+            .strip()
+            .splitlines()
+        )
+        assert len(parents) == 2, f"expected two-parent merge but got:\n{parents}"
+        base = parents[0].strip()
+        incoming = parents[1].strip()
+
+        eprint(f"base: {base}, incoming: {incoming}")
+        textlist = sp.check_output(
+            GIT + ["diff", base, incoming, "--name-only"], text=True
+        )
+        self.changed = [Path(p) for p in textlist.splitlines()]
+
+    @staticmethod
+    def _ignore_file(fname: str) -> bool:
+        return any(fname.startswith(pfx) for pfx in IGNORE_FILES)
+
+    def changed_routines(self) -> dict[str, list[str]]:
+        """Create a list of routines for which one or more files have been updated,
+        separated by type.
+        """
+        routines = set()
+        for name, meta in self.defs.items():
+            # Don't update if changes to the file should be ignored
+            sources = (f for f in meta["sources"] if not self._ignore_file(f))
+
+            # Select changed files
+            changed = [f for f in sources if Path(f) in self.changed]
+
+            if len(changed) > 0:
+                eprint(f"changed files for {name}: {changed}")
+                routines.add(name)
+
+        ret = {}
+        for r in sorted(routines):
+            ret.setdefault(self.defs[r]["type"], []).append(r)
+
+        return ret
+
+    def make_workflow_output(self) -> str:
+        """Create a JSON object with a list holding one item per entry in `TYPES`, each
+        giving the type and a comma-separated string of its changed routines, if any.
+        """
+        changed = self.changed_routines()
+        ret = []
+        for ty in TYPES:
+            ty_changed = changed.get(ty, [])
+            item = {
+                "ty": ty,
+                "changed": ",".join(ty_changed),
+            }
+            ret.append(item)
+        output = json.dumps({"matrix": ret}, separators=(",", ":"))
+        eprint(f"output: {output}")
+        return output
+
+
+def eprint(*args, **kwargs):
+    """Print to stderr."""
+    print(*args, file=sys.stderr, **kwargs)
+
+
+def main():
+    ctx = Context()
+    output = ctx.make_workflow_output()
+    print(f"matrix={output}")
+
+
+if __name__ == "__main__":
+    main()