diff --git a/ci/run.sh b/ci/run.sh index 2512dc633..dcbe1caf4 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -4,7 +4,9 @@ set -eux target="${1:-}" -if [ -z "${1:-}" ]; then +export RUST_BACKTRACE="${RUST_BACKTRACE:-full}" + +if [ -z "$target" ]; then host_target=$(rustc -vV | awk '/^host/ { print $2 }') echo "Defaulted to host target $host_target" target="$host_target" @@ -30,6 +32,8 @@ else $run --features no-asm --release $run --features no-f16-f128 $run --features no-f16-f128 --release + $run --benches + $run --benches --release fi if [ "${TEST_VERBATIM:-}" = "1" ]; then diff --git a/testcrate/Cargo.toml b/testcrate/Cargo.toml index 1de0c3976..6b5c4cf48 100644 --- a/testcrate/Cargo.toml +++ b/testcrate/Cargo.toml @@ -21,6 +21,10 @@ path = ".." default-features = false features = ["public-test-deps"] +[dev-dependencies] +criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } +paste = "1.0.15" + [target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies] test = { git = "https://github.com/japaric/utest" } utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japaric/utest" } @@ -34,6 +38,48 @@ no-f16-f128 = ["compiler_builtins/no-f16-f128"] mem = ["compiler_builtins/mem"] mangled-names = ["compiler_builtins/mangled-names"] # Skip tests that rely on f128 symbols being available on the system -no-sys-f128 = ["no-sys-f128-int-convert"] +no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"] # Some platforms have some f128 functions but everything except integer conversions no-sys-f128-int-convert = [] +no-sys-f16-f128-convert = [] +# Skip tests that rely on f16 symbols being available on the system +no-sys-f16 = [] + +# Enable report generation without bringing in more dependencies by default +benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] + +[[bench]] +name = "float_add" +harness = false + +[[bench]] 
+name = "float_sub" +harness = false + +[[bench]] +name = "float_mul" +harness = false + +[[bench]] +name = "float_div" +harness = false + +[[bench]] +name = "float_cmp" +harness = false + +[[bench]] +name = "float_conv" +harness = false + +[[bench]] +name = "float_extend" +harness = false + +[[bench]] +name = "float_trunc" +harness = false + +[[bench]] +name = "float_pow" +harness = false diff --git a/testcrate/bench-175b45d1-aarch64-macos.txt b/testcrate/bench-175b45d1-aarch64-macos.txt new file mode 100644 index 000000000..e79bbe368 --- /dev/null +++ b/testcrate/bench-175b45d1-aarch64-macos.txt @@ -0,0 +1,500 @@ + +running 0 tests + +test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s + +add_f32 compiler-builtins + time: [35.804 µs 35.863 µs 35.920 µs] +Found 5 outliers among 100 measurements (5.00%) + 2 (2.00%) high mild + 3 (3.00%) high severe + +add_f32 system time: [39.084 µs 39.127 µs 39.169 µs] +Found 11 outliers among 100 measurements (11.00%) + 7 (7.00%) high mild + 4 (4.00%) high severe + +add_f32 assembly (aarch64 unix) + time: [8.1034 µs 8.1441 µs 8.1866 µs] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +add_f64 compiler-builtins + time: [35.647 µs 35.725 µs 35.799 µs] +Found 10 outliers among 100 measurements (10.00%) + 8 (8.00%) high mild + 2 (2.00%) high severe + +add_f64 system time: [39.308 µs 39.322 µs 39.336 µs] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe + +add_f64 assembly (aarch64 unix) + time: [8.0401 µs 8.0442 µs 8.0499 µs] +Found 11 outliers among 100 measurements (11.00%) + 2 (2.00%) high mild + 9 (9.00%) high severe + +add_f128 compiler-builtins + time: [41.801 µs 41.986 µs 42.201 µs] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe + +cmp_f32_gt compiler-builtins + time: [13.579 µs 13.675 µs 13.778 µs] +Found 16 outliers among 100 measurements (16.00%) + 6 (6.00%) high 
mild + 10 (10.00%) high severe + +cmp_f32_gt system time: [12.343 µs 12.348 µs 12.355 µs] +Found 13 outliers among 100 measurements (13.00%) + 1 (1.00%) low mild + 3 (3.00%) high mild + 9 (9.00%) high severe + +cmp_f32_gt assembly (aarch64 unix) + time: [8.2593 µs 8.3185 µs 8.3813 µs] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +cmp_f32_unord compiler-builtins + time: [11.977 µs 12.042 µs 12.109 µs] +Found 13 outliers among 100 measurements (13.00%) + 5 (5.00%) low severe + 6 (6.00%) low mild + 2 (2.00%) high mild + +cmp_f32_unord system time: [8.1236 µs 8.1736 µs 8.2350 µs] +Found 18 outliers among 100 measurements (18.00%) + 5 (5.00%) high mild + 13 (13.00%) high severe + +cmp_f32_unord assembly (aarch64 unix) + time: [8.1446 µs 8.2080 µs 8.2762 µs] +Found 14 outliers among 100 measurements (14.00%) + 6 (6.00%) high mild + 8 (8.00%) high severe + +cmp_f64_gt compiler-builtins + time: [16.073 µs 16.077 µs 16.082 µs] +Found 17 outliers among 100 measurements (17.00%) + 2 (2.00%) low mild + 4 (4.00%) high mild + 11 (11.00%) high severe + +cmp_f64_gt system time: [12.456 µs 12.487 µs 12.522 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +cmp_f64_gt assembly (aarch64 unix) + time: [8.0557 µs 8.0616 µs 8.0685 µs] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +cmp_f64_unord compiler-builtins + time: [10.715 µs 10.724 µs 10.737 µs] +Found 13 outliers among 100 measurements (13.00%) + 3 (3.00%) high mild + 10 (10.00%) high severe + +cmp_f64_unord system time: [8.0692 µs 8.0734 µs 8.0784 µs] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +cmp_f64_unord assembly (aarch64 unix) + time: [8.0569 µs 8.0677 µs 8.0818 µs] +Found 18 outliers among 100 measurements (18.00%) + 4 (4.00%) high mild + 14 (14.00%) high severe + +cmp_f128_gt compiler-builtins + time: [18.234 µs 18.401 µs 18.602 µs] + 
+cmp_f128_unord compiler-builtins + time: [13.410 µs 13.471 µs 13.542 µs] +Found 7 outliers among 100 measurements (7.00%) + 7 (7.00%) high mild + +conv_u32_f32 compiler-builtins + time: [774.58 ns 776.01 ns 777.59 ns] +Found 9 outliers among 100 measurements (9.00%) + 2 (2.00%) high mild + 7 (7.00%) high severe + +conv_u32_f32 system time: [622.68 ns 625.64 ns 629.26 ns] +Found 16 outliers among 100 measurements (16.00%) + 7 (7.00%) high mild + 9 (9.00%) high severe + +conv_u32_f32 assembly (aarch64 unix) + time: [468.05 ns 469.76 ns 471.46 ns] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +conv_u32_f64 compiler-builtins + time: [617.61 ns 618.00 ns 618.52 ns] +Found 13 outliers among 100 measurements (13.00%) + 4 (4.00%) high mild + 9 (9.00%) high severe + +conv_u32_f64 system time: [469.56 ns 471.03 ns 472.81 ns] +Found 11 outliers among 100 measurements (11.00%) + 7 (7.00%) high mild + 4 (4.00%) high severe + +conv_u32_f64 assembly (aarch64 unix) + time: [464.43 ns 465.01 ns 465.72 ns] +Found 13 outliers among 100 measurements (13.00%) + 5 (5.00%) high mild + 8 (8.00%) high severe + +conv_u64_f32 compiler-builtins + time: [847.95 ns 848.19 ns 848.46 ns] +Found 19 outliers among 100 measurements (19.00%) + 3 (3.00%) low mild + 9 (9.00%) high mild + 7 (7.00%) high severe + +conv_u64_f32 system time: [701.68 ns 701.95 ns 702.30 ns] +Found 10 outliers among 100 measurements (10.00%) + 4 (4.00%) high mild + 6 (6.00%) high severe + +conv_u64_f32 assembly (aarch64 unix) + time: [511.73 ns 512.43 ns 513.32 ns] +Found 6 outliers among 100 measurements (6.00%) + 6 (6.00%) high mild + +conv_u64_f64 compiler-builtins + time: [681.23 ns 682.55 ns 684.30 ns] +Found 18 outliers among 100 measurements (18.00%) + 1 (1.00%) high mild + 17 (17.00%) high severe + +conv_u64_f64 system time: [679.34 ns 679.57 ns 679.88 ns] +Found 18 outliers among 100 measurements (18.00%) + 1 (1.00%) low mild + 6 (6.00%) high mild + 11 (11.00%) 
high severe + +conv_u64_f64 assembly (aarch64 unix) + time: [509.90 ns 510.09 ns 510.30 ns] +Found 15 outliers among 100 measurements (15.00%) + 6 (6.00%) high mild + 9 (9.00%) high severe + +conv_u128_f32 compiler-builtins + time: [1.1368 µs 1.1372 µs 1.1377 µs] +Found 14 outliers among 100 measurements (14.00%) + 8 (8.00%) high mild + 6 (6.00%) high severe + +conv_u128_f32 system time: [1.4338 µs 1.4370 µs 1.4410 µs] +Found 7 outliers among 100 measurements (7.00%) + 2 (2.00%) high mild + 5 (5.00%) high severe + +conv_u128_f64 compiler-builtins + time: [1.0133 µs 1.0143 µs 1.0156 µs] +Found 16 outliers among 100 measurements (16.00%) + 2 (2.00%) high mild + 14 (14.00%) high severe + +conv_u128_f64 system time: [1.3473 µs 1.3530 µs 1.3600 µs] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +conv_i32_f32 compiler-builtins + time: [906.53 ns 907.86 ns 909.23 ns] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe + +conv_i32_f32 system time: [914.53 ns 915.69 ns 917.01 ns] +Found 10 outliers among 100 measurements (10.00%) + 6 (6.00%) high mild + 4 (4.00%) high severe + +conv_i32_f32 assembly (aarch64 unix) + time: [464.55 ns 465.10 ns 465.83 ns] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +conv_i32_f64 compiler-builtins + time: [617.63 ns 617.92 ns 618.27 ns] +Found 12 outliers among 100 measurements (12.00%) + 3 (3.00%) high mild + 9 (9.00%) high severe + +conv_i32_f64 system time: [622.83 ns 624.19 ns 625.61 ns] +Found 6 outliers among 100 measurements (6.00%) + 5 (5.00%) high mild + 1 (1.00%) high severe + +conv_i32_f64 assembly (aarch64 unix) + time: [465.24 ns 466.04 ns 466.95 ns] +Found 11 outliers among 100 measurements (11.00%) + 4 (4.00%) high mild + 7 (7.00%) high severe + +conv_i64_f32 compiler-builtins + time: [852.67 ns 853.92 ns 855.34 ns] +Found 11 outliers among 100 measurements (11.00%) + 3 (3.00%) high mild + 8 (8.00%) high severe + +conv_i64_f32 
system time: [906.94 ns 908.04 ns 909.33 ns] +Found 15 outliers among 100 measurements (15.00%) + 2 (2.00%) high mild + 13 (13.00%) high severe + +conv_i64_f32 assembly (aarch64 unix) + time: [510.84 ns 511.27 ns 511.80 ns] +Found 8 outliers among 100 measurements (8.00%) + 3 (3.00%) high mild + 5 (5.00%) high severe + +conv_i64_f64 compiler-builtins + time: [932.35 ns 932.97 ns 933.76 ns] +Found 10 outliers among 100 measurements (10.00%) + 4 (4.00%) high mild + 6 (6.00%) high severe + +conv_i64_f64 system time: [955.91 ns 958.95 ns 962.05 ns] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe + +conv_i64_f64 assembly (aarch64 unix) + time: [510.19 ns 510.72 ns 511.44 ns] +Found 9 outliers among 100 measurements (9.00%) + 5 (5.00%) high mild + 4 (4.00%) high severe + +conv_i128_f32 compiler-builtins + time: [1.4248 µs 1.4285 µs 1.4323 µs] +Found 12 outliers among 100 measurements (12.00%) + 7 (7.00%) high mild + 5 (5.00%) high severe + +conv_i128_f32 system time: [1.6970 µs 1.7017 µs 1.7069 µs] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe + +conv_i128_f64 compiler-builtins + time: [1.3132 µs 1.3161 µs 1.3191 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +conv_i128_f64 system time: [1.6071 µs 1.6100 µs 1.6133 µs] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe + +conv_f64_u32 compiler-builtins + time: [640.35 ns 641.00 ns 641.68 ns] +Found 6 outliers among 100 measurements (6.00%) + 4 (4.00%) high mild + 2 (2.00%) high severe + +conv_f64_u32 system time: [640.87 ns 641.63 ns 642.42 ns] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +conv_f64_u32 assembly (aarch64 unix) + time: [482.02 ns 482.67 ns 483.38 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high severe + +conv_f64_u64 compiler-builtins + time: 
[638.58 ns 638.98 ns 639.45 ns] +Found 15 outliers among 100 measurements (15.00%) + 1 (1.00%) high mild + 14 (14.00%) high severe + +conv_f64_u64 system time: [642.54 ns 644.07 ns 645.59 ns] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe + +conv_f64_u64 assembly (aarch64 unix) + time: [482.65 ns 483.70 ns 484.87 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +conv_f64_u128 compiler-builtins + time: [1.0631 µs 1.0652 µs 1.0674 µs] +Found 8 outliers among 100 measurements (8.00%) + 7 (7.00%) high mild + 1 (1.00%) high severe + +conv_f64_u128 system time: [821.41 ns 823.45 ns 825.74 ns] +Found 11 outliers among 100 measurements (11.00%) + 8 (8.00%) high mild + 3 (3.00%) high severe + +conv_f64_i32 compiler-builtins + time: [826.76 ns 845.08 ns 870.23 ns] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +conv_f64_i32 system time: [764.12 ns 764.63 ns 765.26 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high severe + +conv_f64_i32 assembly (aarch64 unix) + time: [484.50 ns 485.98 ns 487.54 ns] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +conv_f64_i64 compiler-builtins + time: [797.27 ns 798.19 ns 799.84 ns] +Found 9 outliers among 100 measurements (9.00%) + 5 (5.00%) high mild + 4 (4.00%) high severe + +conv_f64_i64 system time: [768.74 ns 769.52 ns 770.23 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high severe + +conv_f64_i64 assembly (aarch64 unix) + time: [480.59 ns 481.03 ns 481.46 ns] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +conv_f64_i128 compiler-builtins + time: [1.0577 µs 1.0591 µs 1.0606 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +conv_f64_i128 system time: [1.0181 µs 1.0195 µs 1.0211 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high 
mild + +conv_f32_u32 compiler-builtins + time: [800.40 ns 801.39 ns 802.35 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +conv_f32_u32 system time: [638.12 ns 638.34 ns 638.63 ns] +Found 11 outliers among 100 measurements (11.00%) + 4 (4.00%) high mild + 7 (7.00%) high severe + +conv_f32_u32 assembly (aarch64 unix) + time: [479.37 ns 480.97 ns 483.32 ns] +Found 13 outliers among 100 measurements (13.00%) + 6 (6.00%) high mild + 7 (7.00%) high severe + +conv_f32_u64 compiler-builtins + time: [801.95 ns 803.64 ns 805.75 ns] + +conv_f32_u64 system time: [638.20 ns 638.56 ns 639.07 ns] +Found 10 outliers among 100 measurements (10.00%) + 1 (1.00%) high mild + 9 (9.00%) high severe + +conv_f32_u64 assembly (aarch64 unix) + time: [480.07 ns 480.47 ns 480.86 ns] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +conv_f32_u128 compiler-builtins + time: [1.1579 µs 1.1623 µs 1.1657 µs] +Found 14 outliers among 100 measurements (14.00%) + 2 (2.00%) low severe + 7 (7.00%) high mild + 5 (5.00%) high severe + +conv_f32_u128 system time: [1.0344 µs 1.0394 µs 1.0450 µs] + +conv_f32_i32 compiler-builtins + time: [800.14 ns 801.52 ns 803.26 ns] +Found 10 outliers among 100 measurements (10.00%) + 8 (8.00%) high mild + 2 (2.00%) high severe + +conv_f32_i32 system time: [741.36 ns 741.74 ns 742.13 ns] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe + +conv_f32_i32 assembly (aarch64 unix) + time: [484.35 ns 486.08 ns 488.11 ns] +Found 17 outliers among 100 measurements (17.00%) + 9 (9.00%) high mild + 8 (8.00%) high severe + +conv_f32_i64 compiler-builtins + time: [800.94 ns 802.68 ns 804.74 ns] + +conv_f32_i64 system time: [748.60 ns 750.68 ns 753.16 ns] +Found 9 outliers among 100 measurements (9.00%) + 4 (4.00%) high mild + 5 (5.00%) high severe + +conv_f32_i64 assembly (aarch64 unix) + time: [480.70 ns 481.23 ns 481.82 ns] +Found 4 outliers among 100 
measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe + +conv_f32_i128 compiler-builtins + time: [1.1774 µs 1.1829 µs 1.1887 µs] +Found 11 outliers among 100 measurements (11.00%) + 1 (1.00%) low severe + 7 (7.00%) low mild + 1 (1.00%) high mild + 2 (2.00%) high severe + +conv_f32_i128 system time: [1.1785 µs 1.1853 µs 1.1941 µs] +Found 7 outliers among 100 measurements (7.00%) + 2 (2.00%) high mild + 5 (5.00%) high severe + +div_f32 compiler-builtins + time: [38.852 µs 39.011 µs 39.178 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild + +div_f32 system time: [41.846 µs 41.920 µs 42.005 µs] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +div_f32 assembly (aarch64 unix) + time: [8.1309 µs 8.1627 µs 8.2005 µs] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +div_f64 compiler-builtins + time: [50.369 µs 50.605 µs 50.857 µs] +Found 15 outliers among 100 measurements (15.00%) + 11 (11.00%) high mild + 4 (4.00%) high severe + +div_f64 system time: [53.506 µs 53.582 µs 53.676 µs] +Found 8 outliers among 100 measurements (8.00%) + 4 (4.00%) high mild + 4 (4.00%) high severe + +div_f64 assembly (aarch64 unix) + time: [8.0695 µs 8.0807 µs 8.0948 µs] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe + diff --git a/testcrate/bench-3cee6376-aarch64-macos.txt b/testcrate/bench-3cee6376-aarch64-macos.txt new file mode 100644 index 000000000..131e7a85a --- /dev/null +++ b/testcrate/bench-3cee6376-aarch64-macos.txt @@ -0,0 +1,699 @@ + +running 0 tests + +test result: ok. 
0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s + +add_f32/compiler-builtins + time: [36.813 µs 37.048 µs 37.303 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +add_f32/system time: [39.103 µs 39.142 µs 39.189 µs] +Found 8 outliers among 100 measurements (8.00%) + 2 (2.00%) high mild + 6 (6.00%) high severe +add_f32/assembly (aarch64 unix) + time: [8.3786 µs 8.4680 µs 8.5570 µs] + +add_f64/compiler-builtins + time: [35.784 µs 35.819 µs 35.863 µs] +Found 4 outliers among 100 measurements (4.00%) + 1 (1.00%) high mild + 3 (3.00%) high severe +add_f64/system time: [39.634 µs 39.689 µs 39.746 µs] +Found 16 outliers among 100 measurements (16.00%) + 4 (4.00%) high mild + 12 (12.00%) high severe +add_f64/assembly (aarch64 unix) + time: [8.0533 µs 8.0599 µs 8.0670 µs] +Found 14 outliers among 100 measurements (14.00%) + 6 (6.00%) high mild + 8 (8.00%) high severe + +add_f128/compiler-builtins + time: [41.830 µs 41.920 µs 42.005 µs] + +cmp_f32_gt/compiler-builtins + time: [13.405 µs 13.411 µs 13.418 µs] +Found 18 outliers among 100 measurements (18.00%) + 4 (4.00%) high mild + 14 (14.00%) high severe +cmp_f32_gt/system time: [12.348 µs 12.355 µs 12.363 µs] +Found 12 outliers among 100 measurements (12.00%) + 2 (2.00%) high mild + 10 (10.00%) high severe +cmp_f32_gt/assembly (aarch64 unix) + time: [8.1233 µs 8.1625 µs 8.2072 µs] +Found 12 outliers among 100 measurements (12.00%) + 7 (7.00%) high mild + 5 (5.00%) high severe + +cmp_f32_unord/compiler-builtins + time: [11.349 µs 11.467 µs 11.584 µs] +cmp_f32_unord/system time: [8.0714 µs 8.0792 µs 8.0890 µs] +Found 16 outliers among 100 measurements (16.00%) + 4 (4.00%) high mild + 12 (12.00%) high severe +cmp_f32_unord/assembly (aarch64 unix) + time: [8.1121 µs 8.1705 µs 8.2325 µs] +Found 20 outliers among 100 measurements (20.00%) + 3 (3.00%) high mild + 17 (17.00%) high severe + +cmp_f64_gt/compiler-builtins + time: [13.749 µs 13.837 µs 13.934 µs] +Found 20 outliers 
among 100 measurements (20.00%) + 9 (9.00%) low mild + 7 (7.00%) high mild + 4 (4.00%) high severe +cmp_f64_gt/system time: [12.475 µs 12.515 µs 12.565 µs] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild +cmp_f64_gt/assembly (aarch64 unix) + time: [8.0456 µs 8.0540 µs 8.0653 µs] +Found 12 outliers among 100 measurements (12.00%) + 3 (3.00%) high mild + 9 (9.00%) high severe + +cmp_f64_unord/compiler-builtins + time: [10.723 µs 10.730 µs 10.739 µs] +Found 15 outliers among 100 measurements (15.00%) + 5 (5.00%) high mild + 10 (10.00%) high severe +cmp_f64_unord/system time: [8.0944 µs 8.1296 µs 8.1683 µs] +Found 17 outliers among 100 measurements (17.00%) + 4 (4.00%) high mild + 13 (13.00%) high severe +cmp_f64_unord/assembly (aarch64 unix) + time: [8.1042 µs 8.1337 µs 8.1662 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild + +cmp_f128_gt/compiler-builtins + time: [20.508 µs 20.558 µs 20.615 µs] +Found 8 outliers among 100 measurements (8.00%) + 2 (2.00%) high mild + 6 (6.00%) high severe + +cmp_f128_unord/compiler-builtins + time: [13.332 µs 13.346 µs 13.360 µs] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe + +conv_u32_f32/compiler-builtins + time: [621.20 ns 621.89 ns 622.65 ns] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe +conv_u32_f32/system time: [621.44 ns 622.08 ns 622.74 ns] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe +conv_u32_f32/assembly (aarch64 unix) + time: [465.96 ns 466.65 ns 467.45 ns] +Found 13 outliers among 100 measurements (13.00%) + 3 (3.00%) high mild + 10 (10.00%) high severe + +conv_u32_f64/compiler-builtins + time: [619.71 ns 620.51 ns 621.52 ns] +Found 5 outliers among 100 measurements (5.00%) + 4 (4.00%) high mild + 1 (1.00%) high severe +conv_u32_f64/system time: [466.60 ns 467.14 ns 467.77 ns] +Found 2 outliers among 100 measurements (2.00%) 
+ 2 (2.00%) high mild +conv_u32_f64/assembly (aarch64 unix) + time: [464.02 ns 464.32 ns 464.69 ns] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +conv_u64_f32/compiler-builtins + time: [851.24 ns 852.98 ns 854.77 ns] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +conv_u64_f32/system time: [724.35 ns 729.43 ns 735.07 ns] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild +conv_u64_f32/assembly (aarch64 unix) + time: [513.30 ns 514.64 ns 516.16 ns] +Found 8 outliers among 100 measurements (8.00%) + 8 (8.00%) high mild + +conv_u64_f64/compiler-builtins + time: [850.72 ns 853.26 ns 856.54 ns] +Found 15 outliers among 100 measurements (15.00%) + 2 (2.00%) high mild + 13 (13.00%) high severe +conv_u64_f64/system time: [681.43 ns 682.54 ns 683.79 ns] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe +conv_u64_f64/assembly (aarch64 unix) + time: [511.37 ns 511.71 ns 512.02 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high severe + +conv_u128_f32/compiler-builtins + time: [1.1395 µs 1.1409 µs 1.1424 µs] +Found 10 outliers among 100 measurements (10.00%) + 6 (6.00%) high mild + 4 (4.00%) high severe +conv_u128_f32/system time: [1.4348 µs 1.4369 µs 1.4390 µs] +Found 5 outliers among 100 measurements (5.00%) + 4 (4.00%) high mild + 1 (1.00%) high severe + +conv_u128_f64/compiler-builtins + time: [1.0148 µs 1.0157 µs 1.0167 µs] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe +conv_u128_f64/system time: [1.3404 µs 1.3423 µs 1.3442 µs] +Found 8 outliers among 100 measurements (8.00%) + 7 (7.00%) high mild + 1 (1.00%) high severe + +conv_i32_f32/compiler-builtins + time: [902.89 ns 903.81 ns 904.84 ns] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe +conv_i32_f32/system time: [942.62 ns 949.04 ns 955.77 ns] +Found 4 outliers 
among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe +conv_i32_f32/assembly (aarch64 unix) + time: [466.06 ns 466.60 ns 467.27 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high severe + +conv_i32_f64/compiler-builtins + time: [618.98 ns 619.24 ns 619.55 ns] +Found 17 outliers among 100 measurements (17.00%) + 1 (1.00%) low mild + 3 (3.00%) high mild + 13 (13.00%) high severe +conv_i32_f64/system time: [622.18 ns 623.41 ns 624.85 ns] +Found 8 outliers among 100 measurements (8.00%) + 5 (5.00%) high mild + 3 (3.00%) high severe +conv_i32_f64/assembly (aarch64 unix) + time: [466.26 ns 466.76 ns 467.35 ns] +Found 9 outliers among 100 measurements (9.00%) + 5 (5.00%) high mild + 4 (4.00%) high severe + +conv_i64_f32/compiler-builtins + time: [850.11 ns 850.45 ns 850.88 ns] +Found 15 outliers among 100 measurements (15.00%) + 1 (1.00%) low severe + 1 (1.00%) low mild + 3 (3.00%) high mild + 10 (10.00%) high severe +conv_i64_f32/system time: [908.36 ns 908.70 ns 909.10 ns] +Found 12 outliers among 100 measurements (12.00%) + 3 (3.00%) high mild + 9 (9.00%) high severe +conv_i64_f32/assembly (aarch64 unix) + time: [513.56 ns 514.44 ns 515.38 ns] +Found 8 outliers among 100 measurements (8.00%) + 8 (8.00%) high mild + +conv_i64_f64/compiler-builtins + time: [935.39 ns 935.78 ns 936.26 ns] +Found 13 outliers among 100 measurements (13.00%) + 5 (5.00%) high mild + 8 (8.00%) high severe +conv_i64_f64/system time: [946.56 ns 947.33 ns 948.20 ns] +Found 8 outliers among 100 measurements (8.00%) + 6 (6.00%) high mild + 2 (2.00%) high severe +conv_i64_f64/assembly (aarch64 unix) + time: [511.55 ns 512.03 ns 512.56 ns] +Found 21 outliers among 100 measurements (21.00%) + 4 (4.00%) high mild + 17 (17.00%) high severe + +conv_i128_f32/compiler-builtins + time: [1.4206 µs 1.4218 µs 1.4232 µs] +Found 10 outliers among 100 measurements (10.00%) + 5 (5.00%) high mild + 5 (5.00%) high severe +conv_i128_f32/system time: [1.6863 µs 1.6891 µs 
1.6922 µs] +Found 10 outliers among 100 measurements (10.00%) + 9 (9.00%) high mild + 1 (1.00%) high severe + +conv_i128_f64/compiler-builtins + time: [1.3110 µs 1.3122 µs 1.3136 µs] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe +conv_i128_f64/system time: [1.6022 µs 1.6048 µs 1.6090 µs] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe + +conv_f64_u32/compiler-builtins + time: [798.65 ns 799.42 ns 800.39 ns] +Found 15 outliers among 100 measurements (15.00%) + 6 (6.00%) high mild + 9 (9.00%) high severe +conv_f64_u32/system time: [639.48 ns 639.88 ns 640.40 ns] +Found 16 outliers among 100 measurements (16.00%) + 1 (1.00%) low mild + 5 (5.00%) high mild + 10 (10.00%) high severe +conv_f64_u32/assembly (aarch64 unix) + time: [480.78 ns 481.35 ns 482.17 ns] +Found 7 outliers among 100 measurements (7.00%) + 5 (5.00%) high mild + 2 (2.00%) high severe + +conv_f64_u64/compiler-builtins + time: [799.56 ns 800.54 ns 801.89 ns] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe +conv_f64_u64/system time: [640.72 ns 641.24 ns 641.81 ns] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe +conv_f64_u64/assembly (aarch64 unix) + time: [481.54 ns 482.48 ns 483.53 ns] +Found 6 outliers among 100 measurements (6.00%) + 1 (1.00%) low severe + 1 (1.00%) low mild + 3 (3.00%) high mild + 1 (1.00%) high severe + +conv_f64_u128/compiler-builtins + time: [1.0510 µs 1.0515 µs 1.0520 µs] +Found 13 outliers among 100 measurements (13.00%) + 1 (1.00%) low mild + 2 (2.00%) high mild + 10 (10.00%) high severe +conv_f64_u128/system time: [818.45 ns 819.23 ns 820.15 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +conv_f64_i32/compiler-builtins + time: [800.56 ns 801.31 ns 802.21 ns] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe 
+conv_f64_i32/system time: [765.62 ns 766.15 ns 766.80 ns] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe +conv_f64_i32/assembly (aarch64 unix) + time: [471.65 ns 472.77 ns 473.89 ns] +Found 10 outliers among 100 measurements (10.00%) + 1 (1.00%) low mild + 8 (8.00%) high mild + 1 (1.00%) high severe + +conv_f64_i64/compiler-builtins + time: [801.00 ns 804.55 ns 808.72 ns] +Found 18 outliers among 100 measurements (18.00%) + 6 (6.00%) high mild + 12 (12.00%) high severe +conv_f64_i64/system time: [770.28 ns 772.47 ns 775.21 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild +conv_f64_i64/assembly (aarch64 unix) + time: [491.56 ns 494.96 ns 499.19 ns] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +conv_f64_i128/compiler-builtins + time: [1.0637 µs 1.0704 µs 1.0762 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +conv_f64_i128/system time: [1.0022 µs 1.0027 µs 1.0033 µs] +Found 4 outliers among 100 measurements (4.00%) + 1 (1.00%) low severe + 3 (3.00%) high severe + +conv_f32_u32/compiler-builtins + time: [644.56 ns 647.01 ns 649.95 ns] +Found 15 outliers among 100 measurements (15.00%) + 13 (13.00%) high mild + 2 (2.00%) high severe +conv_f32_u32/system time: [648.12 ns 651.20 ns 654.54 ns] +Found 9 outliers among 100 measurements (9.00%) + 7 (7.00%) high mild + 2 (2.00%) high severe +conv_f32_u32/assembly (aarch64 unix) + time: [481.02 ns 482.71 ns 484.60 ns] +Found 12 outliers among 100 measurements (12.00%) + 1 (1.00%) low mild + 10 (10.00%) high mild + 1 (1.00%) high severe + +conv_f32_u64/compiler-builtins + time: [644.14 ns 646.61 ns 649.53 ns] +Found 11 outliers among 100 measurements (11.00%) + 6 (6.00%) high mild + 5 (5.00%) high severe +conv_f32_u64/system time: [646.21 ns 650.17 ns 654.55 ns] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild +conv_f32_u64/assembly (aarch64 unix) + time: 
[473.36 ns 474.60 ns 476.00 ns] +Found 9 outliers among 100 measurements (9.00%) + 2 (2.00%) low mild + 5 (5.00%) high mild + 2 (2.00%) high severe + +conv_f32_u128/compiler-builtins + time: [1.0820 µs 1.0828 µs 1.0839 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe +conv_f32_u128/system time: [1.0003 µs 1.0042 µs 1.0076 µs] +Found 21 outliers among 100 measurements (21.00%) + 1 (1.00%) low mild + 3 (3.00%) high mild + 17 (17.00%) high severe + +conv_f32_i32/compiler-builtins + time: [801.13 ns 801.82 ns 802.53 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high severe +conv_f32_i32/system time: [745.17 ns 745.97 ns 746.78 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high severe +conv_f32_i32/assembly (aarch64 unix) + time: [469.87 ns 470.65 ns 471.57 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +conv_f32_i64/compiler-builtins + time: [799.44 ns 799.94 ns 800.59 ns] +Found 4 outliers among 100 measurements (4.00%) + 1 (1.00%) high mild + 3 (3.00%) high severe +conv_f32_i64/system time: [744.81 ns 745.17 ns 745.62 ns] +Found 14 outliers among 100 measurements (14.00%) + 5 (5.00%) high mild + 9 (9.00%) high severe +conv_f32_i64/assembly (aarch64 unix) + time: [465.06 ns 466.01 ns 467.12 ns] +Found 13 outliers among 100 measurements (13.00%) + 2 (2.00%) low severe + 5 (5.00%) high mild + 6 (6.00%) high severe + +conv_f32_i128/compiler-builtins + time: [1.1390 µs 1.1515 µs 1.1637 µs] +conv_f32_i128/system time: [1.1315 µs 1.1330 µs 1.1347 µs] +Found 6 outliers among 100 measurements (6.00%) + 3 (3.00%) low mild + 2 (2.00%) high mild + 1 (1.00%) high severe + +div_f32/compiler-builtins + time: [39.408 µs 39.676 µs 39.969 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +div_f32/system time: [42.108 µs 42.248 µs 42.528 µs] +Found 11 outliers among 100 measurements (11.00%) + 4 (4.00%) high mild + 7 (7.00%) high severe 
+div_f32/assembly (aarch64 unix) + time: [8.0724 µs 8.0794 µs 8.0870 µs] +Found 7 outliers among 100 measurements (7.00%) + 5 (5.00%) high mild + 2 (2.00%) high severe + +div_f64/compiler-builtins + time: [49.992 µs 50.014 µs 50.040 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high severe +div_f64/system time: [53.577 µs 53.651 µs 53.743 µs] +Found 6 outliers among 100 measurements (6.00%) + 4 (4.00%) high mild + 2 (2.00%) high severe +div_f64/assembly (aarch64 unix) + time: [8.0976 µs 8.1064 µs 8.1158 µs] +Found 6 outliers among 100 measurements (6.00%) + 3 (3.00%) high mild + 3 (3.00%) high severe + +extend_f16_f32/compiler-builtins + time: [804.09 ns 805.38 ns 807.09 ns] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe +extend_f16_f32/system time: [641.07 ns 641.76 ns 642.60 ns] +Found 12 outliers among 100 measurements (12.00%) + 6 (6.00%) high mild + 6 (6.00%) high severe +extend_f16_f32/assembly (aarch64 unix) + time: [456.69 ns 457.14 ns 457.68 ns] +Found 8 outliers among 100 measurements (8.00%) + 4 (4.00%) low mild + 2 (2.00%) high mild + 2 (2.00%) high severe + +extend_f16_f128/compiler-builtins + time: [1.1025 µs 1.1035 µs 1.1045 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +extend_f32_f64/compiler-builtins + time: [799.30 ns 799.68 ns 800.16 ns] +Found 13 outliers among 100 measurements (13.00%) + 3 (3.00%) high mild + 10 (10.00%) high severe +extend_f32_f64/system time: [992.48 ns 993.27 ns 994.32 ns] +Found 15 outliers among 100 measurements (15.00%) + 3 (3.00%) high mild + 12 (12.00%) high severe +extend_f32_f64/assembly (aarch64 unix) + time: [457.65 ns 460.39 ns 463.78 ns] + +extend_f32_f128/compiler-builtins + time: [1.0295 µs 1.0311 µs 1.0327 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) low mild + 1 (1.00%) high mild + +extend_f64_f128/compiler-builtins + time: [1.0400 µs 1.0412 µs 1.0426 µs] +Found 2 
outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +mul_f32/compiler-builtins + time: [25.604 µs 25.705 µs 25.818 µs] +Found 23 outliers among 100 measurements (23.00%) + 17 (17.00%) low severe + 3 (3.00%) high mild + 3 (3.00%) high severe +mul_f32/system time: [29.914 µs 29.977 µs 30.043 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +mul_f32/assembly (aarch64 unix) + time: [8.1384 µs 8.1964 µs 8.2603 µs] +Found 13 outliers among 100 measurements (13.00%) + 3 (3.00%) high mild + 10 (10.00%) high severe + +mul_f64/compiler-builtins + time: [25.596 µs 25.615 µs 25.637 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe +mul_f64/system time: [30.931 µs 30.963 µs 31.002 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild +mul_f64/assembly (aarch64 unix) + time: [8.0589 µs 8.0638 µs 8.0695 µs] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +mul_f128/compiler-builtins + time: [54.242 µs 54.306 µs 54.374 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +powi_f32/compiler-builtins + time: [129.91 µs 130.09 µs 130.24 µs] +powi_f32/system time: [126.97 µs 127.34 µs 127.82 µs] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe + +powi_f64/compiler-builtins + time: [130.08 µs 130.81 µs 131.46 µs] +Found 13 outliers among 100 measurements (13.00%) + 13 (13.00%) high mild +powi_f64/system time: [128.51 µs 128.68 µs 128.88 µs] +Found 21 outliers among 100 measurements (21.00%) + 4 (4.00%) high mild + 17 (17.00%) high severe + +sub_f32/compiler-builtins + time: [37.861 µs 38.012 µs 38.158 µs] +Found 26 outliers among 100 measurements (26.00%) + 18 (18.00%) low mild + 7 (7.00%) high mild + 1 (1.00%) high severe +sub_f32/system time: [39.586 µs 39.628 µs 39.673 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high 
mild + 1 (1.00%) high severe +sub_f32/assembly (aarch64 unix) + time: [8.0976 µs 8.1584 µs 8.2208 µs] +Found 6 outliers among 100 measurements (6.00%) + 6 (6.00%) high mild + +sub_f64/compiler-builtins + time: [37.755 µs 37.838 µs 37.921 µs] +Found 25 outliers among 100 measurements (25.00%) + 7 (7.00%) low severe + 3 (3.00%) low mild + 4 (4.00%) high mild + 11 (11.00%) high severe +sub_f64/system time: [39.979 µs 40.019 µs 40.064 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe +sub_f64/assembly (aarch64 unix) + time: [8.0669 µs 8.0733 µs 8.0801 µs] +Found 7 outliers among 100 measurements (7.00%) + 3 (3.00%) high mild + 4 (4.00%) high severe + +sub_f128/compiler-builtins + time: [68.618 µs 68.899 µs 69.293 µs] +Found 11 outliers among 100 measurements (11.00%) + 2 (2.00%) high mild + 9 (9.00%) high severe + +trunc_f32_f16/compiler-builtins + time: [1.3343 µs 1.3468 µs 1.3608 µs] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe +trunc_f32_f16/system time: [1.2687 µs 1.2714 µs 1.2738 µs] +trunc_f32_f16/assembly (aarch64 unix) + time: [470.06 ns 472.96 ns 475.30 ns] + +trunc_f64_f16/compiler-builtins + time: [1.2729 µs 1.2738 µs 1.2749 µs] +Found 7 outliers among 100 measurements (7.00%) + 2 (2.00%) high mild + 5 (5.00%) high severe +trunc_f64_f16/assembly (aarch64 unix) + time: [455.91 ns 456.61 ns 457.33 ns] +Found 12 outliers among 100 measurements (12.00%) + 1 (1.00%) low severe + 2 (2.00%) low mild + 6 (6.00%) high mild + 3 (3.00%) high severe + +trunc_f64_f32/compiler-builtins + time: [1.2240 µs 1.2325 µs 1.2410 µs] +Found 17 outliers among 100 measurements (17.00%) + 4 (4.00%) low mild + 2 (2.00%) high mild + 11 (11.00%) high severe +trunc_f64_f32/system time: [1.2784 µs 1.2835 µs 1.2884 µs] +Found 10 outliers among 100 measurements (10.00%) + 6 (6.00%) low severe + 1 (1.00%) low mild + 2 (2.00%) high mild + 1 (1.00%) high severe +trunc_f64_f32/assembly (aarch64 
unix) + time: [455.64 ns 456.08 ns 456.58 ns] +Found 18 outliers among 100 measurements (18.00%) + 3 (3.00%) low severe + 4 (4.00%) low mild + 8 (8.00%) high mild + 3 (3.00%) high severe + +trunc_f128_f16/compiler-builtins + time: [1.2563 µs 1.2666 µs 1.2776 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild + +trunc_f128_f32/compiler-builtins + time: [1.2459 µs 1.2482 µs 1.2507 µs] +Found 6 outliers among 100 measurements (6.00%) + 2 (2.00%) low mild + 2 (2.00%) high mild + 2 (2.00%) high severe + +trunc_f128_f64/compiler-builtins + time: [1.2821 µs 1.3047 µs 1.3452 µs] +Found 8 outliers among 100 measurements (8.00%) + 4 (4.00%) low severe + 1 (1.00%) low mild + 2 (2.00%) high mild + 1 (1.00%) high severe + + +running 52 tests +test memcmp_builtin_1048576 ... bench: 20,975.52 ns/iter (+/- 239.69) = 49991 MB/s +test memcmp_builtin_16 ... bench: 1.60 ns/iter (+/- 0.05) = 16000 MB/s +test memcmp_builtin_32 ... bench: 1.61 ns/iter (+/- 0.03) = 32000 MB/s +test memcmp_builtin_4096 ... bench: 95.84 ns/iter (+/- 2.82) = 43115 MB/s +test memcmp_builtin_64 ... bench: 2.39 ns/iter (+/- 0.09) = 32000 MB/s +test memcmp_builtin_8 ... bench: 1.60 ns/iter (+/- 0.04) = 8000 MB/s +test memcmp_builtin_unaligned_1048575 ... bench: 22,060.00 ns/iter (+/- 873.55) = 47532 MB/s +test memcmp_builtin_unaligned_15 ... bench: 3.19 ns/iter (+/- 0.02) = 5333 MB/s +test memcmp_builtin_unaligned_31 ... bench: 1.61 ns/iter (+/- 0.01) = 32000 MB/s +test memcmp_builtin_unaligned_4095 ... bench: 96.63 ns/iter (+/- 4.58) = 42666 MB/s +test memcmp_builtin_unaligned_63 ... bench: 2.40 ns/iter (+/- 0.11) = 32000 MB/s +test memcmp_builtin_unaligned_7 ... bench: 3.37 ns/iter (+/- 0.05) = 2666 MB/s +test memcmp_rust_1048576 ... bench: 309,647.23 ns/iter (+/- 6,077.35) = 3386 MB/s +test memcmp_rust_16 ... bench: 5.66 ns/iter (+/- 0.30) = 3200 MB/s +test memcmp_rust_32 ... bench: 10.47 ns/iter (+/- 0.14) = 3200 MB/s +test memcmp_rust_4096 ... 
bench: 1,124.34 ns/iter (+/- 36.92) = 3644 MB/s +test memcmp_rust_64 ... bench: 19.90 ns/iter (+/- 0.36) = 3368 MB/s +test memcmp_rust_8 ... bench: 3.46 ns/iter (+/- 0.11) = 2666 MB/s +test memcmp_rust_unaligned_1048575 ... bench: 308,613.87 ns/iter (+/- 6,613.18) = 3397 MB/s +test memcmp_rust_unaligned_15 ... bench: 5.35 ns/iter (+/- 0.05) = 3200 MB/s +test memcmp_rust_unaligned_31 ... bench: 9.94 ns/iter (+/- 0.06) = 3555 MB/s +test memcmp_rust_unaligned_4095 ... bench: 1,120.06 ns/iter (+/- 5.03) = 3657 MB/s +test memcmp_rust_unaligned_63 ... bench: 19.64 ns/iter (+/- 0.82) = 3368 MB/s +test memcmp_rust_unaligned_7 ... bench: 3.22 ns/iter (+/- 0.10) = 2666 MB/s +test memcpy_builtin_1048576 ... bench: 12,538.05 ns/iter (+/- 354.79) = 83631 MB/s +test memcpy_builtin_1048576_misalign ... bench: 30,092.56 ns/iter (+/- 8,064.04) = 34845 MB/s +test memcpy_builtin_1048576_offset ... bench: 12,538.36 ns/iter (+/- 359.04) = 83631 MB/s +test memcpy_builtin_4096 ... bench: 44.24 ns/iter (+/- 6.80) = 93090 MB/s +test memcpy_builtin_4096_misalign ... bench: 45.34 ns/iter (+/- 2.13) = 91022 MB/s +test memcpy_builtin_4096_offset ... bench: 44.71 ns/iter (+/- 0.61) = 93090 MB/s +test memcpy_rust_1048576 ... bench: 17,943.33 ns/iter (+/- 243.18) = 58439 MB/s +test memcpy_rust_1048576_misalign ... bench: 15,004.68 ns/iter (+/- 3,978.65) = 69886 MB/s +test memcpy_rust_1048576_offset ... bench: 14,722.06 ns/iter (+/- 479.54) = 71225 MB/s +test memcpy_rust_4096 ... bench: 44.91 ns/iter (+/- 4.62) = 93090 MB/s +test memcpy_rust_4096_misalign ... bench: 76.21 ns/iter (+/- 8.21) = 53894 MB/s +test memcpy_rust_4096_offset ... bench: 76.27 ns/iter (+/- 4.69) = 53894 MB/s +test memmove_builtin_1048576 ... bench: 18,644.50 ns/iter (+/- 379.84) = 56242 MB/s +test memmove_builtin_1048576_misalign ... bench: 18,947.70 ns/iter (+/- 1,226.26) = 55342 MB/s +test memmove_builtin_4096 ... bench: 44.21 ns/iter (+/- 0.79) = 93090 MB/s +test memmove_builtin_4096_misalign ... 
bench: 47.21 ns/iter (+/- 3.12) = 87148 MB/s +test memmove_rust_1048576 ... bench: 34,813.33 ns/iter (+/- 3,637.47) = 30120 MB/s +test memmove_rust_1048576_misalign ... bench: 35,067.19 ns/iter (+/- 1,699.63) = 29902 MB/s +test memmove_rust_4096 ... bench: 148.69 ns/iter (+/- 1.31) = 27675 MB/s +test memmove_rust_4096_misalign ... bench: 153.81 ns/iter (+/- 1.71) = 26771 MB/s +test memset_builtin_1048576 ... bench: 15,704.12 ns/iter (+/- 12,113.86) = 66771 MB/s +test memset_builtin_1048576_offset ... bench: 17,894.23 ns/iter (+/- 175.12) = 58599 MB/s +test memset_builtin_4096 ... bench: 39.95 ns/iter (+/- 0.19) = 105025 MB/s +test memset_builtin_4096_offset ... bench: 40.48 ns/iter (+/- 3.11) = 102400 MB/s +test memset_rust_1048576 ... bench: 10,600.66 ns/iter (+/- 1,559.93) = 98922 MB/s +test memset_rust_1048576_offset ... bench: 14,810.85 ns/iter (+/- 575.27) = 70801 MB/s +test memset_rust_4096 ... bench: 37.91 ns/iter (+/- 2.77) = 110702 MB/s +test memset_rust_4096_offset ... bench: 59.99 ns/iter (+/- 10.45) = 69423 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 52 measured; 0 filtered out; finished in 97.74s + diff --git a/testcrate/benches/float_add.rs b/testcrate/benches/float_add.rs new file mode 100644 index 000000000..eef1ecc57 --- /dev/null +++ b/testcrate/benches/float_add.rs @@ -0,0 +1,81 @@ +#![feature(f128)] + +use compiler_builtins::float::add; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: add_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: add::__addsf3, + sys_fn: __addsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "addss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fadd {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! 
{ + name: add_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: add::__adddf3, + sys_fn: __adddf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "addsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fadd {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: add_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: add::__addtf3, + crate_fn_ppc: add::__addkf3, + sys_fn: __addtf3, + sys_fn_ppc: __addkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!(float_add, add_f32, add_f64, add_f128); +criterion_main!(float_add); diff --git a/testcrate/benches/float_cmp.rs b/testcrate/benches/float_cmp.rs new file mode 100644 index 000000000..641eb0ac5 --- /dev/null +++ b/testcrate/benches/float_cmp.rs @@ -0,0 +1,202 @@ +#![feature(f128)] + +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +use compiler_builtins::float::cmp; + +/// `gt` symbols are allowed to return differing results; they just get compared +/// to 0. +fn gt_res_eq(a: i32, b: i32) -> bool { + let a_le_0 = a <= 0; + let b_le_0 = b <= 0; + (a_le_0 && b_le_0) || (!a_le_0 && !b_le_0) +} + +float_bench! { + name: cmp_f32_gt, + sig: (a: f32, b: f32) -> i32, + crate_fn: cmp::__gtsf2, + sys_fn: __gtsf2, + sys_available: all(), + output_eq: gt_res_eq, + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomiss {a}, {b}", + "seta {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:s}, {b:s}", + "cset {ret:w}, gt", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! 
{ + name: cmp_f32_unord, + sig: (a: f32, b: f32) -> i32, + crate_fn: cmp::__unordsf2, + sys_fn: __unordsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomiss {a}, {b}", + "setp {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:s}, {b:s}", + "cset {ret:w}, vs", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f64_gt, + sig: (a: f64, b: f64) -> i32, + crate_fn: cmp::__gtdf2, + sys_fn: __gtdf2, + sys_available: all(), + output_eq: gt_res_eq, + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomisd {a}, {b}", + "seta {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:d}, {b:d}", + "cset {ret:w}, gt", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f64_unord, + sig: (a: f64, b: f64) -> i32, + crate_fn: cmp::__unorddf2, + sys_fn: __unorddf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomisd {a}, {b}", + "setp {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:d}, {b:d}", + "cset {ret:w}, vs", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! 
{ + name: cmp_f128_gt, + sig: (a: f128, b: f128) -> i32, + crate_fn: cmp::__gttf2, + crate_fn_ppc: cmp::__gtkf2, + sys_fn: __gttf2, + sys_fn_ppc: __gtkf2, + sys_available: not(feature = "no-sys-f128"), + output_eq: gt_res_eq, + asm: [] +} + +float_bench! { + name: cmp_f128_unord, + sig: (a: f128, b: f128) -> i32, + crate_fn: cmp::__unordtf2, + crate_fn_ppc: cmp::__unordkf2, + sys_fn: __unordtf2, + sys_fn_ppc: __unordkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!( + float_cmp, + cmp_f32_gt, + cmp_f32_unord, + cmp_f64_gt, + cmp_f64_unord, + cmp_f128_gt, + cmp_f128_unord +); +criterion_main!(float_cmp); diff --git a/testcrate/benches/float_conv.rs b/testcrate/benches/float_conv.rs new file mode 100644 index 000000000..bbd3a0685 --- /dev/null +++ b/testcrate/benches/float_conv.rs @@ -0,0 +1,547 @@ +#![feature(f128)] +#![allow(improper_ctypes)] + +use compiler_builtins::float::conv; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +/* unsigned int -> float */ + +float_bench! { + name: conv_u32_f32, + sig: (a: u32) -> f32, + crate_fn: conv::__floatunsisf, + sys_fn: __floatunsisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "mov {tmp:e}, {a:e}", + "cvtsi2ss {ret}, {tmp}", + a = in(reg) a, + tmp = out(reg) _, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "ucvtf {ret:s}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! 
{ + name: conv_u32_f64, + sig: (a: u32) -> f64, + crate_fn: conv::__floatunsidf, + sys_fn: __floatunsidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "mov {tmp:e}, {a:e}", + "cvtsi2sd {ret}, {tmp}", + a = in(reg) a, + tmp = out(reg) _, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "ucvtf {ret:d}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u64_f32, + sig: (a: u64) -> f32, + crate_fn: conv::__floatundisf, + sys_fn: __floatundisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "ucvtf {ret:s}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u64_f64, + sig: (a: u64) -> f64, + crate_fn: conv::__floatundidf, + sys_fn: __floatundidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "ucvtf {ret:d}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u128_f32, + sig: (a: u128) -> f32, + crate_fn: conv::__floatuntisf, + sys_fn: __floatuntisf, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_u128_f64, + sig: (a: u128) -> f64, + crate_fn: conv::__floatuntidf, + sys_fn: __floatuntidf, + sys_available: all(), + asm: [] +} + +/* signed int -> float */ + +float_bench! 
{ + name: conv_i32_f32, + sig: (a: i32) -> f32, + crate_fn: conv::__floatsisf, + sys_fn: __floatsisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsi2ss {ret}, {a:e}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "scvtf {ret:s}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_i32_f64, + sig: (a: i32) -> f64, + crate_fn: conv::__floatsidf, + sys_fn: __floatsidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "cvtsi2sd {ret}, {a:e}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "scvtf {ret:d}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_i64_f32, + sig: (a: i64) -> f32, + crate_fn: conv::__floatdisf, + sys_fn: __floatdisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsi2ss {ret}, {a:r}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "scvtf {ret:s}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! 
{ + name: conv_i64_f64, + sig: (a: i64) -> f64, + crate_fn: conv::__floatdidf, + sys_fn: __floatdidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "cvtsi2sd {ret}, {a:r}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "scvtf {ret:d}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_i128_f32, + sig: (a: i128) -> f32, + crate_fn: conv::__floattisf, + sys_fn: __floattisf, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_i128_f64, + sig: (a: i128) -> f64, + crate_fn: conv::__floattidf, + sys_fn: __floattidf, + sys_available: all(), + asm: [] +} + +/* float -> unsigned int */ + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u32, + sig: (a: f32) -> u32, + crate_fn: conv::__fixunssfsi, + sys_fn: __fixunssfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u32; + asm!( + "fcvtzu {ret:w}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u64, + sig: (a: f32) -> u64, + crate_fn: conv::__fixunssfdi, + sys_fn: __fixunssfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u64; + asm!( + "fcvtzu {ret:x}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u128, + sig: (a: f32) -> u128, + crate_fn: conv::__fixunssfti, + sys_fn: __fixunssfti, + sys_available: all(), + asm: [] +} + +float_bench! 
{ + name: conv_f64_u32, + sig: (a: f64) -> u32, + crate_fn: conv::__fixunsdfsi, + sys_fn: __fixunsdfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u32; + asm!( + "fcvtzu {ret:w}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_u64, + sig: (a: f64) -> u64, + crate_fn: conv::__fixunsdfdi, + sys_fn: __fixunsdfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u64; + asm!( + "fcvtzu {ret:x}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_u128, + sig: (a: f64) -> u128, + crate_fn: conv::__fixunsdfti, + sys_fn: __fixunsdfti, + sys_available: all(), + asm: [] +} + +/* float -> signed int */ + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i32, + sig: (a: f32) -> i32, + crate_fn: conv::__fixsfsi, + sys_fn: __fixsfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcvtzs {ret:w}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i64, + sig: (a: f32) -> i64, + crate_fn: conv::__fixsfdi, + sys_fn: __fixsfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i64; + asm!( + "fcvtzs {ret:x}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i128, + sig: (a: f32) -> i128, + crate_fn: conv::__fixsfti, + sys_fn: __fixsfti, + sys_available: all(), + asm: [] +} + +float_bench! 
{ + name: conv_f64_i32, + sig: (a: f64) -> i32, + crate_fn: conv::__fixdfsi, + sys_fn: __fixdfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcvtzs {ret:w}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_i64, + sig: (a: f64) -> i64, + crate_fn: conv::__fixdfdi, + sys_fn: __fixdfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i64; + asm!( + "fcvtzs {ret:x}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_i128, + sig: (a: f64) -> i128, + crate_fn: conv::__fixdfti, + sys_fn: __fixdfti, + sys_available: all(), + asm: [] +} + +criterion_group!( + float_conv, + conv_u32_f32, + conv_u32_f64, + conv_u64_f32, + conv_u64_f64, + conv_u128_f32, + conv_u128_f64, + conv_i32_f32, + conv_i32_f64, + conv_i64_f32, + conv_i64_f64, + conv_i128_f32, + conv_i128_f64, + conv_f64_u32, + conv_f64_u64, + conv_f64_u128, + conv_f64_i32, + conv_f64_i64, + conv_f64_i128, +); + +// FIXME: ppc64le has a sporadic overflow panic in the crate functions +// +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +criterion_group!( + float_conv_not_ppc64le, + conv_f32_u32, + conv_f32_u64, + conv_f32_u128, + conv_f32_i32, + conv_f32_i64, + conv_f32_i128, +); + +#[cfg(all(target_arch = "powerpc64", target_endian = "little"))] +criterion_main!(float_conv); + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +criterion_main!(float_conv, float_conv_not_ppc64le); diff --git a/testcrate/benches/float_div.rs b/testcrate/benches/float_div.rs new file mode 100644 index 000000000..e679f8ccc --- /dev/null +++ b/testcrate/benches/float_div.rs @@ -0,0 +1,70 @@ +#![feature(f128)] + +use compiler_builtins::float::div; +use criterion::{criterion_group, criterion_main, Criterion}; +use 
testcrate::float_bench; + +float_bench! { + name: div_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: div::__divsf3, + sys_fn: __divsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "divss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fdiv {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: div_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: div::__divdf3, + sys_fn: __divdf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "divsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fdiv {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +criterion_group!(float_div, div_f32, div_f64); +criterion_main!(float_div); diff --git a/testcrate/benches/float_extend.rs b/testcrate/benches/float_extend.rs new file mode 100644 index 000000000..9bd8009e9 --- /dev/null +++ b/testcrate/benches/float_extend.rs @@ -0,0 +1,93 @@ +#![allow(unused_variables)] // "unused" f16 registers +#![feature(f128)] +#![feature(f16)] + +use compiler_builtins::float::extend; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: extend_f16_f32, + sig: (a: f16) -> f32, + crate_fn: extend::__extendhfsf2, + sys_fn: __extendhfsf2, + sys_available: not(feature = "no-sys-f16"), + asm: [ + #[cfg(target_arch = "aarch64")] { + // FIXME(f16_f128): remove `to_bits()` after f16 asm support (rust-lang/rust/#116909) + let ret: f32; + asm!( + "fcvt {ret:s}, {a:h}", + a = in(vreg) a.to_bits(), + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! 
{ + name: extend_f16_f128, + sig: (a: f16) -> f128, + crate_fn: extend::__extendhftf2, + crate_fn_ppc: extend::__extendhfkf2, + sys_fn: __extendhftf2, + sys_fn_ppc: __extendhfkf2, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [], +} + +float_bench! { + name: extend_f32_f64, + sig: (a: f32) -> f64, + crate_fn: extend::__extendsfdf2, + sys_fn: __extendsfdf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "fcvt {ret:d}, {a:s}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: extend_f32_f128, + sig: (a: f32) -> f128, + crate_fn: extend::__extendsftf2, + crate_fn_ppc: extend::__extendsfkf2, + sys_fn: __extendsftf2, + sys_fn_ppc: __extendsfkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +float_bench! { + name: extend_f64_f128, + sig: (a: f64) -> f128, + crate_fn: extend::__extenddftf2, + crate_fn_ppc: extend::__extenddfkf2, + sys_fn: __extenddftf2, + sys_fn_ppc: __extenddfkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +criterion_group!( + float_extend, + extend_f16_f32, + extend_f16_f128, + extend_f32_f64, + extend_f32_f128, + extend_f64_f128, +); +criterion_main!(float_extend); diff --git a/testcrate/benches/float_mul.rs b/testcrate/benches/float_mul.rs new file mode 100644 index 000000000..efa32b285 --- /dev/null +++ b/testcrate/benches/float_mul.rs @@ -0,0 +1,81 @@ +#![feature(f128)] + +use compiler_builtins::float::mul; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! 
{ + name: mul_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: mul::__mulsf3, + sys_fn: __mulsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "mulss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fmul {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: mul_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: mul::__muldf3, + sys_fn: __muldf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "mulsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fmul {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: mul_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: mul::__multf3, + crate_fn_ppc: mul::__mulkf3, + sys_fn: __multf3, + sys_fn_ppc: __mulkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!(float_mul, mul_f32, mul_f64, mul_f128); +criterion_main!(float_mul); diff --git a/testcrate/benches/float_pow.rs b/testcrate/benches/float_pow.rs new file mode 100644 index 000000000..252f74012 --- /dev/null +++ b/testcrate/benches/float_pow.rs @@ -0,0 +1,24 @@ +use compiler_builtins::float::pow; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: powi_f32, + sig: (a: f32, b: i32) -> f32, + crate_fn: pow::__powisf2, + sys_fn: __powisf2, + sys_available: all(), + asm: [], +} + +float_bench! 
{ + name: powi_f64, + sig: (a: f64, b: i32) -> f64, + crate_fn: pow::__powidf2, + sys_fn: __powidf2, + sys_available: all(), + asm: [], +} + +criterion_group!(float_pow, powi_f32, powi_f64); +criterion_main!(float_pow); diff --git a/testcrate/benches/float_sub.rs b/testcrate/benches/float_sub.rs new file mode 100644 index 000000000..6d87604aa --- /dev/null +++ b/testcrate/benches/float_sub.rs @@ -0,0 +1,81 @@ +#![feature(f128)] + +use compiler_builtins::float::sub; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: sub_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: sub::__subsf3, + sys_fn: __subsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "subss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fsub {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: sub_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: sub::__subdf3, + sys_fn: __subdf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "subsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fsub {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! 
{ + name: sub_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: sub::__subtf3, + crate_fn_ppc: sub::__subkf3, + sys_fn: __subtf3, + sys_fn_ppc: __subkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!(float_sub, sub_f32, sub_f64, sub_f128); +criterion_main!(float_sub); diff --git a/testcrate/benches/float_trunc.rs b/testcrate/benches/float_trunc.rs new file mode 100644 index 000000000..1553dacee --- /dev/null +++ b/testcrate/benches/float_trunc.rs @@ -0,0 +1,127 @@ +#![feature(f128)] +#![feature(f16)] + +use compiler_builtins::float::trunc; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: trunc_f32_f16, + sig: (a: f32) -> f16, + crate_fn: trunc::__truncsfhf2, + sys_fn: __truncsfhf2, + sys_available: not(feature = "no-sys-f16"), + asm: [ + #[cfg(target_arch = "aarch64")] { + // FIXME(f16_f128): remove `from_bits()` after f16 asm support (rust-lang/rust/#116909) + let ret: u16; + asm!( + "fcvt {ret:h}, {a:s}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + f16::from_bits(ret) + }; + ], +} + +float_bench! { + name: trunc_f64_f16, + sig: (a: f64) -> f16, + crate_fn: trunc::__truncdfhf2, + sys_fn: __truncdfhf2, + sys_available: not(feature = "no-sys-f128"), + asm: [ + #[cfg(target_arch = "aarch64")] { + // FIXME(f16_f128): remove `from_bits()` after f16 asm support (rust-lang/rust/#116909) + let ret: u16; + asm!( + "fcvt {ret:h}, {a:d}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + f16::from_bits(ret) + }; + ], +} + +float_bench! 
{ + name: trunc_f64_f32, + sig: (a: f64) -> f32, + crate_fn: trunc::__truncdfsf2, + sys_fn: __truncdfsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsd2ss {ret}, {a}", + a = in(xmm_reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "fcvt {ret:s}, {a:d}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: trunc_f128_f16, + sig: (a: f128) -> f16, + crate_fn: trunc::__trunctfhf2, + crate_fn_ppc: trunc::__trunckfhf2, + sys_fn: __trunctfhf2, + sys_fn_ppc: __trunckfhf2, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [], +} + +float_bench! { + name: trunc_f128_f32, + sig: (a: f128) -> f32, + crate_fn: trunc::__trunctfsf2, + crate_fn_ppc: trunc::__trunckfsf2, + sys_fn: __trunctfsf2, + sys_fn_ppc: __trunckfsf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +float_bench! 
{ + name: trunc_f128_f64, + sig: (a: f128) -> f64, + crate_fn: trunc::__trunctfdf2, + crate_fn_ppc: trunc::__trunckfdf2, + sys_fn: __trunctfdf2, + sys_fn_ppc: __trunckfdf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +criterion_group!( + float_trunc, + trunc_f32_f16, + trunc_f64_f16, + trunc_f64_f32, + trunc_f128_f16, + trunc_f128_f32, + trunc_f128_f64, +); +criterion_main!(float_trunc); diff --git a/testcrate/build.rs b/testcrate/build.rs index 1dad6c5e6..cae83e1fc 100644 --- a/testcrate/build.rs +++ b/testcrate/build.rs @@ -5,6 +5,8 @@ use std::{collections::HashSet, env}; enum Feature { NoSysF128, NoSysF128IntConvert, + NoSysF16, + NoSysF16F128Convert, } fn main() { @@ -31,6 +33,7 @@ fn main() { { features.insert(Feature::NoSysF128); features.insert(Feature::NoSysF128IntConvert); + features.insert(Feature::NoSysF16F128Convert); } if target.starts_with("i586") || target.starts_with("i686") { @@ -38,6 +41,17 @@ fn main() { features.insert(Feature::NoSysF128IntConvert); } + if target.contains("-unknown-linux-") { + // No `__extendhftf2` on x86, no `__trunctfhf2` on aarch64 + features.insert(Feature::NoSysF16F128Convert); + } + + if target.starts_with("wasm32-") { + // Linking says "error: function signature mismatch: __extendhfsf2" and seems to + // think the signature is either `(i32) -> f32` or `(f32) -> f32` + features.insert(Feature::NoSysF16); + } + for feature in features { let (name, warning) = match feature { Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"), @@ -45,6 +59,11 @@ fn main() { "no-sys-f128-int-convert", "using apfloat fallback for f128 to int conversions", ), + Feature::NoSysF16F128Convert => ( + "no-sys-f16-f128-convert", + "skipping using apfloat fallback for f16 <-> f128 conversions", + ), + Feature::NoSysF16 => ("no-sys-f16", "using apfloat fallback for f16"), }; println!("cargo:warning={warning}"); println!("cargo:rustc-cfg=feature=\"{name}\""); diff --git a/testcrate/src/bench.rs 
b/testcrate/src/bench.rs new file mode 100644 index 000000000..1374d7b4f --- /dev/null +++ b/testcrate/src/bench.rs @@ -0,0 +1,348 @@ +use core::cell::RefCell; + +use alloc::vec::Vec; +use compiler_builtins::float::Float; + +/// Fuzz with this many items to ensure equal functions +pub const CHECK_ITER_ITEMS: u32 = 10_000; +/// Benchmark with this many items to get a variety +pub const BENCH_ITER_ITEMS: u32 = 500; + +/// Still run benchmarks/tests but don't check correctness between compiler-builtins and +/// builtin system functions +pub fn skip_sys_checks(test_name: &str) -> bool { + const ALWAYS_SKIPPED: &[&str] = &[ + // FIXME(f16_f128): system symbols have incorrect results + // + "extend_f16_f32", + "trunc_f32_f16", + "trunc_f64_f16", + // FIXME(f16_f128): rounding error + // + "mul_f128", + ]; + + // FIXME(f16_f128): error on LE ppc64. There are more tests that are cfg-ed out completely + // in their benchmark modules due to runtime panics. + // + const PPC64LE_SKIPPED: &[&str] = &["extend_f32_f128"]; + + // FIXME(f16_f128): system symbols have incorrect results + // + const X86_NO_SSE_SKIPPED: &[&str] = &["add_f128", "sub_f128", "powi_f32", "powi_f64"]; + + // FIXME(llvm): system symbols have incorrect results on Windows + // + const WINDOWS_SKIPPED: &[&str] = &[ + "conv_f32_u128", + "conv_f32_i128", + "conv_f64_u128", + "conv_f64_i128", + ]; + + if cfg!(target_arch = "arm") { + // The Arm symbols need a different ABI that our macro doesn't handle, just skip it + return true; + } + + if ALWAYS_SKIPPED.contains(&test_name) { + return true; + } + + if cfg!(all(target_arch = "powerpc64", target_endian = "little")) + && PPC64LE_SKIPPED.contains(&test_name) + { + return true; + } + + if cfg!(all(target_arch = "x86", not(target_feature = "sse"))) + && X86_NO_SSE_SKIPPED.contains(&test_name) + { + return true; + } + + if cfg!(target_family = "windows") && WINDOWS_SKIPPED.contains(&test_name) { + return true; + } + + false +} + +/// Still run 
benchmarks/tests but don't check correctness between compiler-builtins and +/// assembly functions +pub fn skip_asm_checks(test_name: &str) -> bool { + // FIXME(f16_f128): rounding error + // + const SKIPPED: &[&str] = &["mul_f32", "mul_f64"]; + + SKIPPED.contains(&test_name) +} + +/// Create a comparison of the system symbol, compiler_builtins, and optionally handwritten +/// assembly. +#[macro_export] +macro_rules! float_bench { + ( + // Name of this benchmark + name: $name:ident, + // The function signature to be tested + sig: ($($arg:ident: $arg_ty:ty),*) -> $ret_ty:ty, + // Path to the function in compiler_builtins + crate_fn: $crate_fn:path, + // Optional alias on ppc + $( crate_fn_ppc: $crate_fn_ppc:path, )? + // Name of the system symbol + sys_fn: $sys_fn:ident, + // Optional alias on ppc + $( sys_fn_ppc: $sys_fn_ppc:ident, )? + // Meta saying whether the system symbol is available + sys_available: $sys_available:meta, + // An optional function to validate the results of two functions are equal, if not + // just `$ret_ty::check_eq` + $( output_eq: $output_eq:expr, )? + // Assembly implementations, if any. + asm: [ + $( + #[cfg($asm_meta:meta)] { + $($asm_tt:tt)* + } + );* + $(;)? + ] + $(,)? + ) => {paste::paste! { + #[cfg($sys_available)] + extern "C" { + /// Binding for the system function + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty; + + + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + float_bench! { @coalesce_fn $($sys_fn_ppc)? 
=> + fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty; + } + } + + fn $name(c: &mut Criterion) { + use core::hint::black_box; + use compiler_builtins::float::Float; + use $crate::bench::TestIO; + + #[inline(never)] // equalize with external calls + fn crate_fn($($arg: $arg_ty),*) -> $ret_ty { + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + let target_crate_fn = $crate_fn; + + // On PPC, use an alias if specified + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + let target_crate_fn = float_bench!(@coalesce $($crate_fn_ppc)?, $crate_fn); + + target_crate_fn( $($arg),* ) + } + + #[inline(always)] // already a branch + #[cfg($sys_available)] + fn sys_fn($($arg: $arg_ty),*) -> $ret_ty { + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + let target_sys_fn = $sys_fn; + + // On PPC, use an alias if specified + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + let target_sys_fn = float_bench!(@coalesce $($sys_fn_ppc)?, $sys_fn); + + unsafe { target_sys_fn( $($arg),* ) } + } + + #[inline(never)] // equalize with external calls + #[cfg(any( $($asm_meta),* ))] + fn asm_fn($(mut $arg: $arg_ty),*) -> $ret_ty { + use core::arch::asm; + $( + #[cfg($asm_meta)] + unsafe { $($asm_tt)* } + )* + } + + let testvec = <($($arg_ty),*)>::make_testvec($crate::bench::CHECK_ITER_ITEMS); + let benchvec = <($($arg_ty),*)>::make_testvec($crate::bench::BENCH_ITER_ITEMS); + let test_name = stringify!($name); + let check_eq = float_bench!(@coalesce $($output_eq)?, $ret_ty::check_eq); + + // Verify math lines up. We run the crate functions even if we don't validate the + // output here to make sure there are no panics or crashes. 
+ + #[cfg($sys_available)] + for ($($arg),*) in testvec.iter().copied() { + let crate_res = crate_fn($($arg),*); + let sys_res = sys_fn($($arg),*); + + if $crate::bench::skip_sys_checks(test_name) { + continue; + } + + assert!( + check_eq(crate_res, sys_res), + "{test_name}{:?}: crate: {crate_res:?}, sys: {sys_res:?}", + ($($arg),* ,) + ); + } + + #[cfg(any( $($asm_meta),* ))] + { + for ($($arg),*) in testvec.iter().copied() { + let crate_res = crate_fn($($arg),*); + let asm_res = asm_fn($($arg),*); + + if $crate::bench::skip_asm_checks(test_name) { + continue; + } + + assert!( + check_eq(crate_res, asm_res), + "{test_name}{:?}: crate: {crate_res:?}, asm: {asm_res:?}", + ($($arg),* ,) + ); + } + } + + let mut group = c.benchmark_group(test_name); + group.bench_function("compiler-builtins", |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(crate_fn( $(black_box($arg)),* )); + } + })); + + #[cfg($sys_available)] + group.bench_function("system", |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(sys_fn( $(black_box($arg)),* )); + } + })); + + #[cfg(any( $($asm_meta),* ))] + group.bench_function(&format!( + "assembly ({} {})", std::env::consts::ARCH, std::env::consts::FAMILY + ), |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(asm_fn( $(black_box($arg)),* )); + } + })); + + group.finish(); + } + }}; + + // Allow overriding a default + (@coalesce $specified:expr, $default:expr) => { $specified }; + (@coalesce, $default:expr) => { $default }; + + // Allow overriding a function name + (@coalesce_fn $specified:ident => fn $default_name:ident $($tt:tt)+) => { + fn $specified $($tt)+ + }; + (@coalesce_fn => fn $default_name:ident $($tt:tt)+) => { + fn $default_name $($tt)+ + }; +} + +/// A type used as either an input or output to/from a benchmark function. +pub trait TestIO: Sized { + fn make_testvec(len: u32) -> Vec<Self>; + fn check_eq(a: Self, b: Self) -> bool; +} + +macro_rules! 
impl_testio { + (float $($f_ty:ty),+) => {$( + impl TestIO for $f_ty { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_float(len, |a| ret.borrow_mut().push(a)); + ret.into_inner() + } + + fn check_eq(a: Self, b: Self) -> bool { + Float::eq_repr(a, b) + } + } + + impl TestIO for ($f_ty, $f_ty) { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_float_2(len, |a, b| ret.borrow_mut().push((a, b))); + ret.into_inner() + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + )*}; + + (int $($i_ty:ty),+) => {$( + impl TestIO for $i_ty { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz(len, |a| ret.borrow_mut().push(a)); + ret.into_inner() + } + + fn check_eq(a: Self, b: Self) -> bool { + a == b + } + } + + impl TestIO for ($i_ty, $i_ty) { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_2(len, |a, b| ret.borrow_mut().push((a, b))); + ret.into_inner() + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + )*}; + + ((float, int) ($f_ty:ty, $i_ty:ty)) => { + impl TestIO for ($f_ty, $i_ty) { + fn make_testvec(len: u32) -> Vec<Self> { + // refcell because fuzz_* takes a `Fn` + let ivec = RefCell::new(Vec::new()); + let fvec = RefCell::new(Vec::new()); + + crate::fuzz(len.isqrt(), |a| ivec.borrow_mut().push(a)); + crate::fuzz_float(len.isqrt(), |a| fvec.borrow_mut().push(a)); + + let mut ret = Vec::new(); + let ivec = ivec.into_inner(); + let fvec = fvec.into_inner(); + + for f in fvec { + for i in &ivec { + ret.push((f, *i)); + } + } + + ret + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + } +} + +#[cfg(not(feature = "no-f16-f128"))] +impl_testio!(float f16, f128); +impl_testio!(float f32, 
f64); +impl_testio!(int i16, i32, i64, i128); +impl_testio!(int u16, u32, u64, u128); +impl_testio!((float, int)(f32, i32)); +impl_testio!((float, int)(f64, i32)); diff --git a/testcrate/src/lib.rs b/testcrate/src/lib.rs index 5ee96ad27..f9b052528 100644 --- a/testcrate/src/lib.rs +++ b/testcrate/src/lib.rs @@ -13,6 +13,12 @@ //! Some floating point tests are disabled for specific architectures, because they do not have //! correct rounding. #![no_std] +#![cfg_attr(not(feature = "no-f16-f128"), feature(f128))] +#![cfg_attr(not(feature = "no-f16-f128"), feature(f16))] +#![feature(isqrt)] + +pub mod bench; +extern crate alloc; use compiler_builtins::float::Float; use compiler_builtins::int::{Int, MinInt};