From 5ceb15b22a58fe08e0fa56aebc7d60cdcccf3d1d Mon Sep 17 00:00:00 2001 From: not-matthias Date: Tue, 22 Apr 2025 10:08:46 +0200 Subject: [PATCH 1/3] feat(codspeed): add runner ipc via fifo --- Cargo.lock | 50 +++++-- Cargo.toml | 1 + crates/cargo-codspeed/Cargo.toml | 2 +- crates/codspeed/Cargo.toml | 3 + crates/codspeed/src/fifo.rs | 229 +++++++++++++++++++++++++++++++ crates/codspeed/src/lib.rs | 2 + crates/codspeed/src/shared.rs | 15 ++ 7 files changed, 291 insertions(+), 11 deletions(-) create mode 100644 crates/codspeed/src/fifo.rs create mode 100644 crates/codspeed/src/shared.rs diff --git a/Cargo.lock b/Cargo.lock index 6d58a51f..ae5e776d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,9 +83,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" [[package]] name = "approx" @@ -386,6 +386,15 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dfdb4953a096c551ce9ace855a604d702e6e62d77fac690575ae347571717f5" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -511,6 +520,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "ciborium" version = "0.2.2" @@ -583,8 +598,11 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" name = "codspeed" version = "2.10.1" dependencies = [ + "anyhow", + "bincode", "colored", "libc", + "nix", "serde", "serde_json", "tempfile", @@ -1168,9 +1186,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "libm" @@ -1260,6 +1278,18 @@ dependencies = [ "typenum", ] +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -1857,18 +1887,18 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "thiserror" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", @@ -1887,9 +1917,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.39.1" +version = "1.44.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d040ac2b29ab03b09d4129c2f5bbd012a3ac2f79d38ff506a4bf8dd34b0eac8a" +checksum = "f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a" dependencies = [ "backtrace", "pin-project-lite", diff --git a/Cargo.toml b/Cargo.toml index 4dc8eff2..89c50f5f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ members = [ resolver = "2" [workspace.dependencies] +anyhow = "1.0.97" itertools = "0.14.0" serde = { version = "1.0.217", features = ["derive"] } serde_json = "1.0.138" diff --git a/crates/cargo-codspeed/Cargo.toml b/crates/cargo-codspeed/Cargo.toml index 5fccd680..e752e8a8 100644 --- a/crates/cargo-codspeed/Cargo.toml +++ b/crates/cargo-codspeed/Cargo.toml @@ -21,7 +21,7 @@ keywords = ["codspeed", "benchmark", "cargo"] cargo_metadata = "0.19.2" clap = { version = "=4.5.17", features = ["derive", "env"] } termcolor = "1.4" -anyhow = "1.0.86" +anyhow = { workspace = true } itertools = { workspace = true } anstyle = "1.0.8" serde = { workspace = true } diff --git a/crates/codspeed/Cargo.toml b/crates/codspeed/Cargo.toml index 4ff93709..350270a6 100644 --- a/crates/codspeed/Cargo.toml +++ b/crates/codspeed/Cargo.toml @@ -18,8 +18,11 @@ categories = [ keywords = ["codspeed", "benchmark"] [dependencies] +anyhow = { workspace = true } +bincode = "1.3.3" colored = "2.0.0" libc = "^0.2" +nix = { version = "0.29.0", features = ["fs"] } serde = { workspace = true } serde_json = { workspace = true } uuid = { version = "1.12.1", features = ["v4"] } diff --git a/crates/codspeed/src/fifo.rs b/crates/codspeed/src/fifo.rs new file mode 100644 index 00000000..347cb5e9 --- /dev/null +++ b/crates/codspeed/src/fifo.rs @@ -0,0 +1,229 @@ +pub use super::shared::*; +use anyhow::bail; +use nix::libc::O_NONBLOCK; +use nix::sys::stat; +use nix::unistd::{self, unlink}; +use std::fs::{File, OpenOptions}; +use std::io::Read; +use std::os::unix::fs::OpenOptionsExt; +use std::path::{Path, PathBuf}; + +pub struct BenchGuard { + ctl_fifo: FifoIpc, + ack_fifo: FifoIpc, +} + +impl BenchGuard { + pub fn new(ctl_fifo: &str, ack_fifo: &str) -> anyhow::Result { + let mut instance = Self { + ctl_fifo: FifoIpc::connect(ctl_fifo)?.with_writer()?, + ack_fifo: FifoIpc::connect(ack_fifo)?.with_reader()?, + }; + + instance.send_cmd(Command::SetIntegration { + name: "codspeed-rust".into(), + version: env!("CARGO_PKG_VERSION").into(), + })?; // FIXME: Just send it once + instance.send_cmd(Command::StartBenchmark)?; + + Ok(instance) + } + + pub fn new_with_runner_fifo() -> anyhow::Result { + Self::new(RUNNER_CTL_FIFO, RUNNER_ACK_FIFO) + } + + fn send_cmd(&mut self, cmd: Command) -> anyhow::Result<()> { + self.ctl_fifo.send_cmd(cmd)?; + self.ack_fifo.wait_for_ack(); + + Ok(()) + } +} + +impl Drop for BenchGuard { + fn drop(&mut self) { + self.send_cmd(Command::StopBenchmark) + .expect("Failed to send stop command"); + } +} + +pub fn send_cmd(cmd: Command) -> anyhow::Result<()> { + let mut writer = FifoIpc::connect(RUNNER_CTL_FIFO)?.with_writer()?; + writer.send_cmd(cmd).unwrap(); + + let mut reader = FifoIpc::connect(RUNNER_ACK_FIFO)?.with_reader()?; + reader.wait_for_ack(); + + Ok(()) +} + +pub struct FifoIpc { + path: PathBuf, + reader: Option, + writer: Option, +} + +impl FifoIpc { + /// Creates a new FIFO at the specified path and connects to it. + /// + /// ```rust + /// use codspeed::fifo::{FifoIpc, Command}; + /// + /// // Create the reader before the writer (required!): + /// let mut read_fifo = FifoIpc::create("/tmp/doctest.fifo").unwrap().with_reader().unwrap(); + /// + /// // Connect to the FIFO and send a command + /// let mut fifo = FifoIpc::connect("/tmp/doctest.fifo").unwrap().with_writer().unwrap(); + /// fifo.send_cmd(Command::StartBenchmark).unwrap(); + /// + /// // Receive the command in the reader + /// let cmd = read_fifo.recv_cmd().unwrap(); + /// assert_eq!(cmd, Command::StartBenchmark); + /// ``` + pub fn create>(path: P) -> anyhow::Result { + // Remove the previous FIFO (if it exists) + let _ = unlink(path.as_ref()); + + // Create the FIFO with RWX permissions for the owner + unistd::mkfifo(path.as_ref(), stat::Mode::S_IRWXU)?; + + Self::connect(path.as_ref()) + } + + pub fn connect>(path: P) -> anyhow::Result { + let path = path.into(); + + if !path.exists() { + bail!("FIFO does not exist: {}", path.display()); + } + + Ok(Self { + path, + reader: None, + writer: None, + }) + } + + pub fn with_reader(mut self) -> anyhow::Result { + self.reader = Some( + OpenOptions::new() + .write(true) + .read(true) + .custom_flags(O_NONBLOCK) + .open(&self.path)?, + ); + Ok(self) + } + + /// WARNING: Writer must be opened _AFTER_ the reader. + pub fn with_writer(mut self) -> anyhow::Result { + self.writer = Some( + OpenOptions::new() + .write(true) + .custom_flags(O_NONBLOCK) + .open(&self.path)?, + ); + Ok(self) + } + + pub fn recv_cmd(&mut self) -> anyhow::Result { + // First read the length (u32 = 4 bytes) + let mut len_buffer = [0u8; 4]; + self.read_exact(&mut len_buffer)?; + let message_len = u32::from_le_bytes(len_buffer) as usize; + + // Try to read the message + let mut buffer = vec![0u8; message_len]; + loop { + if self.read_exact(&mut buffer).is_ok() { + break; + } + } + + let decoded = bincode::deserialize(&buffer)?; + Ok(decoded) + } + + pub fn send_cmd(&mut self, cmd: Command) -> anyhow::Result<()> { + use std::io::Write; + + let encoded = bincode::serialize(&cmd)?; + self.write_all(&(encoded.len() as u32).to_le_bytes())?; + self.write_all(&encoded)?; + Ok(()) + } + + pub fn wait_for_ack(&mut self) { + loop { + if let Ok(Command::Ack) = self.recv_cmd() { + break; + } + } + } +} + +impl std::io::Write for FifoIpc { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + if let Some(writer) = self.writer.as_mut() { + writer.write(buf) + } else { + Err(std::io::Error::new( + std::io::ErrorKind::NotConnected, + "Writer not initialized", + )) + } + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +impl std::io::Read for FifoIpc { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + if let Some(reader) = self.reader.as_mut() { + reader.read(buf) + } else { + Err(std::io::Error::new( + std::io::ErrorKind::NotConnected, + "Reader not initialized", + )) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + #[test] + fn test_ipc_write_read() { + let mut fifo = FifoIpc::create("/tmp/test1.fifo") + .unwrap() + .with_reader() + .unwrap() + .with_writer() + .unwrap(); + + fifo.write_all(b"Hello").unwrap(); + let mut buffer = [0; 5]; + fifo.read_exact(&mut buffer).unwrap(); + assert_eq!(&buffer, b"Hello"); + } + + #[test] + fn test_ipc_send_recv_cmd() { + let mut fifo = FifoIpc::create("/tmp/test2.fifo") + .unwrap() + .with_reader() + .unwrap() + .with_writer() + .unwrap(); + + fifo.send_cmd(Command::StartBenchmark).unwrap(); + let cmd = fifo.recv_cmd().unwrap(); + assert_eq!(cmd, Command::StartBenchmark); + } +} diff --git a/crates/codspeed/src/lib.rs b/crates/codspeed/src/lib.rs index bbac6486..87ad3453 100644 --- a/crates/codspeed/src/lib.rs +++ b/crates/codspeed/src/lib.rs @@ -1,6 +1,8 @@ pub mod codspeed; +pub mod fifo; mod macros; mod measurement; mod request; +mod shared; pub mod utils; pub mod walltime; diff --git a/crates/codspeed/src/shared.rs b/crates/codspeed/src/shared.rs new file mode 100644 index 00000000..86c1433e --- /dev/null +++ b/crates/codspeed/src/shared.rs @@ -0,0 +1,15 @@ +//! WARNING: Has to be in sync with `runner`. + +pub const RUNNER_CTL_FIFO: &str = "/tmp/runner.ctl.fifo"; +pub const RUNNER_ACK_FIFO: &str = "/tmp/runner.ack.fifo"; + +#[derive(serde::Serialize, serde::Deserialize, Debug, PartialEq)] +pub enum Command { + CurrentBenchmark { pid: u32, uri: String }, + StartBenchmark, + StopBenchmark, + Ack, + PingPerf, + SetIntegration { name: String, version: String }, + Err, +} From 333eee4dd6331092e43835356557c228dc87aa67 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Tue, 22 Apr 2025 10:10:56 +0200 Subject: [PATCH 2/3] feat(divan_compat): add root frame and ipc with runner --- crates/divan_compat/divan_fork/src/bench/mod.rs | 2 ++ crates/divan_compat/divan_fork/src/divan.rs | 7 +++++++ crates/divan_compat/divan_fork/src/thread_pool.rs | 12 ++++++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/crates/divan_compat/divan_fork/src/bench/mod.rs b/crates/divan_compat/divan_fork/src/bench/mod.rs index a8e730b8..0234babf 100644 --- a/crates/divan_compat/divan_fork/src/bench/mod.rs +++ b/crates/divan_compat/divan_fork/src/bench/mod.rs @@ -657,6 +657,7 @@ impl<'a> BenchContext<'a> { let bench_overheads = timer.bench_overheads(); + let _guard = codspeed::fifo::BenchGuard::new_with_runner_fifo(); while { // Conditions for when sampling is over: if elapsed_picos >= max_picos { @@ -810,6 +811,7 @@ impl<'a> BenchContext<'a> { elapsed_picos = elapsed_picos.saturating_add(progress_picos); } } + core::mem::drop(_guard); // Reset flag for ignoring allocations. crate::alloc::IGNORE_ALLOC.set(false); diff --git a/crates/divan_compat/divan_fork/src/divan.rs b/crates/divan_compat/divan_fork/src/divan.rs index 942a0e5c..3fe6688e 100644 --- a/crates/divan_compat/divan_fork/src/divan.rs +++ b/crates/divan_compat/divan_fork/src/divan.rs @@ -428,6 +428,13 @@ mod codspeed { bench_context.samples.time_samples.iter().map(|s| s.duration.picos / 1_000).collect(); let max_time_ns = bench_context.options.max_time.map(|t| t.as_nanos()); + if let Err(error) = ::codspeed::fifo::send_cmd(codspeed::fifo::Command::CurrentBenchmark { + pid: std::process::id(), + uri: uri.clone(), + }) { + eprintln!("Failed to send benchmark URI to runner: {}", error); + } + ::codspeed::walltime::collect_raw_walltime_results( "divan", bench_name, diff --git a/crates/divan_compat/divan_fork/src/thread_pool.rs b/crates/divan_compat/divan_fork/src/thread_pool.rs index c607a936..4ec6118c 100644 --- a/crates/divan_compat/divan_fork/src/thread_pool.rs +++ b/crates/divan_compat/divan_fork/src/thread_pool.rs @@ -201,9 +201,17 @@ impl TaskShared { where F: Fn(usize), { - let task_fn = &(*task.cast::>()).task_fn; + #[inline(never)] + unsafe fn __codspeed_root_frame__(task: *const TaskShared<()>, thread: usize) + where + F: Fn(usize), + { + let task_fn = &(*task.cast::>()).task_fn; + + task_fn(thread); + } - task_fn(thread); + __codspeed_root_frame__::(task, thread); } Self { From 91145111f25b47462b6504962d3249c953fcc718 Mon Sep 17 00:00:00 2001 From: not-matthias Date: Tue, 22 Apr 2025 10:12:45 +0200 Subject: [PATCH 3/3] feat(criterion_compat): add root frame and ipc with runner --- .../criterion_fork/src/analysis/mod.rs | 7 + .../criterion_fork/src/bencher.rs | 1626 +++++++++-------- .../criterion_fork/src/routine.rs | 6 +- 3 files changed, 873 insertions(+), 766 deletions(-) diff --git a/crates/criterion_compat/criterion_fork/src/analysis/mod.rs b/crates/criterion_compat/criterion_fork/src/analysis/mod.rs index bddd90c1..d2431403 100644 --- a/crates/criterion_compat/criterion_fork/src/analysis/mod.rs +++ b/crates/criterion_compat/criterion_fork/src/analysis/mod.rs @@ -297,6 +297,13 @@ mod codspeed { ) { let (uri, bench_name) = create_uri_and_name(id, c); + if let Err(error) = ::codspeed::fifo::send_cmd(codspeed::fifo::Command::CurrentBenchmark { + pid: std::process::id(), + uri: uri.clone(), + }) { + eprintln!("Failed to send benchmark URI to runner: {}", error); + } + let avg_iter_per_round = iters.iter().sum::() / iters.len() as f64; let max_time_ns = Some(c.config.measurement_time.as_nanos()); let times_ns = avg_times.iter().map(|t| *t as u128).collect(); diff --git a/crates/criterion_compat/criterion_fork/src/bencher.rs b/crates/criterion_compat/criterion_fork/src/bencher.rs index 016aa284..198b5ccb 100644 --- a/crates/criterion_compat/criterion_fork/src/bencher.rs +++ b/crates/criterion_compat/criterion_fork/src/bencher.rs @@ -1,764 +1,862 @@ -use std::iter::IntoIterator; -use std::time::Duration; -use std::time::Instant; - -use crate::black_box; -use crate::measurement::{Measurement, WallTime}; -use crate::BatchSize; - -#[cfg(feature = "async")] -use std::future::Future; - -#[cfg(feature = "async")] -use crate::async_executor::AsyncExecutor; - -// ================================== MAINTENANCE NOTE ============================================= -// Any changes made to either Bencher or AsyncBencher will have to be replicated to the other! -// ================================== MAINTENANCE NOTE ============================================= - -/// Timer struct used to iterate a benchmarked function and measure the runtime. -/// -/// This struct provides different timing loops as methods. Each timing loop provides a different -/// way to time a routine and each has advantages and disadvantages. -/// -/// * If you want to do the iteration and measurement yourself (eg. passing the iteration count -/// to a separate process), use `iter_custom`. -/// * If your routine requires no per-iteration setup and returns a value with an expensive `drop` -/// method, use `iter_with_large_drop`. -/// * If your routine requires some per-iteration setup that shouldn't be timed, use `iter_batched` -/// or `iter_batched_ref`. See [`BatchSize`](enum.BatchSize.html) for a discussion of batch sizes. -/// If the setup value implements `Drop` and you don't want to include the `drop` time in the -/// measurement, use `iter_batched_ref`, otherwise use `iter_batched`. These methods are also -/// suitable for benchmarking routines which return a value with an expensive `drop` method, -/// but are more complex than `iter_with_large_drop`. -/// * Otherwise, use `iter`. -pub struct Bencher<'a, M: Measurement = WallTime> { - pub(crate) iterated: bool, // Have we iterated this benchmark? - pub(crate) iters: u64, // Number of times to iterate this benchmark - pub(crate) value: M::Value, // The measured value - pub(crate) measurement: &'a M, // Reference to the measurement object - pub(crate) elapsed_time: Duration, // How much time did it take to perform the iteration? Used for the warmup period. -} -impl<'a, M: Measurement> Bencher<'a, M> { - /// Times a `routine` by executing it many times and timing the total elapsed time. - /// - /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor. - /// - /// # Timing model - /// - /// Note that the `Bencher` also times the time required to destroy the output of `routine()`. - /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared - /// to the runtime of the `routine`. - /// - /// ```text - /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next) - /// ``` - /// - /// # Example - /// - /// ```rust - /// #[macro_use] extern crate criterion; - /// - /// use criterion::*; - /// - /// // The function to benchmark - /// fn foo() { - /// // ... - /// } - /// - /// fn bench(c: &mut Criterion) { - /// c.bench_function("iter", move |b| { - /// b.iter(|| foo()) - /// }); - /// } - /// - /// criterion_group!(benches, bench); - /// criterion_main!(benches); - /// ``` - /// - #[inline(never)] - pub fn iter(&mut self, mut routine: R) - where - R: FnMut() -> O, - { - self.iterated = true; - let time_start = Instant::now(); - let start = self.measurement.start(); - for _ in 0..self.iters { - black_box(routine()); - } - self.value = self.measurement.end(start); - self.elapsed_time = time_start.elapsed(); - } - - /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time. - /// - /// Prefer this timing loop in cases where `routine` has to do its own measurements to - /// get accurate timing information (for example in multi-threaded scenarios where you spawn - /// and coordinate with multiple threads). - /// - /// # Timing model - /// Custom, the timing model is whatever is returned as the Duration from `routine`. - /// - /// # Example - /// ```rust - /// #[macro_use] extern crate criterion; - /// use criterion::*; - /// use criterion::black_box; - /// use std::time::Instant; - /// - /// fn foo() { - /// // ... - /// } - /// - /// fn bench(c: &mut Criterion) { - /// c.bench_function("iter", move |b| { - /// b.iter_custom(|iters| { - /// let start = Instant::now(); - /// for _i in 0..iters { - /// black_box(foo()); - /// } - /// start.elapsed() - /// }) - /// }); - /// } - /// - /// criterion_group!(benches, bench); - /// criterion_main!(benches); - /// ``` - /// - #[inline(never)] - pub fn iter_custom(&mut self, mut routine: R) - where - R: FnMut(u64) -> M::Value, - { - self.iterated = true; - let time_start = Instant::now(); - self.value = routine(self.iters); - self.elapsed_time = time_start.elapsed(); - } - - #[doc(hidden)] - pub fn iter_with_setup(&mut self, setup: S, routine: R) - where - S: FnMut() -> I, - R: FnMut(I) -> O, - { - self.iter_batched(setup, routine, BatchSize::PerIteration); - } - - /// Times a `routine` by collecting its output on each iteration. This avoids timing the - /// destructor of the value returned by `routine`. - /// - /// WARNING: This requires `O(iters * mem::size_of::())` of memory, and `iters` is not under the - /// control of the caller. If this causes out-of-memory errors, use `iter_batched` instead. - /// - /// # Timing model - /// - /// ``` text - /// elapsed = Instant::now + iters * (routine) + Iterator::collect::> - /// ``` - /// - /// # Example - /// - /// ```rust - /// #[macro_use] extern crate criterion; - /// - /// use criterion::*; - /// - /// fn create_vector() -> Vec { - /// # vec![] - /// // ... - /// } - /// - /// fn bench(c: &mut Criterion) { - /// c.bench_function("with_drop", move |b| { - /// // This will avoid timing the Vec::drop. - /// b.iter_with_large_drop(|| create_vector()) - /// }); - /// } - /// - /// criterion_group!(benches, bench); - /// criterion_main!(benches); - /// ``` - /// - pub fn iter_with_large_drop(&mut self, mut routine: R) - where - R: FnMut() -> O, - { - self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput); - } - - /// Times a `routine` that requires some input by generating a batch of input, then timing the - /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for - /// details on choosing the batch size. Use this when the routine must consume its input. - /// - /// For example, use this loop to benchmark sorting algorithms, because they require unsorted - /// data on each iteration. - /// - /// # Timing model - /// - /// ```text - /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend - /// ``` - /// - /// # Example - /// - /// ```rust - /// #[macro_use] extern crate criterion; - /// - /// use criterion::*; - /// - /// fn create_scrambled_data() -> Vec { - /// # vec![] - /// // ... - /// } - /// - /// // The sorting algorithm to test - /// fn sort(data: &mut [u64]) { - /// // ... - /// } - /// - /// fn bench(c: &mut Criterion) { - /// let data = create_scrambled_data(); - /// - /// c.bench_function("with_setup", move |b| { - /// // This will avoid timing the to_vec call. - /// b.iter_batched(|| data.clone(), |mut data| sort(&mut data), BatchSize::SmallInput) - /// }); - /// } - /// - /// criterion_group!(benches, bench); - /// criterion_main!(benches); - /// ``` - /// - #[inline(never)] - pub fn iter_batched(&mut self, mut setup: S, mut routine: R, size: BatchSize) - where - S: FnMut() -> I, - R: FnMut(I) -> O, - { - self.iterated = true; - let batch_size = size.iters_per_batch(self.iters); - assert!(batch_size != 0, "Batch size must not be zero."); - let time_start = Instant::now(); - self.value = self.measurement.zero(); - - if batch_size == 1 { - for _ in 0..self.iters { - let input = black_box(setup()); - - let start = self.measurement.start(); - let output = routine(input); - let end = self.measurement.end(start); - self.value = self.measurement.add(&self.value, &end); - - drop(black_box(output)); - } - } else { - let mut iteration_counter = 0; - - while iteration_counter < self.iters { - let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter); - - let inputs = black_box((0..batch_size).map(|_| setup()).collect::>()); - let mut outputs = Vec::with_capacity(batch_size as usize); - - let start = self.measurement.start(); - outputs.extend(inputs.into_iter().map(&mut routine)); - let end = self.measurement.end(start); - self.value = self.measurement.add(&self.value, &end); - - black_box(outputs); - - iteration_counter += batch_size; - } - } - - self.elapsed_time = time_start.elapsed(); - } - - /// Times a `routine` that requires some input by generating a batch of input, then timing the - /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for - /// details on choosing the batch size. Use this when the routine should accept the input by - /// mutable reference. - /// - /// For example, use this loop to benchmark sorting algorithms, because they require unsorted - /// data on each iteration. - /// - /// # Timing model - /// - /// ```text - /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend - /// ``` - /// - /// # Example - /// - /// ```rust - /// #[macro_use] extern crate criterion; - /// - /// use criterion::*; - /// - /// fn create_scrambled_data() -> Vec { - /// # vec![] - /// // ... - /// } - /// - /// // The sorting algorithm to test - /// fn sort(data: &mut [u64]) { - /// // ... - /// } - /// - /// fn bench(c: &mut Criterion) { - /// let data = create_scrambled_data(); - /// - /// c.bench_function("with_setup", move |b| { - /// // This will avoid timing the to_vec call. - /// b.iter_batched(|| data.clone(), |mut data| sort(&mut data), BatchSize::SmallInput) - /// }); - /// } - /// - /// criterion_group!(benches, bench); - /// criterion_main!(benches); - /// ``` - /// - #[inline(never)] - pub fn iter_batched_ref(&mut self, mut setup: S, mut routine: R, size: BatchSize) - where - S: FnMut() -> I, - R: FnMut(&mut I) -> O, - { - self.iterated = true; - let batch_size = size.iters_per_batch(self.iters); - assert!(batch_size != 0, "Batch size must not be zero."); - let time_start = Instant::now(); - self.value = self.measurement.zero(); - - if batch_size == 1 { - for _ in 0..self.iters { - let mut input = black_box(setup()); - - let start = self.measurement.start(); - let output = routine(&mut input); - let end = self.measurement.end(start); - self.value = self.measurement.add(&self.value, &end); - - drop(black_box(output)); - drop(black_box(input)); - } - } else { - let mut iteration_counter = 0; - - while iteration_counter < self.iters { - let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter); - - let mut inputs = black_box((0..batch_size).map(|_| setup()).collect::>()); - let mut outputs = Vec::with_capacity(batch_size as usize); - - let start = self.measurement.start(); - outputs.extend(inputs.iter_mut().map(&mut routine)); - let end = self.measurement.end(start); - self.value = self.measurement.add(&self.value, &end); - - black_box(outputs); - - iteration_counter += batch_size; - } - } - self.elapsed_time = time_start.elapsed(); - } - - // Benchmarks must actually call one of the iter methods. This causes benchmarks to fail loudly - // if they don't. - pub(crate) fn assert_iterated(&mut self) { - assert!( - self.iterated, - "Benchmark function must call Bencher::iter or related method." - ); - self.iterated = false; - } - - /// Convert this bencher into an AsyncBencher, which enables async/await support. - #[cfg(feature = "async")] - pub fn to_async<'b, A: AsyncExecutor>(&'b mut self, runner: A) -> AsyncBencher<'a, 'b, A, M> { - AsyncBencher { b: self, runner } - } -} - -/// Async/await variant of the Bencher struct. -#[cfg(feature = "async")] -pub struct AsyncBencher<'a, 'b, A: AsyncExecutor, M: Measurement = WallTime> { - b: &'b mut Bencher<'a, M>, - runner: A, -} -#[cfg(feature = "async")] -impl<'a, 'b, A: AsyncExecutor, M: Measurement> AsyncBencher<'a, 'b, A, M> { - /// Times a `routine` by executing it many times and timing the total elapsed time. - /// - /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor. - /// - /// # Timing model - /// - /// Note that the `AsyncBencher` also times the time required to destroy the output of `routine()`. - /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared - /// to the runtime of the `routine`. - /// - /// ```text - /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next) - /// ``` - /// - /// # Example - /// - /// ```rust - /// #[macro_use] extern crate criterion; - /// - /// use criterion::*; - /// use criterion::async_executor::FuturesExecutor; - /// - /// // The function to benchmark - /// async fn foo() { - /// // ... - /// } - /// - /// fn bench(c: &mut Criterion) { - /// c.bench_function("iter", move |b| { - /// b.to_async(FuturesExecutor).iter(|| async { foo().await } ) - /// }); - /// } - /// - /// criterion_group!(benches, bench); - /// criterion_main!(benches); - /// ``` - /// - #[inline(never)] - pub fn iter(&mut self, mut routine: R) - where - R: FnMut() -> F, - F: Future, - { - let AsyncBencher { b, runner } = self; - runner.block_on(async { - b.iterated = true; - let time_start = Instant::now(); - let start = b.measurement.start(); - for _ in 0..b.iters { - black_box(routine().await); - } - b.value = b.measurement.end(start); - b.elapsed_time = time_start.elapsed(); - }); - } - - /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time. - /// - /// Prefer this timing loop in cases where `routine` has to do its own measurements to - /// get accurate timing information (for example in multi-threaded scenarios where you spawn - /// and coordinate with multiple threads). - /// - /// # Timing model - /// Custom, the timing model is whatever is returned as the Duration from `routine`. - /// - /// # Example - /// ```rust - /// #[macro_use] extern crate criterion; - /// use criterion::*; - /// use criterion::black_box; - /// use criterion::async_executor::FuturesExecutor; - /// use std::time::Instant; - /// - /// async fn foo() { - /// // ... - /// } - /// - /// fn bench(c: &mut Criterion) { - /// c.bench_function("iter", move |b| { - /// b.to_async(FuturesExecutor).iter_custom(|iters| { - /// async move { - /// let start = Instant::now(); - /// for _i in 0..iters { - /// black_box(foo().await); - /// } - /// start.elapsed() - /// } - /// }) - /// }); - /// } - /// - /// criterion_group!(benches, bench); - /// criterion_main!(benches); - /// ``` - /// - #[inline(never)] - pub fn iter_custom(&mut self, mut routine: R) - where - R: FnMut(u64) -> F, - F: Future, - { - let AsyncBencher { b, runner } = self; - runner.block_on(async { - b.iterated = true; - let time_start = Instant::now(); - b.value = routine(b.iters).await; - b.elapsed_time = time_start.elapsed(); - }) - } - - #[doc(hidden)] - pub fn iter_with_setup(&mut self, setup: S, routine: R) - where - S: FnMut() -> I, - R: FnMut(I) -> F, - F: Future, - { - self.iter_batched(setup, routine, BatchSize::PerIteration); - } - - /// Times a `routine` by collecting its output on each iteration. This avoids timing the - /// destructor of the value returned by `routine`. - /// - /// WARNING: This requires `O(iters * mem::size_of::())` of memory, and `iters` is not under the - /// control of the caller. If this causes out-of-memory errors, use `iter_batched` instead. - /// - /// # Timing model - /// - /// ``` text - /// elapsed = Instant::now + iters * (routine) + Iterator::collect::> - /// ``` - /// - /// # Example - /// - /// ```rust - /// #[macro_use] extern crate criterion; - /// - /// use criterion::*; - /// use criterion::async_executor::FuturesExecutor; - /// - /// async fn create_vector() -> Vec { - /// # vec![] - /// // ... - /// } - /// - /// fn bench(c: &mut Criterion) { - /// c.bench_function("with_drop", move |b| { - /// // This will avoid timing the Vec::drop. - /// b.to_async(FuturesExecutor).iter_with_large_drop(|| async { create_vector().await }) - /// }); - /// } - /// - /// criterion_group!(benches, bench); - /// criterion_main!(benches); - /// ``` - /// - pub fn iter_with_large_drop(&mut self, mut routine: R) - where - R: FnMut() -> F, - F: Future, - { - self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput); - } - - #[doc(hidden)] - pub fn iter_with_large_setup(&mut self, setup: S, routine: R) - where - S: FnMut() -> I, - R: FnMut(I) -> F, - F: Future, - { - self.iter_batched(setup, routine, BatchSize::NumBatches(1)); - } - - /// Times a `routine` that requires some input by generating a batch of input, then timing the - /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for - /// details on choosing the batch size. Use this when the routine must consume its input. - /// - /// For example, use this loop to benchmark sorting algorithms, because they require unsorted - /// data on each iteration. - /// - /// # Timing model - /// - /// ```text - /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend - /// ``` - /// - /// # Example - /// - /// ```rust - /// #[macro_use] extern crate criterion; - /// - /// use criterion::*; - /// use criterion::async_executor::FuturesExecutor; - /// - /// fn create_scrambled_data() -> Vec { - /// # vec![] - /// // ... - /// } - /// - /// // The sorting algorithm to test - /// async fn sort(data: &mut [u64]) { - /// // ... - /// } - /// - /// fn bench(c: &mut Criterion) { - /// let data = create_scrambled_data(); - /// - /// c.bench_function("with_setup", move |b| { - /// // This will avoid timing the to_vec call. - /// b.iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput) - /// }); - /// } - /// - /// criterion_group!(benches, bench); - /// criterion_main!(benches); - /// ``` - /// - #[inline(never)] - pub fn iter_batched(&mut self, mut setup: S, mut routine: R, size: BatchSize) - where - S: FnMut() -> I, - R: FnMut(I) -> F, - F: Future, - { - let AsyncBencher { b, runner } = self; - runner.block_on(async { - b.iterated = true; - let batch_size = size.iters_per_batch(b.iters); - assert!(batch_size != 0, "Batch size must not be zero."); - let time_start = Instant::now(); - b.value = b.measurement.zero(); - - if batch_size == 1 { - for _ in 0..b.iters { - let input = black_box(setup()); - - let start = b.measurement.start(); - let output = routine(input).await; - let end = b.measurement.end(start); - b.value = b.measurement.add(&b.value, &end); - - drop(black_box(output)); - } - } else { - let mut iteration_counter = 0; - - while iteration_counter < b.iters { - let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter); - - let inputs = black_box((0..batch_size).map(|_| setup()).collect::>()); - let mut outputs = Vec::with_capacity(batch_size as usize); - - let start = b.measurement.start(); - // Can't use .extend here like the sync version does - for input in inputs { - outputs.push(routine(input).await); - } - let end = b.measurement.end(start); - b.value = b.measurement.add(&b.value, &end); - - black_box(outputs); - - iteration_counter += batch_size; - } - } - - b.elapsed_time = time_start.elapsed(); - }) - } - - /// Times a `routine` that requires some input by generating a batch of input, then timing the - /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for - /// details on choosing the batch size. Use this when the routine should accept the input by - /// mutable reference. - /// - /// For example, use this loop to benchmark sorting algorithms, because they require unsorted - /// data on each iteration. - /// - /// # Timing model - /// - /// ```text - /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend - /// ``` - /// - /// # Example - /// - /// ```rust - /// #[macro_use] extern crate criterion; - /// - /// use criterion::*; - /// use criterion::async_executor::FuturesExecutor; - /// - /// fn create_scrambled_data() -> Vec { - /// # vec![] - /// // ... - /// } - /// - /// // The sorting algorithm to test - /// async fn sort(data: &mut [u64]) { - /// // ... - /// } - /// - /// fn bench(c: &mut Criterion) { - /// let data = create_scrambled_data(); - /// - /// c.bench_function("with_setup", move |b| { - /// // This will avoid timing the to_vec call. - /// b.iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput) - /// }); - /// } - /// - /// criterion_group!(benches, bench); - /// criterion_main!(benches); - /// ``` - /// - #[inline(never)] - pub fn iter_batched_ref(&mut self, mut setup: S, mut routine: R, size: BatchSize) - where - S: FnMut() -> I, - R: FnMut(&mut I) -> F, - F: Future, - { - let AsyncBencher { b, runner } = self; - runner.block_on(async { - b.iterated = true; - let batch_size = size.iters_per_batch(b.iters); - assert!(batch_size != 0, "Batch size must not be zero."); - let time_start = Instant::now(); - b.value = b.measurement.zero(); - - if batch_size == 1 { - for _ in 0..b.iters { - let mut input = black_box(setup()); - - let start = b.measurement.start(); - let output = routine(&mut input).await; - let end = b.measurement.end(start); - b.value = b.measurement.add(&b.value, &end); - - drop(black_box(output)); - drop(black_box(input)); - } - } else { - let mut iteration_counter = 0; - - while iteration_counter < b.iters { - let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter); - - let inputs = black_box((0..batch_size).map(|_| setup()).collect::>()); - let mut outputs = Vec::with_capacity(batch_size as usize); - - let start = b.measurement.start(); - // Can't use .extend here like the sync version does - for mut input in inputs { - outputs.push(routine(&mut input).await); - } - let end = b.measurement.end(start); - b.value = b.measurement.add(&b.value, &end); - - black_box(outputs); - - iteration_counter += batch_size; - } - } - b.elapsed_time = time_start.elapsed(); - }); - } -} +#![allow(unused_mut)] + +use std::iter::IntoIterator; +use std::time::Duration; +use std::time::Instant; + +use crate::black_box; +use crate::measurement::{Measurement, WallTime}; +use crate::BatchSize; + +#[cfg(feature = "async")] +use std::future::Future; + +#[cfg(feature = "async")] +use crate::async_executor::AsyncExecutor; + +// ================================== MAINTENANCE NOTE ============================================= +// Any changes made to either Bencher or AsyncBencher will have to be replicated to the other! +// ================================== MAINTENANCE NOTE ============================================= + +/// Timer struct used to iterate a benchmarked function and measure the runtime. +/// +/// This struct provides different timing loops as methods. Each timing loop provides a different +/// way to time a routine and each has advantages and disadvantages. +/// +/// * If you want to do the iteration and measurement yourself (eg. passing the iteration count +/// to a separate process), use `iter_custom`. +/// * If your routine requires no per-iteration setup and returns a value with an expensive `drop` +/// method, use `iter_with_large_drop`. +/// * If your routine requires some per-iteration setup that shouldn't be timed, use `iter_batched` +/// or `iter_batched_ref`. See [`BatchSize`](enum.BatchSize.html) for a discussion of batch sizes. +/// If the setup value implements `Drop` and you don't want to include the `drop` time in the +/// measurement, use `iter_batched_ref`, otherwise use `iter_batched`. These methods are also +/// suitable for benchmarking routines which return a value with an expensive `drop` method, +/// but are more complex than `iter_with_large_drop`. +/// * Otherwise, use `iter`. +pub struct Bencher<'a, M: Measurement = WallTime> { + pub(crate) iterated: bool, // Have we iterated this benchmark? + pub(crate) iters: u64, // Number of times to iterate this benchmark + pub(crate) value: M::Value, // The measured value + pub(crate) measurement: &'a M, // Reference to the measurement object + pub(crate) elapsed_time: Duration, // How much time did it take to perform the iteration? Used for the warmup period. +} +impl<'a, M: Measurement> Bencher<'a, M> { + /// Times a `routine` by executing it many times and timing the total elapsed time. + /// + /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor. + /// + /// # Timing model + /// + /// Note that the `Bencher` also times the time required to destroy the output of `routine()`. + /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared + /// to the runtime of the `routine`. + /// + /// ```text + /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next) + /// ``` + /// + /// # Example + /// + /// ```rust + /// #[macro_use] extern crate criterion; + /// + /// use criterion::*; + /// + /// // The function to benchmark + /// fn foo() { + /// // ... + /// } + /// + /// fn bench(c: &mut Criterion) { + /// c.bench_function("iter", move |b| { + /// b.iter(|| foo()) + /// }); + /// } + /// + /// criterion_group!(benches, bench); + /// criterion_main!(benches); + /// ``` + /// + #[inline(never)] + pub fn iter(&mut self, mut routine: R) + where + R: FnMut() -> O, + { + self.__codspeed_root_frame__iter(routine) + } + + #[inline(never)] + #[allow(non_snake_case, missing_docs)] + pub fn __codspeed_root_frame__iter(&mut self, mut routine: R) + where + R: FnMut() -> O, + { + self.iterated = true; + let time_start = Instant::now(); + let start = self.measurement.start(); + for _ in 0..self.iters { + black_box(routine()); + } + self.value = self.measurement.end(start); + self.elapsed_time = time_start.elapsed(); + } + + /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time. + /// + /// Prefer this timing loop in cases where `routine` has to do its own measurements to + /// get accurate timing information (for example in multi-threaded scenarios where you spawn + /// and coordinate with multiple threads). + /// + /// # Timing model + /// Custom, the timing model is whatever is returned as the Duration from `routine`. + /// + /// # Example + /// ```rust + /// #[macro_use] extern crate criterion; + /// use criterion::*; + /// use criterion::black_box; + /// use std::time::Instant; + /// + /// fn foo() { + /// // ... + /// } + /// + /// fn bench(c: &mut Criterion) { + /// c.bench_function("iter", move |b| { + /// b.iter_custom(|iters| { + /// let start = Instant::now(); + /// for _i in 0..iters { + /// black_box(foo()); + /// } + /// start.elapsed() + /// }) + /// }); + /// } + /// + /// criterion_group!(benches, bench); + /// criterion_main!(benches); + /// ``` + /// + #[inline(never)] + pub fn iter_custom(&mut self, mut routine: R) + where + R: FnMut(u64) -> M::Value, + { + self.__codspeed_root_frame__iter_custom(routine) + } + + #[inline(never)] + #[allow(missing_docs, non_snake_case)] + pub fn __codspeed_root_frame__iter_custom(&mut self, mut routine: R) + where + R: FnMut(u64) -> M::Value, + { + self.iterated = true; + let time_start = Instant::now(); + self.value = routine(self.iters); + self.elapsed_time = time_start.elapsed(); + } + + #[doc(hidden)] + pub fn iter_with_setup(&mut self, setup: S, routine: R) + where + S: FnMut() -> I, + R: FnMut(I) -> O, + { + self.iter_batched(setup, routine, BatchSize::PerIteration); + } + + /// Times a `routine` by collecting its output on each iteration. This avoids timing the + /// destructor of the value returned by `routine`. + /// + /// WARNING: This requires `O(iters * mem::size_of::())` of memory, and `iters` is not under the + /// control of the caller. If this causes out-of-memory errors, use `iter_batched` instead. + /// + /// # Timing model + /// + /// ``` text + /// elapsed = Instant::now + iters * (routine) + Iterator::collect::> + /// ``` + /// + /// # Example + /// + /// ```rust + /// #[macro_use] extern crate criterion; + /// + /// use criterion::*; + /// + /// fn create_vector() -> Vec { + /// # vec![] + /// // ... + /// } + /// + /// fn bench(c: &mut Criterion) { + /// c.bench_function("with_drop", move |b| { + /// // This will avoid timing the Vec::drop. + /// b.iter_with_large_drop(|| create_vector()) + /// }); + /// } + /// + /// criterion_group!(benches, bench); + /// criterion_main!(benches); + /// ``` + /// + pub fn iter_with_large_drop(&mut self, mut routine: R) + where + R: FnMut() -> O, + { + self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput); + } + + /// Times a `routine` that requires some input by generating a batch of input, then timing the + /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for + /// details on choosing the batch size. Use this when the routine must consume its input. + /// + /// For example, use this loop to benchmark sorting algorithms, because they require unsorted + /// data on each iteration. + /// + /// # Timing model + /// + /// ```text + /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend + /// ``` + /// + /// # Example + /// + /// ```rust + /// #[macro_use] extern crate criterion; + /// + /// use criterion::*; + /// + /// fn create_scrambled_data() -> Vec { + /// # vec![] + /// // ... + /// } + /// + /// // The sorting algorithm to test + /// fn sort(data: &mut [u64]) { + /// // ... + /// } + /// + /// fn bench(c: &mut Criterion) { + /// let data = create_scrambled_data(); + /// + /// c.bench_function("with_setup", move |b| { + /// // This will avoid timing the to_vec call. + /// b.iter_batched(|| data.clone(), |mut data| sort(&mut data), BatchSize::SmallInput) + /// }); + /// } + /// + /// criterion_group!(benches, bench); + /// criterion_main!(benches); + /// ``` + /// + #[inline(never)] + pub fn iter_batched(&mut self, mut setup: S, mut routine: R, size: BatchSize) + where + S: FnMut() -> I, + R: FnMut(I) -> O, + { + self.__codspeed_root_frame__iter_batched(setup, routine, size); + } + + #[inline(never)] + #[allow(missing_docs, non_snake_case)] + pub fn __codspeed_root_frame__iter_batched( + &mut self, + mut setup: S, + mut routine: R, + size: BatchSize, + ) where + S: FnMut() -> I, + R: FnMut(I) -> O, + { + self.iterated = true; + let batch_size = size.iters_per_batch(self.iters); + assert!(batch_size != 0, "Batch size must not be zero."); + let time_start = Instant::now(); + self.value = self.measurement.zero(); + + if batch_size == 1 { + for _ in 0..self.iters { + let input = black_box(setup()); + + let start = self.measurement.start(); + let output = routine(input); + let end = self.measurement.end(start); + self.value = self.measurement.add(&self.value, &end); + + drop(black_box(output)); + } + } else { + let mut iteration_counter = 0; + + while iteration_counter < self.iters { + let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter); + + let inputs = black_box((0..batch_size).map(|_| setup()).collect::>()); + let mut outputs = Vec::with_capacity(batch_size as usize); + + let start = self.measurement.start(); + outputs.extend(inputs.into_iter().map(&mut routine)); + let end = self.measurement.end(start); + self.value = self.measurement.add(&self.value, &end); + + black_box(outputs); + + iteration_counter += batch_size; + } + } + + self.elapsed_time = time_start.elapsed(); + } + + /// Times a `routine` that requires some input by generating a batch of input, then timing the + /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for + /// details on choosing the batch size. Use this when the routine should accept the input by + /// mutable reference. + /// + /// For example, use this loop to benchmark sorting algorithms, because they require unsorted + /// data on each iteration. + /// + /// # Timing model + /// + /// ```text + /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend + /// ``` + /// + /// # Example + /// + /// ```rust + /// #[macro_use] extern crate criterion; + /// + /// use criterion::*; + /// + /// fn create_scrambled_data() -> Vec { + /// # vec![] + /// // ... + /// } + /// + /// // The sorting algorithm to test + /// fn sort(data: &mut [u64]) { + /// // ... + /// } + /// + /// fn bench(c: &mut Criterion) { + /// let data = create_scrambled_data(); + /// + /// c.bench_function("with_setup", move |b| { + /// // This will avoid timing the to_vec call. + /// b.iter_batched(|| data.clone(), |mut data| sort(&mut data), BatchSize::SmallInput) + /// }); + /// } + /// + /// criterion_group!(benches, bench); + /// criterion_main!(benches); + /// ``` + /// + #[inline(never)] + pub fn iter_batched_ref(&mut self, mut setup: S, mut routine: R, size: BatchSize) + where + S: FnMut() -> I, + R: FnMut(&mut I) -> O, + { + self.__codspeed_root_frame__iter_batched_ref(setup, routine, size) + } + + #[inline(never)] + #[allow(missing_docs, non_snake_case)] + pub fn __codspeed_root_frame__iter_batched_ref( + &mut self, + mut setup: S, + mut routine: R, + size: BatchSize, + ) where + S: FnMut() -> I, + R: FnMut(&mut I) -> O, + { + self.iterated = true; + let batch_size = size.iters_per_batch(self.iters); + assert!(batch_size != 0, "Batch size must not be zero."); + let time_start = Instant::now(); + self.value = self.measurement.zero(); + + if batch_size == 1 { + for _ in 0..self.iters { + let mut input = black_box(setup()); + + let start = self.measurement.start(); + let output = routine(&mut input); + let end = self.measurement.end(start); + self.value = self.measurement.add(&self.value, &end); + + drop(black_box(output)); + drop(black_box(input)); + } + } else { + let mut iteration_counter = 0; + + while iteration_counter < self.iters { + let batch_size = ::std::cmp::min(batch_size, self.iters - iteration_counter); + + let mut inputs = black_box((0..batch_size).map(|_| setup()).collect::>()); + let mut outputs = Vec::with_capacity(batch_size as usize); + + let start = self.measurement.start(); + outputs.extend(inputs.iter_mut().map(&mut routine)); + let end = self.measurement.end(start); + self.value = self.measurement.add(&self.value, &end); + + black_box(outputs); + + iteration_counter += batch_size; + } + } + self.elapsed_time = time_start.elapsed(); + } + + // Benchmarks must actually call one of the iter methods. This causes benchmarks to fail loudly + // if they don't. + pub(crate) fn assert_iterated(&mut self) { + assert!( + self.iterated, + "Benchmark function must call Bencher::iter or related method." + ); + self.iterated = false; + } + + /// Convert this bencher into an AsyncBencher, which enables async/await support. + #[cfg(feature = "async")] + pub fn to_async<'b, A: AsyncExecutor>(&'b mut self, runner: A) -> AsyncBencher<'a, 'b, A, M> { + AsyncBencher { b: self, runner } + } +} + +/// Async/await variant of the Bencher struct. +#[cfg(feature = "async")] +pub struct AsyncBencher<'a, 'b, A: AsyncExecutor, M: Measurement = WallTime> { + b: &'b mut Bencher<'a, M>, + runner: A, +} +#[cfg(feature = "async")] +impl<'a, 'b, A: AsyncExecutor, M: Measurement> AsyncBencher<'a, 'b, A, M> { + /// Times a `routine` by executing it many times and timing the total elapsed time. + /// + /// Prefer this timing loop when `routine` returns a value that doesn't have a destructor. + /// + /// # Timing model + /// + /// Note that the `AsyncBencher` also times the time required to destroy the output of `routine()`. + /// Therefore prefer this timing loop when the runtime of `mem::drop(O)` is negligible compared + /// to the runtime of the `routine`. + /// + /// ```text + /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next) + /// ``` + /// + /// # Example + /// + /// ```rust + /// #[macro_use] extern crate criterion; + /// + /// use criterion::*; + /// use criterion::async_executor::FuturesExecutor; + /// + /// // The function to benchmark + /// async fn foo() { + /// // ... + /// } + /// + /// fn bench(c: &mut Criterion) { + /// c.bench_function("iter", move |b| { + /// b.to_async(FuturesExecutor).iter(|| async { foo().await } ) + /// }); + /// } + /// + /// criterion_group!(benches, bench); + /// criterion_main!(benches); + /// ``` + /// + #[inline(never)] + pub fn iter(&mut self, mut routine: R) + where + R: FnMut() -> F, + F: Future, + { + self.__codspeed_root_frame__iter(routine) + } + + #[inline(never)] + #[allow(non_snake_case, missing_docs)] + pub fn __codspeed_root_frame__iter(&mut self, mut routine: R) + where + R: FnMut() -> F, + F: Future, + { + let AsyncBencher { b, runner } = self; + runner.block_on(async { + b.iterated = true; + let time_start = Instant::now(); + let start = b.measurement.start(); + for _ in 0..b.iters { + black_box(routine().await); + } + b.value = b.measurement.end(start); + b.elapsed_time = time_start.elapsed(); + }); + } + + /// Times a `routine` by executing it many times and relying on `routine` to measure its own execution time. + /// + /// Prefer this timing loop in cases where `routine` has to do its own measurements to + /// get accurate timing information (for example in multi-threaded scenarios where you spawn + /// and coordinate with multiple threads). + /// + /// # Timing model + /// Custom, the timing model is whatever is returned as the Duration from `routine`. + /// + /// # Example + /// ```rust + /// #[macro_use] extern crate criterion; + /// use criterion::*; + /// use criterion::black_box; + /// use criterion::async_executor::FuturesExecutor; + /// use std::time::Instant; + /// + /// async fn foo() { + /// // ... + /// } + /// + /// fn bench(c: &mut Criterion) { + /// c.bench_function("iter", move |b| { + /// b.to_async(FuturesExecutor).iter_custom(|iters| { + /// async move { + /// let start = Instant::now(); + /// for _i in 0..iters { + /// black_box(foo().await); + /// } + /// start.elapsed() + /// } + /// }) + /// }); + /// } + /// + /// criterion_group!(benches, bench); + /// criterion_main!(benches); + /// ``` + /// + #[inline(never)] + pub fn iter_custom(&mut self, mut routine: R) + where + R: FnMut(u64) -> F, + F: Future, + { + self.__codspeed_root_frame__iter_custom(routine) + } + + #[inline(never)] + #[allow(non_snake_case, missing_docs)] + pub fn __codspeed_root_frame__iter_custom(&mut self, mut routine: R) + where + R: FnMut(u64) -> F, + F: Future, + { + let AsyncBencher { b, runner } = self; + runner.block_on(async { + b.iterated = true; + let time_start = Instant::now(); + b.value = routine(b.iters).await; + b.elapsed_time = time_start.elapsed(); + }) + } + + #[doc(hidden)] + pub fn iter_with_setup(&mut self, setup: S, routine: R) + where + S: FnMut() -> I, + R: FnMut(I) -> F, + F: Future, + { + self.iter_batched(setup, routine, BatchSize::PerIteration); + } + + /// Times a `routine` by collecting its output on each iteration. This avoids timing the + /// destructor of the value returned by `routine`. + /// + /// WARNING: This requires `O(iters * mem::size_of::())` of memory, and `iters` is not under the + /// control of the caller. If this causes out-of-memory errors, use `iter_batched` instead. + /// + /// # Timing model + /// + /// ``` text + /// elapsed = Instant::now + iters * (routine) + Iterator::collect::> + /// ``` + /// + /// # Example + /// + /// ```rust + /// #[macro_use] extern crate criterion; + /// + /// use criterion::*; + /// use criterion::async_executor::FuturesExecutor; + /// + /// async fn create_vector() -> Vec { + /// # vec![] + /// // ... + /// } + /// + /// fn bench(c: &mut Criterion) { + /// c.bench_function("with_drop", move |b| { + /// // This will avoid timing the Vec::drop. + /// b.to_async(FuturesExecutor).iter_with_large_drop(|| async { create_vector().await }) + /// }); + /// } + /// + /// criterion_group!(benches, bench); + /// criterion_main!(benches); + /// ``` + /// + pub fn iter_with_large_drop(&mut self, mut routine: R) + where + R: FnMut() -> F, + F: Future, + { + self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput); + } + + #[doc(hidden)] + pub fn iter_with_large_setup(&mut self, setup: S, routine: R) + where + S: FnMut() -> I, + R: FnMut(I) -> F, + F: Future, + { + self.iter_batched(setup, routine, BatchSize::NumBatches(1)); + } + + /// Times a `routine` that requires some input by generating a batch of input, then timing the + /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for + /// details on choosing the batch size. Use this when the routine must consume its input. + /// + /// For example, use this loop to benchmark sorting algorithms, because they require unsorted + /// data on each iteration. + /// + /// # Timing model + /// + /// ```text + /// elapsed = (Instant::now * num_batches) + (iters * (routine + O::drop)) + Vec::extend + /// ``` + /// + /// # Example + /// + /// ```rust + /// #[macro_use] extern crate criterion; + /// + /// use criterion::*; + /// use criterion::async_executor::FuturesExecutor; + /// + /// fn create_scrambled_data() -> Vec { + /// # vec![] + /// // ... + /// } + /// + /// // The sorting algorithm to test + /// async fn sort(data: &mut [u64]) { + /// // ... + /// } + /// + /// fn bench(c: &mut Criterion) { + /// let data = create_scrambled_data(); + /// + /// c.bench_function("with_setup", move |b| { + /// // This will avoid timing the to_vec call. + /// b.iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput) + /// }); + /// } + /// + /// criterion_group!(benches, bench); + /// criterion_main!(benches); + /// ``` + /// + #[inline(never)] + pub fn iter_batched(&mut self, mut setup: S, mut routine: R, size: BatchSize) + where + S: FnMut() -> I, + R: FnMut(I) -> F, + F: Future, + { + self.__codspeed_root_frame__iter_batched(setup, routine, size); + } + + #[inline(never)] + #[allow(non_snake_case, missing_docs)] + pub fn __codspeed_root_frame__iter_batched( + &mut self, + mut setup: S, + mut routine: R, + size: BatchSize, + ) where + S: FnMut() -> I, + R: FnMut(I) -> F, + F: Future, + { + let AsyncBencher { b, runner } = self; + runner.block_on(async { + b.iterated = true; + let batch_size = size.iters_per_batch(b.iters); + assert!(batch_size != 0, "Batch size must not be zero."); + let time_start = Instant::now(); + b.value = b.measurement.zero(); + + if batch_size == 1 { + for _ in 0..b.iters { + let input = black_box(setup()); + + let start = b.measurement.start(); + let output = routine(input).await; + let end = b.measurement.end(start); + b.value = b.measurement.add(&b.value, &end); + + drop(black_box(output)); + } + } else { + let mut iteration_counter = 0; + + while iteration_counter < b.iters { + let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter); + + let inputs = black_box((0..batch_size).map(|_| setup()).collect::>()); + let mut outputs = Vec::with_capacity(batch_size as usize); + + let start = b.measurement.start(); + // Can't use .extend here like the sync version does + for input in inputs { + outputs.push(routine(input).await); + } + let end = b.measurement.end(start); + b.value = b.measurement.add(&b.value, &end); + + black_box(outputs); + + iteration_counter += batch_size; + } + } + + b.elapsed_time = time_start.elapsed(); + }) + } + + /// Times a `routine` that requires some input by generating a batch of input, then timing the + /// iteration of the benchmark over the input. See [`BatchSize`](enum.BatchSize.html) for + /// details on choosing the batch size. Use this when the routine should accept the input by + /// mutable reference. + /// + /// For example, use this loop to benchmark sorting algorithms, because they require unsorted + /// data on each iteration. + /// + /// # Timing model + /// + /// ```text + /// elapsed = (Instant::now * num_batches) + (iters * routine) + Vec::extend + /// ``` + /// + /// # Example + /// + /// ```rust + /// #[macro_use] extern crate criterion; + /// + /// use criterion::*; + /// use criterion::async_executor::FuturesExecutor; + /// + /// fn create_scrambled_data() -> Vec { + /// # vec![] + /// // ... + /// } + /// + /// // The sorting algorithm to test + /// async fn sort(data: &mut [u64]) { + /// // ... + /// } + /// + /// fn bench(c: &mut Criterion) { + /// let data = create_scrambled_data(); + /// + /// c.bench_function("with_setup", move |b| { + /// // This will avoid timing the to_vec call. + /// b.iter_batched(|| data.clone(), |mut data| async move { sort(&mut data).await }, BatchSize::SmallInput) + /// }); + /// } + /// + /// criterion_group!(benches, bench); + /// criterion_main!(benches); + /// ``` + /// + #[inline(never)] + pub fn iter_batched_ref(&mut self, mut setup: S, mut routine: R, size: BatchSize) + where + S: FnMut() -> I, + R: FnMut(&mut I) -> F, + F: Future, + { + self.__codspeed_root_frame__iter_batched_ref(setup, routine, size) + } + + #[inline(never)] + #[allow(non_snake_case, missing_docs)] + pub fn __codspeed_root_frame__iter_batched_ref( + &mut self, + mut setup: S, + mut routine: R, + size: BatchSize, + ) where + S: FnMut() -> I, + R: FnMut(&mut I) -> F, + F: Future, + { + let AsyncBencher { b, runner } = self; + runner.block_on(async { + b.iterated = true; + let batch_size = size.iters_per_batch(b.iters); + assert!(batch_size != 0, "Batch size must not be zero."); + let time_start = Instant::now(); + b.value = b.measurement.zero(); + + if batch_size == 1 { + for _ in 0..b.iters { + let mut input = black_box(setup()); + + let start = b.measurement.start(); + let output = routine(&mut input).await; + let end = b.measurement.end(start); + b.value = b.measurement.add(&b.value, &end); + + drop(black_box(output)); + drop(black_box(input)); + } + } else { + let mut iteration_counter = 0; + + while iteration_counter < b.iters { + let batch_size = ::std::cmp::min(batch_size, b.iters - iteration_counter); + + let inputs = black_box((0..batch_size).map(|_| setup()).collect::>()); + let mut outputs = Vec::with_capacity(batch_size as usize); + + let start = b.measurement.start(); + // Can't use .extend here like the sync version does + for mut input in inputs { + outputs.push(routine(&mut input).await); + } + let end = b.measurement.end(start); + b.value = b.measurement.add(&b.value, &end); + + black_box(outputs); + + iteration_counter += batch_size; + } + } + b.elapsed_time = time_start.elapsed(); + }); + } +} diff --git a/crates/criterion_compat/criterion_fork/src/routine.rs b/crates/criterion_compat/criterion_fork/src/routine.rs index 88e4318b..f418b44f 100644 --- a/crates/criterion_compat/criterion_fork/src/routine.rs +++ b/crates/criterion_compat/criterion_fork/src/routine.rs @@ -191,8 +191,10 @@ pub(crate) trait Routine { .unwrap(); } - let m_elapsed = self.bench(measurement, &m_iters, parameter); - + let m_elapsed = { + let _guard = codspeed::fifo::BenchGuard::new_with_runner_fifo(); + self.bench(measurement, &m_iters, parameter) + }; let m_iters_f: Vec = m_iters.iter().map(|&x| x as f64).collect(); (