diff --git a/client/js-sys/benches/foo.rs b/client/js-sys/benches/foo.rs new file mode 100644 index 00000000..f6ac7cd9 --- /dev/null +++ b/client/js-sys/benches/foo.rs @@ -0,0 +1,17 @@ +use js_bindgen_test::{Criterion, bench}; +use js_sys::js_sys; + +js_bindgen::embed_js!(module = "foo", name = "bench", "(value) => value"); + +#[js_sys] +extern "js-sys" { + #[js_sys(js_embed = "bench")] + fn val(value: u128) -> u128; +} + +#[bench] +fn bench_foo(c: &mut Criterion) { + c.bench_function("i64,i64", |b| b.iter(|| { + assert_eq!(val(1), 1); + })); +} diff --git a/client/js-sys/src/numeric.rs b/client/js-sys/src/numeric.rs index 583436e5..1530fb58 100644 --- a/client/js-sys/src/numeric.rs +++ b/client/js-sys/src/numeric.rs @@ -182,13 +182,13 @@ unsafe impl Output for u128 { const ASM_TYPE: &str = ASM_PTR_TYPE; const ASM_CONV: Option = Some(OutputAsmConv { import: Some(const_concat!( - ".functype js_sys.numeric.128 (i32, i32, i32, i32, ", + ".functype js_sys.numeric.128 (i64, i64,", ASM_PTR_TYPE, ") -> ()" )), direct: false, conv: "call js_sys.numeric.128", - r#type: "i32, i32, i32, i32", + r#type: "i64,i64", }); const JS_CONV: Option = Some(OutputJsConv { embed: Some(("js_sys", "numeric.128.encode")), @@ -239,13 +239,13 @@ unsafe impl Output for i128 { const ASM_TYPE: &str = ASM_PTR_TYPE; const ASM_CONV: Option = Some(OutputAsmConv { import: Some(const_concat!( - ".functype js_sys.numeric.128 (i32, i32, i32, i32, ", + ".functype js_sys.numeric.128 (i64, i64,", ASM_PTR_TYPE, ") -> ()" )), direct: false, conv: "call js_sys.numeric.128", - r#type: "i32, i32, i32, i32", + r#type: "i64,i64", }); const JS_CONV: Option = Some(OutputJsConv { embed: Some(("js_sys", "numeric.128.encode")), @@ -270,35 +270,47 @@ const _: () = { js_bindgen::embed_js!( module = "js_sys", name = "numeric.128.encode", - "(value) => {{", - " const lo_lo = Number(value & 0xFFFFFFFFn)", - " const lo_hi = Number((value >> 32n) & 0xFFFFFFFFn)", - " const hi_lo = Number((value >> 64n) & 0xFFFFFFFFn)", - " 
const hi_hi = Number((value >> 96n) & 0xFFFFFFFFn)", - " return [lo_lo, lo_hi, hi_lo, hi_hi]", + "(value) => {{", + " const lo = BigInt.asIntN(64, value & 0xFFFFFFFFFFFFFFFFn)", + " const hi = BigInt.asIntN(64, value >> 64n)", + " return [lo, hi]", "}}", ); js_bindgen::unsafe_embed_asm!( ".globl js_sys.numeric.128", "js_sys.numeric.128:", - " .functype js_sys.numeric.128 (i32, i32, i32, i32, {}) -> ()", - " local.get 4", + " .functype js_sys.numeric.128 (i64, i64, {}) -> ()", + " local.get 2", " local.get 0", - " i32.store 0", - " local.get 4", - " local.get 1", - " i32.store 4", - " local.get 4", + " i64.store 0", " local.get 2", - " i32.store 8", - " local.get 4", - " local.get 3", - " i32.store 12", + " local.get 1", + " i64.store 8", " end_function", interpolate ASM_PTR_TYPE, ); +// js_bindgen::unsafe_embed_asm!( +// ".globl js_sys.numeric.128", +// "js_sys.numeric.128:", +// " .functype js_sys.numeric.128 (i32, i32, i32, i32, {}) -> ()", +// " local.get 4", +// " local.get 0", +// " i32.store 0", +// " local.get 4", +// " local.get 1", +// " i32.store 4", +// " local.get 4", +// " local.get 2", +// " i32.store 8", +// " local.get 4", +// " local.get 3", +// " i32.store 12", +// " end_function", +// interpolate ASM_PTR_TYPE, +// ); + #[cfg(target_arch = "wasm32")] delegate!(u32, *const T:); #[cfg(target_arch = "wasm64")] diff --git a/client/test/Cargo.toml b/client/test/Cargo.toml index 6c34b62a..b9c323f7 100644 --- a/client/test/Cargo.toml +++ b/client/test/Cargo.toml @@ -7,6 +7,17 @@ rust-version = "1.87" [dependencies] js-bindgen-test-macro = { workspace = true } js-sys = { workspace = true, features = ["macro"] } +web-sys = { workspace = true } + +async-trait = "0.1.89" +cast = "0.3" +libm = "0.2.11" +nu-ansi-term = { version = "0.50", default-features = false } +num-traits = { version = "0.2", default-features = false, features = ["libm"] } +once_cell = "1.21.4" +oorandom = "11.1.5" +serde = { version = "1.0", default-features = false, features = 
["derive"] } +serde_json = { version = "1.0", default-features = false, features = ["alloc"] } [lints] workspace = true diff --git a/client/test/src/criterion/analysis.rs b/client/test/src/criterion/analysis.rs new file mode 100644 index 00000000..e86ef234 --- /dev/null +++ b/client/test/src/criterion/analysis.rs @@ -0,0 +1,157 @@ +use alloc::vec::Vec; + +use super::benchmark::BenchmarkConfig; +use super::estimate::{ + ConfidenceInterval, Distributions, Estimate, Estimates, PointEstimates, build_estimates, +}; +use super::measurement::Measurement; +use super::report::{BenchmarkId, Report}; +use super::routine::Routine; +use super::stats::bivariate::Data; +use super::stats::bivariate::regression::Slope; +use super::stats::univariate::Sample; +use super::stats::{Distribution, Tails}; +use super::{Criterion, SavedSample, baseline, compare}; + +// Common analysis procedure +pub(crate) async fn common( + id: &BenchmarkId, + routine: &mut dyn Routine, + config: &BenchmarkConfig, + criterion: &Criterion, +) { + criterion.report.benchmark_start(id); + + let (sampling_mode, iters, times); + let sample = routine + .sample(&criterion.measurement, id, config, criterion) + .await; + sampling_mode = sample.0; + iters = sample.1; + times = sample.2; + + criterion.report.analysis(id); + + if times.contains(&0.0) { + return; + } + + let avg_times = iters + .iter() + .zip(times.iter()) + .map(|(&iters, &elapsed)| elapsed / iters) + .collect::>(); + let avg_times = Sample::new(&avg_times); + let labeled_sample = super::stats::univariate::outliers::tukey::classify(avg_times); + + let data = Data::new(&iters, ×); + let (mut distributions, mut estimates) = estimates(avg_times, config); + if sampling_mode.is_linear() { + let (distribution, slope) = regression(&data, config); + + estimates.slope = Some(slope); + distributions.slope = Some(distribution); + } + + let comparison = compare::common(id, avg_times, config).map( + |(t_value, t_distribution, relative_estimates, ..)| { + let 
p_value = t_distribution.p_value(t_value, Tails::Two); + super::report::ComparisonData { + p_value, + relative_estimates, + significance_threshold: config.significance_level, + noise_threshold: config.noise_threshold, + } + }, + ); + + let measurement_data = super::report::MeasurementData { + avg_times: labeled_sample, + absolute_estimates: estimates.clone(), + comparison, + }; + + criterion + .report + .measurement_complete(id, &measurement_data, criterion.measurement.formatter()); + + baseline::write( + id.desc(), + baseline::BenchmarkBaseline { + file: criterion.location.as_ref().map(|l| l.file.clone()), + module_path: criterion.location.as_ref().map(|l| l.module_path.clone()), + iters: data.x().as_ref().to_vec(), + times: data.y().as_ref().to_vec(), + sample: SavedSample { + sampling_mode, + iters: data.x().as_ref().to_vec(), + times: data.y().as_ref().to_vec(), + }, + estimates, + }, + ); +} + +// Performs a simple linear regression on the sample +fn regression( + data: &Data<'_, f64, f64>, + config: &BenchmarkConfig, +) -> (Distribution, Estimate) { + let cl = config.confidence_level; + + let distribution = data.bootstrap(config.nresamples, |d| (Slope::fit(&d).0,)).0; + + let point = Slope::fit(data); + let (lb, ub) = distribution.confidence_interval(config.confidence_level); + let se = distribution.std_dev(None); + + ( + distribution, + Estimate { + confidence_interval: ConfidenceInterval { + confidence_level: cl, + lower_bound: lb, + upper_bound: ub, + }, + point_estimate: point.0, + standard_error: se, + }, + ) +} + +// Estimates the statistics of the population from the sample +fn estimates(avg_times: &Sample, config: &BenchmarkConfig) -> (Distributions, Estimates) { + fn stats(sample: &Sample) -> (f64, f64, f64, f64) { + let mean = sample.mean(); + let std_dev = sample.std_dev(Some(mean)); + let median = sample.percentiles().median(); + let mad = sample.median_abs_dev(Some(median)); + + (mean, std_dev, median, mad) + } + + let cl = 
config.confidence_level; + let nresamples = config.nresamples; + + let (mean, std_dev, median, mad) = stats(avg_times); + let points = PointEstimates { + mean, + median, + std_dev, + median_abs_dev: mad, + }; + + let (dist_mean, dist_stddev, dist_median, dist_mad) = avg_times.bootstrap(nresamples, stats); + + let distributions = Distributions { + mean: dist_mean, + slope: None, + median: dist_median, + median_abs_dev: dist_mad, + std_dev: dist_stddev, + }; + + let estimates = build_estimates(&distributions, &points, cl); + + (distributions, estimates) +} diff --git a/client/test/src/criterion/baseline.rs b/client/test/src/criterion/baseline.rs new file mode 100644 index 00000000..b5292700 --- /dev/null +++ b/client/test/src/criterion/baseline.rs @@ -0,0 +1,61 @@ +//! Record previous benchmark data + +use alloc::collections::BTreeMap; +use alloc::string::String; +use alloc::vec::Vec; +use core::cell::RefCell; + +use serde::{Deserialize, Serialize}; + +use super::SavedSample; +use super::estimate::Estimates; +use crate::LazyCell; + +#[cfg_attr(target_feature = "atomics", thread_local)] +static BASELINE: LazyCell>> = + LazyCell::new(|| RefCell::new(BTreeMap::new())); + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub(crate) struct BenchmarkBaseline { + pub(crate) file: Option, + pub(crate) module_path: Option, + pub(crate) iters: Vec, + pub(crate) times: Vec, + pub(crate) sample: SavedSample, + pub(crate) estimates: Estimates, +} + +/// Write the corresponding benchmark ID and corresponding data into the table. +pub(crate) fn write(id: &str, baseline: BenchmarkBaseline) { + BASELINE.borrow_mut().insert(id.into(), baseline); +} + +/// Read the data corresponding to the benchmark ID from the table. +pub(crate) fn read(id: &str) -> Option { + BASELINE.borrow().get(id).cloned() +} + +// /// Used to write previous benchmark data before the benchmark, for later +// /// comparison. 
+// #[wasm_bindgen] +// pub fn __wbgbench_import(baseline: Vec) { +// match serde_json::from_slice(&baseline) { +// Ok(prev) => { +// *BASELINE.borrow_mut() = prev; +// } +// Err(e) => { +// console_log!("Failed to import previous benchmark {e:?}"); +// } +// } +// } +// +// /// Used to read benchmark data, and then the runner stores it on the local +// /// disk. +// #[wasm_bindgen] +// pub fn __wbgbench_dump() -> Option> { +// let baseline = BASELINE.borrow(); +// if baseline.is_empty() { +// return None; +// } +// serde_json::to_vec(&*baseline).ok() +// } diff --git a/client/test/src/criterion/bencher.rs b/client/test/src/criterion/bencher.rs new file mode 100644 index 00000000..57977cb1 --- /dev/null +++ b/client/test/src/criterion/bencher.rs @@ -0,0 +1,190 @@ +use core::future::Future; +use core::hint::black_box; +use core::time::Duration; + +use super::measurement::Measurement; +use crate::Instant; + +// ================================== MAINTENANCE NOTE +// ============================================= Any changes made to either +// Bencher or AsyncBencher will have to be replicated to the other! +// ================================== MAINTENANCE NOTE +// ============================================= + +/// Timer struct used to iterate a benchmarked function and measure the runtime. +/// +/// This struct provides different timing loops as methods. Each timing loop +/// provides a different way to time a routine and each has advantages and +/// disadvantages. +/// +/// * If you want to do the iteration and measurement yourself (eg. passing the +/// iteration count to a separate process), use [`iter_custom`]. +/// * If your routine requires no per-iteration setup and returns a value with +/// an expensive `drop` method, use [`iter_with_large_drop`]. +/// * If your routine requires some per-iteration setup that shouldn't be timed, +/// use [`iter_batched`] or [`iter_batched_ref`]. See [`BatchSize`] for a +/// discussion of batch sizes. 
If the setup value implements `Drop` and you +/// don't want to include the `drop` time in the measurement, use +/// [`iter_batched_ref`], otherwise use [`iter_batched`]. These methods are +/// also suitable for benchmarking routines which return a value with an +/// expensive `drop` method, but are more complex than +/// [`iter_with_large_drop`]. +/// * Otherwise, use [`iter`]. +/// +/// [`iter`]: Bencher::iter +/// [`iter_custom`]: Bencher::iter_custom +/// [`iter_future`]: Bencher::iter_future +/// [`iter_custom_future`]: Bencher::iter_custom_future +pub struct Bencher<'a, M: Measurement> { + pub(crate) iterated: bool, // Have we iterated this benchmark? + pub(crate) iters: u64, // Number of times to iterate this benchmark + pub(crate) value: Duration, // The measured value + pub(crate) measurement: &'a M, // Reference to the measurement object + pub(crate) elapsed_time: Duration, /* How much time did it take to perform the iteration? + * Used for the warmup period. */ +} + +impl Bencher<'_, M> { + /// Times a `routine` by executing it many times and timing the total + /// elapsed time. + /// + /// Prefer this timing loop when `routine` returns a value that doesn't have + /// a destructor. + /// + /// # Timing model + /// + /// Note that the `Bencher` also times the time required to destroy the + /// output of `routine()`. Therefore prefer this timing loop when the + /// runtime of `mem::drop(O)` is negligible compared to the runtime of the + /// `routine`. 
+ /// + /// ```text + /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next) + /// ``` + #[inline(never)] + pub fn iter(&mut self, mut routine: R) + where + R: FnMut() -> O, + { + self.iterated = true; + let start = self.measurement.start(); + for _ in 0..self.iters { + black_box(routine()); + } + let end = self.measurement.end(start); + self.value = end; + self.elapsed_time = end; + } + + /// Times a `routine` by executing it many times and relying on `routine` to + /// measure its own execution time. + /// + /// # Timing model + /// Custom, the timing model is whatever is returned as the [`Duration`] + /// from `routine`. + /// + /// # Example + /// ```rust + /// use wasm_bindgen_test::{Criterion, Instant, wasm_bindgen_bench}; + /// + /// fn foo() { + /// // ... + /// } + /// + /// #[wasm_bindgen_bench] + /// fn bench(c: &mut Criterion) { + /// c.bench_function("iter", move |b| { + /// b.iter_custom(|iters| { + /// let start = Instant::now(); + /// for _i in 0..iters { + /// std::hint::black_box(foo()); + /// } + /// start.elapsed() + /// }) + /// }); + /// } + /// ``` + #[inline(never)] + pub fn iter_custom(&mut self, mut routine: R) + where + R: FnMut(u64) -> Duration, + { + self.iterated = true; + let time_start = Instant::now(); + self.value = routine(self.iters); + self.elapsed_time = time_start.elapsed(); + } + + /// Times a `routine` by executing it many times and timing the total + /// elapsed time. + /// + /// Prefer this timing loop when `routine` returns a value that doesn't have + /// a destructor. + /// + /// # Timing model + /// + /// Note that the `Bencher` also times the time required to destroy the + /// output of `routine()`. Therefore prefer this timing loop when the + /// runtime of `mem::drop(O)` is negligible compared to the runtime of the + /// `routine`. 
+ /// + /// ```text + /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next) + /// ``` + #[inline(never)] + pub async fn iter_future(&mut self, mut routine: R) + where + R: FnMut() -> Fut, + Fut: Future, + { + self.iterated = true; + let start = self.measurement.start(); + for _ in 0..self.iters { + black_box(routine().await); + } + let end = self.measurement.end(start); + self.value = end; + self.elapsed_time = end; + } + + /// Times a `routine` by executing it many times and relying on `routine` to + /// measure its own execution time. + /// + /// # Timing model + /// Custom, the timing model is whatever is returned as the [`Duration`] + /// from `routine`. + /// + /// # Example + /// ```rust + /// use wasm_bindgen_test::{Criterion, Instant, wasm_bindgen_bench}; + /// + /// async fn foo() { + /// // ... + /// } + /// + /// #[wasm_bindgen_bench] + /// async fn bench(c: &mut Criterion) { + /// c.bench_async_function("iter", move |b| { + /// Box::pin(b.iter_custom_future(async |iters| { + /// let start = Instant::now(); + /// for _i in 0..iters { + /// std::hint::black_box(foo().await); + /// } + /// start.elapsed() + /// })) + /// }) + /// .await; + /// } + /// ``` + #[inline(never)] + pub async fn iter_custom_future(&mut self, mut routine: R) + where + R: FnMut(u64) -> Fut, + Fut: Future, + { + self.iterated = true; + let time_start = Instant::now(); + self.value = routine(self.iters).await; + self.elapsed_time = time_start.elapsed(); + } +} diff --git a/client/test/src/criterion/benchmark.rs b/client/test/src/criterion/benchmark.rs new file mode 100644 index 00000000..d81a6947 --- /dev/null +++ b/client/test/src/criterion/benchmark.rs @@ -0,0 +1,17 @@ +use core::time::Duration; + +use super::SamplingMode; + +// TODO: Move the benchmark config stuff to a separate module for easier use. + +/// Struct containing all of the configuration options for a benchmark. 
+pub struct BenchmarkConfig { + pub confidence_level: f64, + pub measurement_time: Duration, + pub noise_threshold: f64, + pub nresamples: usize, + pub sample_size: usize, + pub significance_level: f64, + pub warm_up_time: Duration, + pub sampling_mode: SamplingMode, +} diff --git a/client/test/src/criterion/compare.rs b/client/test/src/criterion/compare.rs new file mode 100644 index 00000000..93569051 --- /dev/null +++ b/client/test/src/criterion/compare.rs @@ -0,0 +1,111 @@ +use alloc::vec::Vec; + +use super::SavedSample; +use super::benchmark::BenchmarkConfig; +use super::estimate::{ + ChangeDistributions, ChangeEstimates, ChangePointEstimates, Estimates, build_change_estimates, +}; +use super::report::BenchmarkId; +use super::stats::Distribution; +use super::stats::univariate::{self, Sample, mixed}; + +type ComparisonResult = ( + f64, + Distribution, + ChangeEstimates, + ChangeDistributions, + Vec, + Vec, + Vec, + Estimates, +); + +// Common comparison procedure +pub(crate) fn common( + id: &BenchmarkId, + avg_times: &Sample, + config: &BenchmarkConfig, +) -> Option { + let prev = super::baseline::read(id.desc())?; + let SavedSample { iters, times, .. 
} = prev.sample; + let base_estimates: Estimates = prev.estimates; + + let base_avg_times: Vec = iters + .iter() + .zip(times.iter()) + .map(|(iters, elapsed)| elapsed / iters) + .collect(); + let base_avg_time_sample = Sample::new(&base_avg_times); + + let (t_statistic, t_distribution) = t_test(avg_times, base_avg_time_sample, config); + + let (estimates, relative_distributions) = estimates(avg_times, base_avg_time_sample, config); + Some(( + t_statistic, + t_distribution, + estimates, + relative_distributions, + iters, + times, + base_avg_times.clone(), + base_estimates, + )) +} + +// Performs a two sample t-test +fn t_test( + avg_times: &Sample, + base_avg_times: &Sample, + config: &BenchmarkConfig, +) -> (f64, Distribution) { + let nresamples = config.nresamples; + + let t_statistic = avg_times.t(base_avg_times); + let t_distribution = + mixed::bootstrap(avg_times, base_avg_times, nresamples, |a, b| (a.t(b),)).0; + + // HACK: Filter out non-finite numbers, which can happen sometimes when sample + // size is very small. Downstream code doesn't like non-finite values here. 
+ let t_distribution = Distribution::from( + t_distribution + .iter() + .filter(|a| a.is_finite()) + .copied() + .collect::>() + .into_boxed_slice(), + ); + + (t_statistic, t_distribution) +} + +// Estimates the relative change in the statistics of the population +fn estimates( + avg_times: &Sample, + base_avg_times: &Sample, + config: &BenchmarkConfig, +) -> (ChangeEstimates, ChangeDistributions) { + fn stats(a: &Sample, b: &Sample) -> (f64, f64) { + ( + a.mean() / b.mean() - 1., + a.percentiles().median() / b.percentiles().median() - 1., + ) + } + + let cl = config.confidence_level; + let nresamples = config.nresamples; + + let (dist_mean, dist_median) = + univariate::bootstrap(avg_times, base_avg_times, nresamples, stats); + + let distributions = ChangeDistributions { + mean: dist_mean, + median: dist_median, + }; + + let (mean, median) = stats(avg_times, base_avg_times); + let points = ChangePointEstimates { mean, median }; + + let estimates = build_change_estimates(&distributions, &points, cl); + + (estimates, distributions) +} diff --git a/client/test/src/criterion/estimate.rs b/client/test/src/criterion/estimate.rs new file mode 100644 index 00000000..d1b12e34 --- /dev/null +++ b/client/test/src/criterion/estimate.rs @@ -0,0 +1,118 @@ +use serde::{Deserialize, Serialize}; + +use super::stats::Distribution; + +#[derive(Clone, PartialEq, Deserialize, Serialize, Debug)] +pub struct ConfidenceInterval { + pub confidence_level: f64, + pub lower_bound: f64, + pub upper_bound: f64, +} + +#[derive(Clone, PartialEq, Deserialize, Serialize, Debug)] +pub struct Estimate { + /// The confidence interval for this estimate + pub confidence_interval: ConfidenceInterval, + /// The value of this estimate + pub point_estimate: f64, + /// The standard error of this estimate + pub standard_error: f64, +} + +pub fn build_estimates( + distributions: &Distributions, + points: &PointEstimates, + cl: f64, +) -> Estimates { + let to_estimate = |point_estimate, distribution: 
&Distribution| { + let (lb, ub) = distribution.confidence_interval(cl); + + Estimate { + confidence_interval: ConfidenceInterval { + confidence_level: cl, + lower_bound: lb, + upper_bound: ub, + }, + point_estimate, + standard_error: distribution.std_dev(None), + } + }; + + Estimates { + mean: to_estimate(points.mean, &distributions.mean), + median: to_estimate(points.median, &distributions.median), + median_abs_dev: to_estimate(points.median_abs_dev, &distributions.median_abs_dev), + slope: None, + std_dev: to_estimate(points.std_dev, &distributions.std_dev), + } +} + +pub fn build_change_estimates( + distributions: &ChangeDistributions, + points: &ChangePointEstimates, + cl: f64, +) -> ChangeEstimates { + let to_estimate = |point_estimate, distribution: &Distribution| { + let (lb, ub) = distribution.confidence_interval(cl); + + Estimate { + confidence_interval: ConfidenceInterval { + confidence_level: cl, + lower_bound: lb, + upper_bound: ub, + }, + point_estimate, + standard_error: distribution.std_dev(None), + } + }; + + ChangeEstimates { + mean: to_estimate(points.mean, &distributions.mean), + median: to_estimate(points.median, &distributions.median), + } +} + +pub struct PointEstimates { + pub mean: f64, + pub median: f64, + pub median_abs_dev: f64, + pub std_dev: f64, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Estimates { + pub mean: Estimate, + pub median: Estimate, + pub median_abs_dev: Estimate, + pub slope: Option, + pub std_dev: Estimate, +} +impl Estimates { + pub fn typical(&self) -> &Estimate { + self.slope.as_ref().unwrap_or(&self.mean) + } +} + +pub struct Distributions { + pub mean: Distribution, + pub median: Distribution, + pub median_abs_dev: Distribution, + pub slope: Option>, + pub std_dev: Distribution, +} + +pub struct ChangePointEstimates { + pub mean: f64, + pub median: f64, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ChangeEstimates { + pub mean: Estimate, + pub median: Estimate, +} + +pub 
struct ChangeDistributions { + pub mean: Distribution, + pub median: Distribution, +} diff --git a/client/test/src/criterion/format.rs b/client/test/src/criterion/format.rs new file mode 100644 index 00000000..abd06ad1 --- /dev/null +++ b/client/test/src/criterion/format.rs @@ -0,0 +1,79 @@ +use alloc::format; +use alloc::string::String; + +use libm::{fabs, pow}; + +pub fn change(pct: f64, signed: bool) -> String { + if signed { + format!("{:>+6}%", signed_short(pct * 1e2)) + } else { + format!("{:>6}%", short(pct * 1e2)) + } +} + +pub fn time(ns: f64) -> String { + if ns < 1.0 { + format!("{:>6} ps", short(ns * 1e3)) + } else if ns < pow(10f64, 3f64) { + format!("{:>6} ns", short(ns)) + } else if ns < pow(10f64, 6f64) { + format!("{:>6} µs", short(ns / 1e3)) + } else if ns < pow(10f64, 9f64) { + format!("{:>6} ms", short(ns / 1e6)) + } else { + format!("{:>6} s", short(ns / 1e9)) + } +} + +pub fn short(n: f64) -> String { + if n < 10.0 { + format!("{n:.4}") + } else if n < 100.0 { + format!("{n:.3}") + } else if n < 1000.0 { + format!("{n:.2}") + } else if n < 10000.0 { + format!("{n:.1}") + } else { + format!("{n:.0}") + } +} + +fn signed_short(n: f64) -> String { + let n_abs = fabs(n); + + let sign = if n >= 0.0 { '+' } else { '\u{2212}' }; + if n_abs < 10.0 { + format!("{sign}{n_abs:.4}") + } else if n_abs < 100.0 { + format!("{sign}{n_abs:.3}") + } else if n_abs < 1000.0 { + format!("{sign}{n_abs:.2}") + } else if n_abs < 10000.0 { + format!("{sign}{n_abs:.1}") + } else { + format!("{sign}{n_abs:.0}") + } +} + +pub fn iter_count(iterations: u64) -> String { + if iterations < 10_000 { + format!("{iterations} iterations") + } else if iterations < 1_000_000 { + format!("{:.0}k iterations", (iterations as f64) / 1000.0) + } else if iterations < 10_000_000 { + format!("{:.1}M iterations", (iterations as f64) / (1000.0 * 1000.0)) + } else if iterations < 1_000_000_000 { + format!("{:.0}M iterations", (iterations as f64) / (1000.0 * 1000.0)) + } else if iterations < 
10_000_000_000 { + format!( + "{:.1}B iterations", + (iterations as f64) / (1000.0 * 1000.0 * 1000.0) + ) + } else { + format!( + "{:.0}B iterations", + (iterations as f64) / (1000.0 * 1000.0 * 1000.0) + ) + } +} diff --git a/client/test/src/criterion/measurement.rs b/client/test/src/criterion/measurement.rs new file mode 100644 index 00000000..7362b82c --- /dev/null +++ b/client/test/src/criterion/measurement.rs @@ -0,0 +1,275 @@ +//! This module defines a set of traits that can be used to plug different +//! measurements (eg. Unix's Processor Time, CPU or GPU performance counters, +//! etc.) into Criterion.rs. It also includes the [`WallTime`] struct which +//! defines the default wall-clock time measurement. +use alloc::format; +use alloc::string::String; +use core::time::Duration; + +use libm::pow; + +use super::Throughput; +use super::format::short; +use crate::Instant; + +/// Trait providing functions to format measured values to string so that they +/// can be displayed on the command line or in the reports. The functions of +/// this trait take measured values in f64 form; implementors can assume that +/// the values are of the same scale as those produced by the associated +/// [`Measurement`] (eg. if your measurement produces values in nanoseconds, the +/// values passed to the formatter will be in nanoseconds). +/// +/// Implementors are encouraged to format the values in a way that is intuitive +/// for humans and uses the SI prefix system. For example, the format used by +/// [`WallTime`] can display the value in units ranging from picoseconds to +/// seconds depending on the magnitude of the elapsed time in nanoseconds. +pub trait ValueFormatter { + /// Format the value (with appropriate unit) and return it as a string. + fn format_value(&self, value: f64) -> String { + let mut values = [value]; + let unit = self.scale_values(value, &mut values); + format!("{:>6} {}", short(values[0]), unit) + } + + /// Format the value as a throughput measurement. 
The value represents the + /// measurement value; the implementor will have to calculate bytes per + /// second, iterations per cycle, etc. + fn format_throughput(&self, throughput: &Throughput, value: f64) -> String { + let mut values = [value]; + let unit = self.scale_throughputs(value, throughput, &mut values); + format!("{:>6} {}", short(values[0]), unit) + } + + /// Scale the given values to some appropriate unit and return the unit + /// string. + /// + /// The given typical value should be used to choose the unit. This function + /// may be called multiple times with different datasets; the typical value + /// will remain the same to ensure that the units remain consistent within + /// a graph. The typical value will not be NaN. Values will not contain NaN + /// as input, and the transformed values must not contain NaN. + fn scale_values(&self, typical_value: f64, values: &mut [f64]) -> &'static str; + + /// Convert the given measured values into throughput numbers based on the + /// given throughput value, scale them to some appropriate unit, and return + /// the unit string. + /// + /// The given typical value should be used to choose the unit. This function + /// may be called multiple times with different datasets; the typical value + /// will remain the same to ensure that the units remain consistent within + /// a graph. The typical value will not be NaN. Values will not contain NaN + /// as input, and the transformed values must not contain NaN. + fn scale_throughputs( + &self, + typical_value: f64, + throughput: &Throughput, + values: &mut [f64], + ) -> &'static str; + + /// Scale the values and return a unit string designed for machines. + /// + /// For example, this is used for the CSV file output. Implementations + /// should modify the given values slice to apply the desired scaling (if + /// any) and return a string representing the unit the modified values are + /// in. 
+ fn scale_for_machines(&self, values: &mut [f64]) -> &'static str; +} + +/// Trait for all types which define something Criterion.rs can measure. The +/// only measurement currently provided is [`WallTime`], but third party crates +/// or benchmarks may define more. +/// +/// This trait defines two core methods, `start` and `end`. `start` is called at +/// the beginning of a measurement to produce some intermediate value (for +/// example, the wall-clock time at the start of that set of iterations) and +/// `end` is called at the end of the measurement with the value returned by +/// `start`. +pub trait Measurement { + /// This type represents an intermediate value for the measurements. It will + /// be produced by the start function and passed to the end function. An + /// example might be the wall-clock time as of the `start` call. + type Intermediate; + + /// Criterion.rs will call this before iterating the benchmark. + fn start(&self) -> Self::Intermediate; + + /// Criterion.rs will call this after iterating the benchmark to get the + /// measured value. + fn end(&self, i: Self::Intermediate) -> Duration; + + /// Combine two values. Criterion.rs sometimes needs to perform measurements + /// in multiple batches of iterations, so the value from one batch must be + /// added to the sum of the previous batches. + fn add(&self, v1: &Duration, v2: &Duration) -> Duration; + + /// Return a "zero" value for the Value type which can be added to another + /// value. + fn zero(&self) -> Duration; + + /// Converts the measured value to f64 so that it can be used in statistical + /// analysis. + fn to_f64(&self, value: &Duration) -> f64; + + /// Return a trait-object reference to the value formatter for this + /// measurement. 
+ fn formatter(&self) -> &dyn ValueFormatter; +} + +/// Default Formatter +pub(crate) struct DurationFormatter; + +impl DurationFormatter { + fn bytes_per_second(bytes: f64, typical: f64, values: &mut [f64]) -> &'static str { + let bytes_per_second = bytes * (1e9 / typical); + let (denominator, unit) = if bytes_per_second < 1024.0 { + (1.0, " B/s") + } else if bytes_per_second < 1024.0 * 1024.0 { + (1024.0, "KiB/s") + } else if bytes_per_second < 1024.0 * 1024.0 * 1024.0 { + (1024.0 * 1024.0, "MiB/s") + } else { + (1024.0 * 1024.0 * 1024.0, "GiB/s") + }; + + for val in values { + let bytes_per_second = bytes * (1e9 / *val); + *val = bytes_per_second / denominator; + } + + unit + } + + fn bytes_per_second_decimal(bytes: f64, typical: f64, values: &mut [f64]) -> &'static str { + let bytes_per_second = bytes * (1e9 / typical); + let (denominator, unit) = if bytes_per_second < 1000.0 { + (1.0, " B/s") + } else if bytes_per_second < 1000.0 * 1000.0 { + (1000.0, "KB/s") + } else if bytes_per_second < 1000.0 * 1000.0 * 1000.0 { + (1000.0 * 1000.0, "MB/s") + } else { + (1000.0 * 1000.0 * 1000.0, "GB/s") + }; + + for val in values { + let bytes_per_second = bytes * (1e9 / *val); + *val = bytes_per_second / denominator; + } + + unit + } + + fn elements_per_second(elems: f64, typical: f64, values: &mut [f64]) -> &'static str { + let elems_per_second = elems * (1e9 / typical); + let (denominator, unit) = if elems_per_second < 1000.0 { + (1.0, " elem/s") + } else if elems_per_second < 1000.0 * 1000.0 { + (1000.0, "Kelem/s") + } else if elems_per_second < 1000.0 * 1000.0 * 1000.0 { + (1000.0 * 1000.0, "Melem/s") + } else { + (1000.0 * 1000.0 * 1000.0, "Gelem/s") + }; + + for val in values { + let elems_per_second = elems * (1e9 / *val); + *val = elems_per_second / denominator; + } + + unit + } + + fn bits_per_second(bits: f64, typical: f64, values: &mut [f64]) -> &'static str { + let bits_per_second = bits * (1e9 / typical); + let (denominator, unit) = if bits_per_second < 
1000.0 { + (1.0, " b/s") + } else if bits_per_second < 1000.0 * 1000.0 { + (1000.0, "Kb/s") + } else if bits_per_second < 1000.0 * 1000.0 * 1000.0 { + (1000.0 * 1000.0, "Mb/s") + } else { + (1000.0 * 1000.0 * 1000.0, "Gb/s") + }; + + for val in values { + let bits_per_second = bits * (1e9 / *val); + *val = bits_per_second / denominator; + } + + unit + } +} +impl ValueFormatter for DurationFormatter { + fn scale_throughputs( + &self, + typical: f64, + throughput: &Throughput, + values: &mut [f64], + ) -> &'static str { + match *throughput { + Throughput::Bytes(bytes) => Self::bytes_per_second(bytes as f64, typical, values), + Throughput::BytesDecimal(bytes) => { + Self::bytes_per_second_decimal(bytes as f64, typical, values) + } + Throughput::Elements(elems) => Self::elements_per_second(elems as f64, typical, values), + Throughput::Bits(bits) => Self::bits_per_second(bits as f64, typical, values), + } + } + + fn scale_values(&self, ns: f64, values: &mut [f64]) -> &'static str { + let (factor, unit) = if ns < pow(10f64, 0f64) { + (pow(10f64, 3f64), "ps") + } else if ns < pow(10f64, 3f64) { + (pow(10f64, 0f64), "ns") + } else if ns < pow(10f64, 6f64) { + (pow(10f64, -3f64), "µs") + } else if ns < pow(10f64, 9f64) { + (pow(10f64, -6f64), "ms") + } else { + (pow(10f64, -9f64), "s") + }; + + for val in values { + *val *= factor; + } + + unit + } + + fn scale_for_machines(&self, _values: &mut [f64]) -> &'static str { + // no scaling is needed + "ns" + } +} + +/// `WallTime` is the default measurement in Criterion.rs. It measures the +/// elapsed time from the beginning of a series of iterations to the end. 
+pub struct WallTime; + +impl Measurement for WallTime { + type Intermediate = Instant; + + fn start(&self) -> Self::Intermediate { + Instant::now() + } + + fn end(&self, i: Self::Intermediate) -> Duration { + i.elapsed() + } + + fn add(&self, v1: &Duration, v2: &Duration) -> Duration { + *v1 + *v2 + } + + fn zero(&self) -> Duration { + Duration::from_secs(0) + } + + fn to_f64(&self, val: &Duration) -> f64 { + val.as_nanos() as f64 + } + + fn formatter(&self) -> &dyn ValueFormatter { + &DurationFormatter + } +} diff --git a/client/test/src/criterion/mod.rs b/client/test/src/criterion/mod.rs new file mode 100644 index 00000000..191085d3 --- /dev/null +++ b/client/test/src/criterion/mod.rs @@ -0,0 +1,560 @@ +//! A statistics-driven micro-benchmarking library written in Rust. +//! +//! This crate is a microbenchmarking library which aims to provide strong +//! statistical confidence in detecting and estimating the size of performance +//! improvements and regressions, while also being easy to use. +//! +//! See +//! [the user guide](https://bheisler.github.io/criterion.rs/book/index.html) +//! for examples as well as details on the measurement and analysis process, +//! and the output. +//! +//! ## Features: +//! * Collects detailed statistics, providing strong confidence that changes to +//! performance are real, not measurement noise. +//! * Produces detailed charts, providing thorough understanding of your code's +//! performance behavior. + +#![allow( + clippy::cast_possible_truncation, + clippy::cast_precision_loss, + clippy::cast_sign_loss, + clippy::iter_not_returning_iterator, + clippy::similar_names, + clippy::struct_field_names, + clippy::transmute_ptr_to_ptr, + clippy::undocumented_unsafe_blocks, + reason = "checked" +)] + +// Needs to be declared before other modules +// in order to be usable there. 
+mod analysis; +mod baseline; +mod bencher; +mod benchmark; +mod compare; +mod estimate; +mod format; +mod measurement; +mod report; +mod routine; +mod stats; + +use alloc::boxed::Box; +use alloc::string::String; +use alloc::vec; +use alloc::vec::Vec; +use core::future::Future; +use core::pin::Pin; +use core::task::{Context, Poll, Waker}; +use core::time::Duration; + +pub use bencher::Bencher; +use benchmark::BenchmarkConfig; +use libm::{ceil, sqrt}; +pub use measurement::Measurement; +use measurement::WallTime; +use report::WasmReport; +use serde::{Deserialize, Serialize}; + +use crate::console_error; + +/// The benchmark manager +/// +/// `Criterion` lets you configure and execute benchmarks +/// +/// Each benchmark consists of four phases: +/// +/// - **Warm-up**: The routine is repeatedly executed, to let the +/// CPU/OS/JIT/interpreter adapt to the new load +/// - **Measurement**: The routine is repeatedly executed, and timing +/// information is collected into a sample +/// - **Analysis**: The sample is analyzed and distilled into meaningful +/// statistics that get reported to stdout, stored in files, and plotted +/// - **Comparison**: The current sample is compared with the sample obtained in +/// the previous benchmark. 
pub struct Criterion<M: Measurement = WallTime> {
    // Per-run statistical configuration (sample size, times, thresholds).
    config: BenchmarkConfig,
    report: WasmReport,
    measurement: M,
    location: Option<Location>,
}

/// Source location (file and module path) of a benchmark, recorded for
/// external tooling such as codspeed.
pub(crate) struct Location {
    file: String,
    module_path: String,
}

impl Default for Criterion {
    /// Creates a benchmark manager with the following default settings:
    ///
    /// - Sample size: 100 measurements
    /// - Warm-up time: 3 s
    /// - Measurement time: 5 s
    /// - Bootstrap size: 100 000 resamples
    /// - Noise threshold: 0.01 (1%)
    /// - Confidence level: 0.95
    /// - Significance level: 0.05
    fn default() -> Self {
        Self {
            config: BenchmarkConfig {
                confidence_level: 0.95,
                measurement_time: Duration::from_secs(5),
                noise_threshold: 0.01,
                nresamples: 100_000,
                sample_size: 100,
                significance_level: 0.05,
                warm_up_time: Duration::from_secs(3),
                sampling_mode: SamplingMode::Auto,
            },
            report: WasmReport,
            measurement: WallTime,
            location: None,
        }
    }
}

impl<M: Measurement> Criterion<M> {
    /// Changes the measurement for the benchmarks run with this runner. See the
    /// [`Measurement`] trait for more details
    pub fn with_measurement<M2: Measurement>(self, m: M2) -> Criterion<M2> {
        // Can't use struct update syntax here because they're technically different
        // types.
        Criterion {
            config: self.config,
            report: self.report,
            measurement: m,
            location: self.location,
        }
    }

    /// Configure file and module paths for use with codspeed.
    #[must_use]
    pub fn with_location(self, file: &str, module_path: &str) -> Self {
        Self {
            location: Some(Location {
                file: file.into(),
                module_path: module_path.into(),
            }),
            ..self
        }
    }

    /// Changes the default size of the sample for benchmarks run with this
    /// runner.
    ///
    /// A bigger sample should yield more accurate results if paired with a
    /// sufficiently large measurement time.
    ///
    /// Sample size must be at least 10.
    ///
    /// # Panics
    ///
    /// Panics if n < 10
    #[must_use]
    pub fn sample_size(mut self, n: usize) -> Self {
        assert!(n >= 10);

        self.config.sample_size = n;
        self
    }

    /// Changes the default warm up time for benchmarks run with this runner.
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
    #[must_use]
    pub fn warm_up_time(mut self, dur: Duration) -> Self {
        assert!(dur.as_nanos() > 0);

        self.config.warm_up_time = dur;
        self
    }

    /// Changes the default measurement time for benchmarks run with this
    /// runner.
    ///
    /// With a longer time, the measurement will become more resilient to
    /// transitory peak loads caused by external programs
    ///
    /// **Note**: If the measurement time is too "low", Criterion will
    /// automatically increase it
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
    #[must_use]
    pub fn measurement_time(mut self, dur: Duration) -> Self {
        assert!(dur.as_nanos() > 0);

        self.config.measurement_time = dur;
        self
    }

    /// Changes the default number of resamples for benchmarks run with this
    /// runner.
    ///
    /// Number of resamples to use for the
    /// [bootstrap](http://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Case_resampling)
    ///
    /// A larger number of resamples reduces the random sampling errors, which
    /// are inherent to the bootstrap method, but also increases the analysis
    /// time
    ///
    /// # Panics
    ///
    /// Panics if the number of resamples is set to zero
    #[must_use]
    pub fn nresamples(mut self, n: usize) -> Self {
        assert!(n > 0);
        if n <= 1000 {
            console_error!("\nWarning: It is not recommended to reduce nresamples below 1000.");
        }

        self.config.nresamples = n;
        self
    }

    /// Changes the default noise threshold for benchmarks run with this runner.
    /// The noise threshold is used to filter out small changes in performance,
    /// even if they are statistically significant. Sometimes benchmarking the
    /// same code twice will result in small but statistically significant
    /// differences solely because of noise. This provides a way to filter
    /// out some of these false positives at the cost of making it harder to
    /// detect small changes to the true performance of the benchmark.
    ///
    /// The default is 0.01, meaning that changes smaller than 1% will be
    /// ignored.
    ///
    /// # Panics
    ///
    /// Panics if the threshold is set to a negative value
    #[must_use]
    pub fn noise_threshold(mut self, threshold: f64) -> Self {
        assert!(threshold >= 0.0);

        self.config.noise_threshold = threshold;
        self
    }

    /// Changes the default confidence level for benchmarks run with this
    /// runner. The confidence level is the desired probability that the true
    /// runtime lies within the estimated [confidence interval](https://en.wikipedia.org/wiki/Confidence_interval). The default is
    /// 0.95, meaning that the confidence interval should capture the true value
    /// 95% of the time.
    ///
    /// # Panics
    ///
    /// Panics if the confidence level is set to a value outside the `(0, 1)`
    /// range
    #[must_use]
    pub fn confidence_level(mut self, cl: f64) -> Self {
        assert!(cl > 0.0 && cl < 1.0);
        if cl < 0.5 {
            console_error!(
                "\nWarning: It is not recommended to reduce confidence level below 0.5."
            );
        }

        self.config.confidence_level = cl;
        self
    }

    /// Changes the default [significance level](https://en.wikipedia.org/wiki/Statistical_significance)
    /// for benchmarks run with this runner. This is used to perform a
    /// [hypothesis test](https://en.wikipedia.org/wiki/Statistical_hypothesis_testing) to see if
    /// the measurements from this run are different from the measured
    /// performance of the last run. The significance level is the desired
    /// probability that two measurements of identical code will be considered
    /// 'different' due to noise in the measurements. The default value is 0.05,
    /// meaning that approximately 5% of identical benchmarks will register as
    /// different due to noise.
    ///
    /// This presents a trade-off. By setting the significance level closer to
    /// 0.0, you can increase the statistical robustness against noise, but it
    /// also weakens Criterion.rs' ability to detect small but real changes in
    /// the performance. By setting the significance level closer to 1.0,
    /// Criterion.rs will be more able to detect small true changes, but will
    /// also report more spurious differences.
    ///
    /// See also the noise threshold setting.
    ///
    /// # Panics
    ///
    /// Panics if the significance level is set to a value outside the `(0, 1)`
    /// range
    #[must_use]
    pub fn significance_level(mut self, sl: f64) -> Self {
        assert!(sl > 0.0 && sl < 1.0);

        self.config.significance_level = sl;
        self
    }
}

impl<M> Criterion<M>
where
    M: Measurement + 'static,
{
    /// Benchmarks a function.
    ///
    /// # Example
    ///
    /// ```rust
    /// use wasm_bindgen_test::{Criterion, wasm_bindgen_bench};
    ///
    /// #[wasm_bindgen_bench]
    /// fn bench(c: &mut Criterion) {
    ///     // Setup (construct data, allocate memory, etc)
    ///     c.bench_function("bench desc", |b| {
    ///         b.iter(|| {
    ///             // Code to benchmark goes here
    ///         })
    ///     });
    /// }
    /// ```
    pub fn bench_function<F>(&mut self, desc: &str, f: F) -> &mut Self
    where
        F: FnMut(&mut Bencher<'_, M>),
    {
        // A synchronous routine's analysis future completes on the first poll,
        // so a no-op waker is sufficient here; `bench_function` can never be
        // left pending.
        fn block_on(f: impl Future<Output = ()>) {
            let mut ctx = Context::from_waker(Waker::noop());
            match core::pin::pin!(f).poll(&mut ctx) {
                Poll::Ready(()) => (),
                // Synchronous routines cannot suspend.
                Poll::Pending => unreachable!(),
            }
        }

        let id = report::BenchmarkId::new(desc.into());
        block_on(analysis::common(
            &id,
            &mut routine::Function::new(f),
            &self.config,
            self,
        ));

        self
    }

    /// Benchmarks a future.
+ /// + /// # Example + /// + /// ```rust + /// use wasm_bindgen_test::{Criterion, wasm_bindgen_bench}; + /// + /// #[wasm_bindgen_bench] + /// async fn bench(c: &mut Criterion) { + /// // Setup (construct data, allocate memory, etc) + /// c.bench_async_function("bench desc", |b| { + /// Box::pin(b.iter_future(|| async { + /// // Code to benchmark goes here + /// })) + /// }) + /// .await; + /// } + /// ``` + pub async fn bench_async_function(&mut self, desc: &str, f: F) -> &mut Self + where + for<'b> F: FnMut(&'b mut Bencher<'_, M>) -> Pin + 'b>>, + { + let id = report::BenchmarkId::new(desc.into()); + analysis::common(&id, &mut routine::AsyncFunction::new(f), &self.config, self).await; + self + } +} + +/// Enum representing different ways of measuring the throughput of benchmarked +/// code. If the throughput setting is configured for a benchmark then the +/// estimated throughput will be reported as well as the time per iteration. +// TODO: Remove serialize/deserialize from the public API. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum Throughput { + /// Measure throughput in terms of bytes/second. The value should be the + /// number of bytes processed by one iteration of the benchmarked code. + /// Typically, this would be the length of an input string or `&[u8]`. + Bytes(u64), + + /// Equivalent to Bytes, but the value will be reported in terms of + /// kilobytes (1000 bytes) per second instead of kibibytes (1024 bytes) per + /// second, megabytes instead of mibibytes, and gigabytes instead of + /// gibibytes. + BytesDecimal(u64), + + /// Measure throughput in terms of elements/second. The value should be the + /// number of elements processed by one iteration of the benchmarked code. + /// Typically, this would be the size of a collection, but could also be + /// the number of lines of input text or the number of values to parse. + Elements(u64), + + /// Measure throughput in terms of bits/second. 
The value should be the + /// number of bits processed by one iteration of the benchmarked code. + /// Typically, this would be the number of bits transferred by a networking + /// function. + Bits(u64), +} + +/// This enum allows the user to control how Criterion.rs chooses the iteration +/// count when sampling. The default is `Auto`, which will choose a method +/// automatically based on the iteration time during the warm-up phase. +#[derive(Debug, Default, Clone, Copy)] +pub enum SamplingMode { + /// Criterion.rs should choose a sampling method automatically. This is the + /// default, and is recommended for most users and most benchmarks. + #[default] + Auto, +} + +impl SamplingMode { + pub(crate) fn choose_sampling_mode( + self, + warmup_mean_execution_time: f64, + sample_count: u64, + target_time: f64, + ) -> ActualSamplingMode { + match self { + Self::Auto => { + // Estimate execution time with linear sampling + let total_runs = sample_count * (sample_count + 1) / 2; + let d = ceil(target_time / warmup_mean_execution_time / total_runs as f64) as u64; + let expected_ns = total_runs as f64 * d as f64 * warmup_mean_execution_time; + + if expected_ns > (2.0 * target_time) { + ActualSamplingMode::Flat + } else { + ActualSamplingMode::Linear + } + } + } + } +} + +/// Enum to represent the sampling mode without Auto. +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub(crate) enum ActualSamplingMode { + Linear, + Flat, +} + +impl ActualSamplingMode { + pub(crate) fn iteration_counts( + self, + warmup_mean_execution_time: f64, + sample_count: u64, + target_time: &Duration, + ) -> Vec { + match self { + Self::Linear => { + let n = sample_count; + let met = warmup_mean_execution_time; + let m_ns = target_time.as_nanos(); + // Solve: [d + 2*d + 3*d + ... 
+ n*d] * met = m_ns + let total_runs = n * (n + 1) / 2; + let d = (ceil(m_ns as f64 / met / total_runs as f64) as u64).max(1); + let expected_ns = total_runs as f64 * d as f64 * met; + + if d == 1 { + let recommended_sample_size = + Self::recommend_linear_sample_size(m_ns as f64, met); + let actual_time = Duration::from_nanos(expected_ns as u64); + console_error!( + "\nWarning: Unable to complete {} samples in {:.1?}. You may wish to \ + increase target time to {:.1?}", + n, + target_time, + actual_time + ); + + if recommended_sample_size == n { + console_error!(" or enable flat sampling."); + } else { + console_error!( + ", enable flat sampling, or reduce sample count to {}.", + recommended_sample_size + ); + } + } + + (1..=n).map(|a| a * d).collect::>() + } + Self::Flat => { + let n = sample_count; + let met = warmup_mean_execution_time; + let m_ns = target_time.as_nanos() as f64; + let time_per_sample = m_ns / (n as f64); + // This is pretty simplistic; we could do something smarter to fit into the + // allotted time. + let iterations_per_sample = (ceil(time_per_sample / met) as u64).max(1); + + let expected_ns = met * (iterations_per_sample * n) as f64; + + if iterations_per_sample == 1 { + let recommended_sample_size = Self::recommend_flat_sample_size(m_ns, met); + let actual_time = Duration::from_nanos(expected_ns as u64); + console_error!( + "\nWarning: Unable to complete {} samples in {:.1?}. You may wish to \ + increase target time to {:.1?}", + n, + target_time, + actual_time + ); + + if recommended_sample_size == n { + console_error!("."); + } else { + console_error!(", or reduce sample count to {}.", recommended_sample_size); + } + } + + vec![iterations_per_sample; n as usize] + } + } + } + + fn is_linear(self) -> bool { + matches!(self, Self::Linear) + } + + fn recommend_linear_sample_size(target_time: f64, met: f64) -> u64 { + // Some math shows that n(n+1)/2 * d * met = target_time. d = 1, so it can be + // ignored. 
This leaves n(n+1) = (2*target_time)/met, or n^2 + n - + // (2*target_time)/met = 0 Which can be solved with the quadratic formula. + // Since A and B are constant 1, this simplifies to sample_size = (-1 +- + // sqrt(1 - 4C))/2, where C = (2*target_time)/met. We don't care about the + // negative solution. Experimentation shows that this actually tends to result + // in twice the desired execution time (probably because of the ceil used to + // calculate d) so instead I use c = target_time/met. + let c = target_time / met; + let sample_size = f64::midpoint(-1.0, sqrt(4.0 * c)); + let sample_size = sample_size as u64; + + // Round down to the nearest 10 to give a margin and avoid excessive precision + let sample_size = (sample_size / 10) * 10; + + // Clamp it to be at least 10, since criterion.rs doesn't allow sample sizes + // smaller than 10. + if sample_size < 10 { 10 } else { sample_size } + } + + fn recommend_flat_sample_size(target_time: f64, met: f64) -> u64 { + let sample_size = (target_time / met) as u64; + + // Round down to the nearest 10 to give a margin and avoid excessive precision + let sample_size = (sample_size / 10) * 10; + + // Clamp it to be at least 10, since criterion.rs doesn't allow sample sizes + // smaller than 10. 
+ if sample_size < 10 { 10 } else { sample_size } + } +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub(crate) struct SavedSample { + pub(crate) sampling_mode: ActualSamplingMode, + pub(crate) iters: Vec, + pub(crate) times: Vec, +} diff --git a/client/test/src/criterion/report.rs b/client/test/src/criterion/report.rs new file mode 100644 index 00000000..249e8a7d --- /dev/null +++ b/client/test/src/criterion/report.rs @@ -0,0 +1,255 @@ +use alloc::format; +use alloc::string::{String, ToString}; +use core::fmt; + +use nu_ansi_term::{Color, Style}; +use serde::{Deserialize, Serialize}; + +use super::estimate::{ChangeEstimates, Estimate, Estimates}; +use super::format; +use super::measurement::ValueFormatter; +use super::stats::univariate::outliers::tukey::LabeledSample; +use crate::console_log; + +pub struct ComparisonData { + pub p_value: f64, + pub relative_estimates: ChangeEstimates, + pub significance_threshold: f64, + pub noise_threshold: f64, +} + +pub struct MeasurementData<'a> { + pub avg_times: LabeledSample<'a, f64>, + pub absolute_estimates: Estimates, + pub comparison: Option, +} + +#[derive(Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct BenchmarkId { + desc: String, +} + +impl BenchmarkId { + pub fn new(desc: String) -> Self { + Self { desc } + } + + pub fn desc(&self) -> &str { + &self.desc + } +} +impl fmt::Display for BenchmarkId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.desc()) + } +} +impl fmt::Debug for BenchmarkId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "BenchmarkId {{ desc: \"{}\" }}", self.desc,) + } +} + +pub trait Report { + fn benchmark_start(&self, _id: &BenchmarkId) {} + fn warmup(&self, _id: &BenchmarkId, _warmup_ns: f64) {} + fn analysis(&self, _id: &BenchmarkId) {} + fn measurement_start( + &self, + _id: &BenchmarkId, + _sample_count: u64, + _estimate_ns: f64, + _iter_count: u64, + ) { + } + fn measurement_complete( + &self, + _id: 
&BenchmarkId, + _measurements: &MeasurementData, + _formatter: &dyn ValueFormatter, + ) { + } +} + +pub(crate) struct WasmReport; + +impl WasmReport { + fn print(s: &str) { + console_log!("{}", s); + } + + fn with_color(color: Color, s: &str) -> String { + color.paint(s).to_string() + } + + fn green(s: &str) -> String { + Self::with_color(Color::Green, s) + } + + fn yellow(s: &str) -> String { + Self::with_color(Color::Yellow, s) + } + + fn red(s: &str) -> String { + Self::with_color(Color::Red, s) + } + + fn bold(s: String) -> String { + Style::new().bold().paint(s).to_string() + } + + fn faint(s: String) -> String { + Style::new().dimmed().paint(s).to_string() + } + + pub fn outliers(sample: &LabeledSample<'_, f64>) { + let (los, lom, _, him, his) = sample.count(); + let noutliers = los + lom + him + his; + let sample_size = sample.len(); + + if noutliers == 0 { + return; + } + + let percent = |n: usize| 100. * n as f64 / sample_size as f64; + + console_log!( + "{}", + Self::yellow(&format!( + "Found {} outliers among {} measurements ({:.2}%)", + noutliers, + sample_size, + percent(noutliers) + )) + ); + + let print = |n, label| { + if n != 0 { + console_log!(" {} ({:.2}%) {}", n, percent(n), label); + } + }; + + print(los, "low severe"); + print(lom, "low mild"); + print(him, "high mild"); + print(his, "high severe"); + } +} + +impl Report for WasmReport { + fn warmup(&self, _id: &BenchmarkId, warmup_ns: f64) { + Self::print(&format!("Warming up for {}", format::time(warmup_ns))); + } + + fn measurement_start( + &self, + _id: &BenchmarkId, + sample_count: u64, + estimate_ns: f64, + iter_count: u64, + ) { + let iter_string = format::iter_count(iter_count); + + Self::print(&format!( + "Collecting {} samples in estimated {} ({})", + sample_count, + format::time(estimate_ns), + iter_string + )); + } + + fn measurement_complete( + &self, + id: &BenchmarkId, + meas: &MeasurementData, + formatter: &dyn ValueFormatter, + ) { + let typical_estimate = 
&meas.absolute_estimates.typical(); + + let mut id = id.desc().to_string(); + + if id.len() > 23 { + console_log!("{}", Self::green(&id)); + id.clear(); + } + let id_len = id.len(); + + console_log!( + "{}{}time: [{} {} {}]", + Self::green(&id), + " ".repeat(24 - id_len), + Self::faint(formatter.format_value(typical_estimate.confidence_interval.lower_bound)), + Self::bold(formatter.format_value(typical_estimate.point_estimate)), + Self::faint(formatter.format_value(typical_estimate.confidence_interval.upper_bound)) + ); + + if let Some(ref comp) = meas.comparison { + let different_mean = comp.p_value < comp.significance_threshold; + let mean_est = &comp.relative_estimates.mean; + let point_estimate = mean_est.point_estimate; + let mut point_estimate_str = format::change(point_estimate, true); + // The change in throughput is related to the change in timing. Reducing the + // timing by 50% increases the throughput by 100%. + let explanation_str: String; + + if different_mean { + let comparison = compare_to_threshold(mean_est, comp.noise_threshold); + match comparison { + ComparisonResult::Improved => { + point_estimate_str = Self::green(&Self::bold(point_estimate_str)); + explanation_str = format!("Performance has {}.", Self::green("improved")); + } + ComparisonResult::Regressed => { + point_estimate_str = Self::red(&Self::bold(point_estimate_str)); + explanation_str = format!("Performance has {}.", Self::red("regressed")); + } + ComparisonResult::NonSignificant => { + explanation_str = "Change within noise threshold.".to_string(); + } + } + } else { + explanation_str = "No change in performance detected.".to_string(); + } + + console_log!( + "{}change: [{} {} {}] (p = {:.2} {} {:.2})", + " ".repeat(24), + Self::faint(format::change( + mean_est.confidence_interval.lower_bound, + true + )), + point_estimate_str, + Self::faint(format::change( + mean_est.confidence_interval.upper_bound, + true + )), + comp.p_value, + if different_mean { "<" } else { ">" }, + 
comp.significance_threshold + ); + + console_log!("{}{}", " ".repeat(24), explanation_str); + } + + Self::outliers(&meas.avg_times); + } +} + +enum ComparisonResult { + Improved, + Regressed, + NonSignificant, +} + +fn compare_to_threshold(estimate: &Estimate, noise: f64) -> ComparisonResult { + let ci = &estimate.confidence_interval; + let lb = ci.lower_bound; + let ub = ci.upper_bound; + + if lb < -noise && ub < -noise { + ComparisonResult::Improved + } else if lb > noise && ub > noise { + ComparisonResult::Regressed + } else { + ComparisonResult::NonSignificant + } +} diff --git a/client/test/src/criterion/routine.rs b/client/test/src/criterion/routine.rs new file mode 100644 index 00000000..80e9bcd8 --- /dev/null +++ b/client/test/src/criterion/routine.rs @@ -0,0 +1,212 @@ +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::future::Future; +use core::marker::PhantomData; +use core::pin::Pin; +use core::time::Duration; + +use async_trait::async_trait; + +use super::benchmark::BenchmarkConfig; +use super::measurement::Measurement; +use super::report::{BenchmarkId, Report}; +use super::{ActualSamplingMode, Bencher, Criterion}; + +/// PRIVATE +#[async_trait(?Send)] +pub(crate) trait Routine { + /// PRIVATE + async fn bench(&mut self, m: &M, iters: &[u64]) -> Vec; + /// PRIVATE + async fn warm_up(&mut self, m: &M, how_long: Duration) -> (u64, u64); + + async fn sample( + &mut self, + measurement: &M, + id: &BenchmarkId, + config: &BenchmarkConfig, + criterion: &Criterion, + ) -> (ActualSamplingMode, Box<[f64]>, Box<[f64]>) { + let wu = config.warm_up_time; + let m_ns = config.measurement_time.as_nanos(); + + criterion.report.warmup(id, wu.as_nanos() as f64); + + let (wu_elapsed, wu_iters) = self.warm_up(measurement, wu).await; + + // Initial guess for the mean execution time + let met = wu_elapsed as f64 / wu_iters as f64; + + let n = config.sample_size as u64; + + let actual_sampling_mode = config + .sampling_mode + .choose_sampling_mode(met, n, m_ns as f64); 
+ + let m_iters = actual_sampling_mode.iteration_counts(met, n, &config.measurement_time); + + let expected_ns = m_iters + .iter() + .copied() + .map(|count| count as f64 * met) + .sum(); + + // Use saturating_add to handle overflow. + let mut total_iters = 0u64; + for count in m_iters.iter().copied() { + total_iters = total_iters.saturating_add(count); + } + + criterion + .report + .measurement_start(id, n, expected_ns, total_iters); + + let m_elapsed = self.bench(measurement, &m_iters).await; + + let m_iters_f: Vec = m_iters.iter().map(|&x| x as f64).collect(); + + ( + actual_sampling_mode, + m_iters_f.into_boxed_slice(), + m_elapsed.into_boxed_slice(), + ) + } +} + +pub struct AsyncFunction { + f: F, + phantom: PhantomData, +} + +impl AsyncFunction { + pub fn new(f: F) -> Self { + Self { + f, + phantom: PhantomData, + } + } +} + +#[async_trait(?Send)] +impl Routine for AsyncFunction +where + M: Measurement, + for<'b> F: FnMut(&'b mut Bencher<'_, M>) -> Pin + 'b>>, +{ + async fn bench(&mut self, m: &M, iters: &[u64]) -> Vec { + let f = &mut self.f; + + let mut b = Bencher { + iterated: false, + iters: 0, + value: m.zero(), + measurement: m, + elapsed_time: Duration::from_millis(0), + }; + + let mut result = Vec::with_capacity(iters.len()); + for iters in iters { + b.iters = *iters; + (*f)(&mut b).await; + result.push(m.to_f64(&b.value)); + } + result + } + + async fn warm_up(&mut self, m: &M, how_long: Duration) -> (u64, u64) { + let f = &mut self.f; + let mut b = Bencher { + iterated: false, + iters: 1, + value: m.zero(), + measurement: m, + elapsed_time: Duration::from_millis(0), + }; + + let mut total_iters = 0; + let mut elapsed_time = Duration::from_millis(0); + loop { + (*f)(&mut b).await; + + total_iters += b.iters; + elapsed_time += b.elapsed_time; + if elapsed_time > how_long { + return (elapsed_time.as_nanos() as u64, total_iters); + } + + b.iters = b.iters.wrapping_mul(2); + } + } +} + +pub struct Function +where + F: FnMut(&mut Bencher<'_, M>), +{ + 
f: F, + phantom: PhantomData, +} + +impl Function +where + F: FnMut(&mut Bencher<'_, M>), +{ + pub fn new(f: F) -> Self { + Self { + f, + phantom: PhantomData, + } + } +} + +#[async_trait(?Send)] +impl Routine for Function +where + F: FnMut(&mut Bencher<'_, M>), +{ + async fn bench(&mut self, m: &M, iters: &[u64]) -> Vec { + let f = &mut self.f; + + let mut b = Bencher { + iterated: false, + iters: 0, + value: m.zero(), + measurement: m, + elapsed_time: Duration::from_millis(0), + }; + + iters + .iter() + .map(|iters| { + b.iters = *iters; + (*f)(&mut b); + m.to_f64(&b.value) + }) + .collect() + } + + async fn warm_up(&mut self, m: &M, how_long: Duration) -> (u64, u64) { + let f = &mut self.f; + let mut b = Bencher { + iterated: false, + iters: 1, + value: m.zero(), + measurement: m, + elapsed_time: Duration::from_millis(0), + }; + + let mut total_iters = 0; + let mut elapsed_time = Duration::from_millis(0); + loop { + (*f)(&mut b); + + total_iters += b.iters; + elapsed_time += b.elapsed_time; + if elapsed_time > how_long { + return (elapsed_time.as_nanos() as u64, total_iters); + } + + b.iters = b.iters.wrapping_mul(2); + } + } +} diff --git a/client/test/src/criterion/stats/bivariate/mod.rs b/client/test/src/criterion/stats/bivariate/mod.rs new file mode 100644 index 00000000..1de4cec7 --- /dev/null +++ b/client/test/src/criterion/stats/bivariate/mod.rs @@ -0,0 +1,70 @@ +//! 
Bivariate analysis + +pub mod regression; +mod resamples; + +use super::bivariate::resamples::Resamples; +use super::float::Float; +use super::tuple::{Tuple, TupledDistributionsBuilder}; +use super::univariate::Sample; + +/// Bivariate `(X, Y)` data +/// +/// Invariants: +/// +/// - No `NaN`s in the data +/// - At least two data points in the set +#[derive(Clone, Copy)] +pub struct Data<'a, X, Y>(&'a [X], &'a [Y]); + +impl<'a, X, Y> Data<'a, X, Y> +where + X: Float, + Y: Float, +{ + /// Creates a new data set from two existing slices + pub fn new(xs: &'a [X], ys: &'a [Y]) -> Self { + assert!( + xs.len() == ys.len() + && xs.len() > 1 + && xs.iter().all(|x| !x.is_nan()) + && ys.iter().all(|y| !y.is_nan()) + ); + + Data(xs, ys) + } + + // TODO Remove the `T` parameter in favor of `S::Output` + /// Returns the bootstrap distributions of the parameters estimated by the + /// `statistic` + /// + /// - Multi-threaded + /// - Time: `O(nresamples)` + /// - Memory: `O(nresamples)` + pub fn bootstrap(&self, nresamples: usize, statistic: S) -> T::Distributions + where + S: Fn(Data) -> T + Sync, + T: Tuple + Send, + T::Distributions: Send, + T::Builder: Send, + { + let mut resamples = Resamples::new(*self); + (0..nresamples) + .map(|_| statistic(resamples.next())) + .fold(T::Builder::new(0), |mut sub_distributions, sample| { + sub_distributions.push(sample); + sub_distributions + }) + .complete() + } + + /// Returns a view into the `X` data + pub fn x(&self) -> &'a Sample { + Sample::new(self.0) + } + + /// Returns a view into the `Y` data + pub fn y(&self) -> &'a Sample { + Sample::new(self.1) + } +} diff --git a/client/test/src/criterion/stats/bivariate/regression.rs b/client/test/src/criterion/stats/bivariate/regression.rs new file mode 100644 index 00000000..83a0d026 --- /dev/null +++ b/client/test/src/criterion/stats/bivariate/regression.rs @@ -0,0 +1,30 @@ +//! 
Regression analysis + +use super::super::bivariate::Data; +use super::super::dot; +use super::super::float::Float; + +/// A straight line that passes through the origin `y = m * x` +#[derive(Clone, Copy)] +pub struct Slope(pub A) +where + A: Float; + +impl Slope +where + A: Float, +{ + /// Fits the data to a straight line that passes through the origin using + /// ordinary least squares + /// + /// - Time: `O(length)` + pub fn fit(data: &Data<'_, A, A>) -> Self { + let xs = data.0; + let ys = data.1; + + let xy = dot(xs, ys); + let x2 = dot(xs, xs); + + Self(xy / x2) + } +} diff --git a/client/test/src/criterion/stats/bivariate/resamples.rs b/client/test/src/criterion/stats/bivariate/resamples.rs new file mode 100644 index 00000000..9a37b14f --- /dev/null +++ b/client/test/src/criterion/stats/bivariate/resamples.rs @@ -0,0 +1,62 @@ +use alloc::vec::Vec; + +use super::super::bivariate::Data; +use super::super::float::Float; +use super::super::rand_util::{Rng, new_rng}; + +pub struct Resamples<'a, X, Y> +where + X: 'a + Float, + Y: 'a + Float, +{ + rng: Rng, + data: (&'a [X], &'a [Y]), + stage: Option<(Vec, Vec)>, +} + +impl<'a, X, Y> Resamples<'a, X, Y> +where + X: 'a + Float, + Y: 'a + Float, +{ + pub fn new(data: Data<'a, X, Y>) -> Self { + Resamples { + rng: new_rng(), + data: (data.x(), data.y()), + stage: None, + } + } + + pub fn next(&mut self) -> Data<'_, X, Y> { + let n = self.data.0.len(); + + match self.stage { + None => { + let mut stage = (Vec::with_capacity(n), Vec::with_capacity(n)); + + for _ in 0..n { + let i = self.rng.rand_range(0u64..(self.data.0.len() as u64)) as usize; + + stage.0.push(self.data.0[i]); + stage.1.push(self.data.1[i]); + } + + self.stage = Some(stage); + } + Some(ref mut stage) => { + for i in 0..n { + let j = self.rng.rand_range(0u64..(self.data.0.len() as u64)) as usize; + + stage.0[i] = self.data.0[j]; + stage.1[i] = self.data.1[j]; + } + } + } + + if let Some((ref x, ref y)) = self.stage { + Data(x, y) + } else { + 
unreachable!(); + } + } +} diff --git a/client/test/src/criterion/stats/float.rs b/client/test/src/criterion/stats/float.rs new file mode 100644 index 00000000..27f7b035 --- /dev/null +++ b/client/test/src/criterion/stats/float.rs @@ -0,0 +1,15 @@ +//! Float trait + +use cast::From; +use num_traits::float; + +/// This is an extension of `num_traits::float::Float` that adds safe +/// casting and Sync + Send. Once `num_traits` has these features this +/// can be removed. +pub trait Float: + float::Float + From + From + Sync + Send +{ +} + +impl Float for f32 {} +impl Float for f64 {} diff --git a/client/test/src/criterion/stats/mod.rs b/client/test/src/criterion/stats/mod.rs new file mode 100644 index 00000000..fc67e35c --- /dev/null +++ b/client/test/src/criterion/stats/mod.rs @@ -0,0 +1,112 @@ +//! [Criterion]'s statistics library. +//! +//! [Criterion]: https://github.com/bheisler/criterion.rs +//! +//! **WARNING** This library is criterion's implementation detail and there no +//! plans to stabilize it. In other words, the API may break at any time without +//! notice. + +pub mod bivariate; +pub mod tuple; +pub mod univariate; + +mod float; +mod rand_util; + +use alloc::boxed::Box; +use core::mem; +use core::ops::Deref; + +use float::Float; +use univariate::Sample; + +/// The bootstrap distribution of some parameter +#[derive(Clone)] +pub struct Distribution(Box<[A]>); + +impl Distribution +where + A: Float, +{ + /// Create a distribution from the given values + pub fn from(values: Box<[A]>) -> Self { + Self(values) + } + + /// Computes the confidence interval of the population parameter using + /// percentiles + /// + /// # Panics + /// + /// Panics if the `confidence_level` is not in the `(0, 1)` range. 
+ pub fn confidence_interval(&self, confidence_level: A) -> (A, A) + where + usize: cast::From>, + { + let zero = A::cast(0); + let one = A::cast(1); + let fifty = A::cast(50); + + assert!(confidence_level > zero && confidence_level < one); + + let percentiles = self.percentiles(); + + // FIXME(privacy) this should use the `at_unchecked()` method + ( + percentiles.at(fifty * (one - confidence_level)), + percentiles.at(fifty * (one + confidence_level)), + ) + } + + /// Computes the "likelihood" of seeing the value `t` or "more extreme" + /// values in the distribution. + pub fn p_value(&self, t: A, tails: Tails) -> A { + use core::cmp; + + let n = self.0.len(); + let hits = self.0.iter().filter(|&&x| x < t).count(); + + let tails = A::cast(match tails { + Tails::One => 1, + Tails::Two => 2, + }); + + A::cast(cmp::min(hits, n - hits)) / A::cast(n) * tails + } +} + +impl Deref for Distribution { + type Target = Sample; + + fn deref(&self) -> &Sample { + let slice: &[_] = &self.0; + unsafe { mem::transmute(slice) } + } +} + +/// Number of tails for significance testing +#[derive(Clone, Copy)] +pub enum Tails { + /// One tailed test + One, + /// Two tailed test + Two, +} + +fn dot(xs: &[A], ys: &[A]) -> A +where + A: Float, +{ + xs.iter() + .zip(ys) + .fold(A::cast(0), |acc, (&x, &y)| acc + x * y) +} + +fn sum(xs: &[A]) -> A +where + A: Float, +{ + use core::ops::Add; + + xs.iter().copied().fold(A::cast(0), Add::add) +} diff --git a/client/test/src/criterion/stats/rand_util.rs b/client/test/src/criterion/stats/rand_util.rs new file mode 100644 index 00000000..1ce1cc55 --- /dev/null +++ b/client/test/src/criterion/stats/rand_util.rs @@ -0,0 +1,23 @@ +use core::cell::RefCell; + +use oorandom::Rand64; + +use crate::{LazyCell, SystemTime, UNIX_EPOCH}; + +pub type Rng = Rand64; + +#[cfg_attr(target_feature = "atomics", thread_local)] +static SEED_RAND: LazyCell> = LazyCell::new(|| { + RefCell::new(Rand64::new( + SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time 
went backwards") + .as_millis(), + )) +}); + +pub fn new_rng() -> Rng { + let mut r = SEED_RAND.borrow_mut(); + let seed = (u128::from(r.rand_u64()) << 64) | u128::from(r.rand_u64()); + Rand64::new(seed) +} diff --git a/client/test/src/criterion/stats/tuple.rs b/client/test/src/criterion/stats/tuple.rs new file mode 100644 index 00000000..13d32784 --- /dev/null +++ b/client/test/src/criterion/stats/tuple.rs @@ -0,0 +1,256 @@ +//! Helper traits for tupling/untupling + +use alloc::vec::Vec; + +use super::Distribution; + +/// Any tuple: `(A, B, ..)` +pub trait Tuple: Sized { + /// A tuple of distributions associated with this tuple + type Distributions: TupledDistributions; + + /// A tuple of vectors associated with this tuple + type Builder: TupledDistributionsBuilder; +} + +/// A tuple of distributions: `(Distribution, Distribution, ..)` +pub trait TupledDistributions: Sized { + /// A tuple that can be pushed/inserted into the tupled distributions + type Item: Tuple; +} + +/// A tuple of vecs used to build distributions. 
+pub trait TupledDistributionsBuilder: Sized { + /// A tuple that can be pushed/inserted into the tupled distributions + type Item: Tuple; + + /// Creates a new tuple of vecs + fn new(size: usize) -> Self; + + /// Push one element into each of the vecs + fn push(&mut self, tuple: Self::Item); + + /// Append one tuple of vecs to this one, leaving the vecs in the other + /// tuple empty + fn extend(&mut self, other: &mut Self); + + /// Convert the tuple of vectors into a tuple of distributions + fn complete(self) -> ::Distributions; +} + +impl Tuple for (A,) +where + A: Copy, +{ + type Distributions = (Distribution,); + type Builder = (Vec,); +} + +impl TupledDistributions for (Distribution,) +where + A: Copy, +{ + type Item = (A,); +} +impl TupledDistributionsBuilder for (Vec,) +where + A: Copy, +{ + type Item = (A,); + + fn new(size: usize) -> (Vec,) { + (Vec::with_capacity(size),) + } + + fn push(&mut self, tuple: (A,)) { + (self.0).push(tuple.0); + } + + fn extend(&mut self, other: &mut (Vec,)) { + (self.0).append(&mut other.0); + } + + fn complete(self) -> (Distribution,) { + (Distribution(self.0.into_boxed_slice()),) + } +} + +impl Tuple for (A, B) +where + A: Copy, + B: Copy, +{ + type Distributions = (Distribution, Distribution); + type Builder = (Vec, Vec); +} + +impl TupledDistributions for (Distribution, Distribution) +where + A: Copy, + B: Copy, +{ + type Item = (A, B); +} +impl TupledDistributionsBuilder for (Vec, Vec) +where + A: Copy, + B: Copy, +{ + type Item = (A, B); + + fn new(size: usize) -> (Vec, Vec) { + (Vec::with_capacity(size), Vec::with_capacity(size)) + } + + fn push(&mut self, tuple: (A, B)) { + (self.0).push(tuple.0); + (self.1).push(tuple.1); + } + + fn extend(&mut self, other: &mut (Vec, Vec)) { + (self.0).append(&mut other.0); + (self.1).append(&mut other.1); + } + + fn complete(self) -> (Distribution, Distribution) { + ( + Distribution(self.0.into_boxed_slice()), + Distribution(self.1.into_boxed_slice()), + ) + } +} + +impl Tuple for 
(A, B, C) +where + A: Copy, + B: Copy, + C: Copy, +{ + type Distributions = (Distribution, Distribution, Distribution); + type Builder = (Vec, Vec, Vec); +} + +impl TupledDistributions for (Distribution, Distribution, Distribution) +where + A: Copy, + B: Copy, + C: Copy, +{ + type Item = (A, B, C); +} +impl TupledDistributionsBuilder for (Vec, Vec, Vec) +where + A: Copy, + B: Copy, + C: Copy, +{ + type Item = (A, B, C); + + fn new(size: usize) -> (Vec, Vec, Vec) { + ( + Vec::with_capacity(size), + Vec::with_capacity(size), + Vec::with_capacity(size), + ) + } + + fn push(&mut self, tuple: (A, B, C)) { + (self.0).push(tuple.0); + (self.1).push(tuple.1); + (self.2).push(tuple.2); + } + + fn extend(&mut self, other: &mut (Vec, Vec, Vec)) { + (self.0).append(&mut other.0); + (self.1).append(&mut other.1); + (self.2).append(&mut other.2); + } + + fn complete(self) -> (Distribution, Distribution, Distribution) { + ( + Distribution(self.0.into_boxed_slice()), + Distribution(self.1.into_boxed_slice()), + Distribution(self.2.into_boxed_slice()), + ) + } +} + +impl Tuple for (A, B, C, D) +where + A: Copy, + B: Copy, + C: Copy, + D: Copy, +{ + type Distributions = ( + Distribution, + Distribution, + Distribution, + Distribution, + ); + type Builder = (Vec, Vec, Vec, Vec); +} + +impl TupledDistributions + for ( + Distribution, + Distribution, + Distribution, + Distribution, + ) +where + A: Copy, + B: Copy, + C: Copy, + D: Copy, +{ + type Item = (A, B, C, D); +} +impl TupledDistributionsBuilder for (Vec, Vec, Vec, Vec) +where + A: Copy, + B: Copy, + C: Copy, + D: Copy, +{ + type Item = (A, B, C, D); + + fn new(size: usize) -> (Vec, Vec, Vec, Vec) { + ( + Vec::with_capacity(size), + Vec::with_capacity(size), + Vec::with_capacity(size), + Vec::with_capacity(size), + ) + } + + fn push(&mut self, tuple: (A, B, C, D)) { + (self.0).push(tuple.0); + (self.1).push(tuple.1); + (self.2).push(tuple.2); + (self.3).push(tuple.3); + } + + fn extend(&mut self, other: &mut (Vec, Vec, Vec, Vec)) 
{ + (self.0).append(&mut other.0); + (self.1).append(&mut other.1); + (self.2).append(&mut other.2); + (self.3).append(&mut other.3); + } + + fn complete( + self, + ) -> ( + Distribution, + Distribution, + Distribution, + Distribution, + ) { + ( + Distribution(self.0.into_boxed_slice()), + Distribution(self.1.into_boxed_slice()), + Distribution(self.2.into_boxed_slice()), + Distribution(self.3.into_boxed_slice()), + ) + } +} diff --git a/client/test/src/criterion/stats/univariate/mixed.rs b/client/test/src/criterion/stats/univariate/mixed.rs new file mode 100644 index 00000000..cb53f140 --- /dev/null +++ b/client/test/src/criterion/stats/univariate/mixed.rs @@ -0,0 +1,44 @@ +//! Mixed bootstrap + +use alloc::vec::Vec; + +use super::super::float::Float; +use super::super::tuple::{Tuple, TupledDistributionsBuilder}; +use super::{Resamples, Sample}; + +/// Performs a *mixed* two-sample bootstrap +pub fn bootstrap( + a: &Sample, + b: &Sample, + nresamples: usize, + statistic: S, +) -> T::Distributions +where + A: Float, + S: Fn(&Sample, &Sample) -> T + Sync, + T: Tuple + Send, + T::Distributions: Send, + T::Builder: Send, +{ + let n_a = a.len(); + let n_b = b.len(); + let mut c = Vec::with_capacity(n_a + n_b); + c.extend_from_slice(a); + c.extend_from_slice(b); + let c = Sample::new(&c); + + let mut resamples = Resamples::new(c); + (0..nresamples) + .map(|_| { + let resample = resamples.next(); + let a: &Sample = Sample::new(&resample[..n_a]); + let b: &Sample = Sample::new(&resample[n_a..]); + + statistic(a, b) + }) + .fold(T::Builder::new(0), |mut sub_distributions, sample| { + sub_distributions.push(sample); + sub_distributions + }) + .complete() +} diff --git a/client/test/src/criterion/stats/univariate/mod.rs b/client/test/src/criterion/stats/univariate/mod.rs new file mode 100644 index 00000000..5c7e71a9 --- /dev/null +++ b/client/test/src/criterion/stats/univariate/mod.rs @@ -0,0 +1,62 @@ +//! 
Univariate analysis + +pub mod mixed; +pub mod outliers; +mod percentiles; +mod resamples; +mod sample; + +use core::cmp; + +use libm::{ceil, sqrt}; +pub use percentiles::Percentiles; +use resamples::Resamples; +pub use sample::Sample; + +use super::float::Float; +use super::tuple::{Tuple, TupledDistributionsBuilder}; + +/// Performs a two-sample bootstrap +/// +/// - Multithreaded +/// - Time: `O(nresamples)` +/// - Memory: `O(nresamples)` +pub fn bootstrap( + a: &Sample, + b: &Sample, + nresamples: usize, + statistic: S, +) -> T::Distributions +where + A: Float, + B: Float, + S: Fn(&Sample, &Sample) -> T + Sync, + T: Tuple + Send, + T::Distributions: Send, + T::Builder: Send, +{ + let nresamples_sqrt = ceil(sqrt(nresamples as f64)) as usize; + let per_chunk = nresamples.div_ceil(nresamples_sqrt); + + let mut a_resamples = Resamples::new(a); + let mut b_resamples = Resamples::new(b); + (0..nresamples_sqrt) + .map(|i| { + let start = i * per_chunk; + let end = cmp::min((i + 1) * per_chunk, nresamples); + let a_resample = a_resamples.next(); + + let mut sub_distributions: T::Builder = TupledDistributionsBuilder::new(end - start); + + for _ in start..end { + let b_resample = b_resamples.next(); + sub_distributions.push(statistic(a_resample, b_resample)); + } + sub_distributions + }) + .fold(T::Builder::new(0), |mut a, mut b| { + a.extend(&mut b); + a + }) + .complete() +} diff --git a/client/test/src/criterion/stats/univariate/outliers/mod.rs b/client/test/src/criterion/stats/univariate/outliers/mod.rs new file mode 100644 index 00000000..afff2d4c --- /dev/null +++ b/client/test/src/criterion/stats/univariate/outliers/mod.rs @@ -0,0 +1,7 @@ +//! Classification of outliers +//! +//! WARNING: There's no formal/mathematical definition of what an outlier +//! actually is. Therefore, all outlier classifiers are *subjective*, however +//! some classifiers that have become *de facto* standard are provided here. 
+ +pub mod tukey; diff --git a/client/test/src/criterion/stats/univariate/outliers/tukey.rs b/client/test/src/criterion/stats/univariate/outliers/tukey.rs new file mode 100644 index 00000000..1d05f7dd --- /dev/null +++ b/client/test/src/criterion/stats/univariate/outliers/tukey.rs @@ -0,0 +1,274 @@ +//! Tukey's method +//! +//! The original method uses two "fences" to classify the data. All the +//! observations "inside" the fences are considered "normal", and the rest are +//! considered outliers. +//! +//! The fences are computed from the quartiles of the sample, according to the +//! following formula: +//! +//! ``` ignore +//! // q1, q3 are the first and third quartiles +//! let iqr = q3 - q1; // The interquartile range +//! let (f1, f2) = (q1 - 1.5 * iqr, q3 + 1.5 * iqr); // the "fences" +//! +//! let is_outlier = |x| if x > f1 && x < f2 { true } else { false }; +//! ``` +//! +//! The classifier provided here adds two extra outer fences: +//! +//! ``` ignore +//! let (f3, f4) = (q1 - 3 * iqr, q3 + 3 * iqr); // the outer "fences" +//! ``` +//! +//! The extra fences add a sense of "severity" to the classification. Data +//! points outside of the outer fences are considered "severe" outliers, whereas +//! points outside the inner fences are just "mild" outliers, and, as the +//! original method, everything inside the inner fences is considered +//! "normal" data. +//! +//! Some ASCII art for the visually oriented people: +//! +//! ``` ignore +//! LOW-ish NORMAL-ish HIGH-ish +//! x | + | o o o o o o o | + | x +//! f3 f1 f2 f4 +//! +//! Legend: +//! o: "normal" data (not an outlier) +//! +: "mild" outlier +//! x: "severe" outlier +//! ``` + +use core::ops::{Deref, Index}; +use core::slice; + +use self::Label::{HighMild, HighSevere, LowMild, LowSevere, NotAnOutlier}; +use super::super::super::float::Float; +use super::super::Sample; + +/// A classified/labeled sample. +/// +/// The labeled data can be accessed using the indexing operator. 
The order of +/// the data points is retained. +/// +/// NOTE: Due to limitations in the indexing traits, only the label is returned. +/// Once the `IndexGet` trait lands in stdlib, the indexing operation will +/// return a `(data_point, label)` pair. +#[derive(Clone, Copy)] +pub struct LabeledSample<'a, A> +where + A: Float, +{ + fences: (A, A, A, A), + sample: &'a Sample, +} + +impl<'a, A> LabeledSample<'a, A> +where + A: Float, +{ + /// Returns the number of data points per label + /// + /// - Time: `O(length)` + pub fn count(&self) -> (usize, usize, usize, usize, usize) { + let (mut los, mut lom, mut noa, mut him, mut his) = (0, 0, 0, 0, 0); + + for (_, label) in self { + match label { + LowSevere => { + los += 1; + } + LowMild => { + lom += 1; + } + NotAnOutlier => { + noa += 1; + } + HighMild => { + him += 1; + } + HighSevere => { + his += 1; + } + } + } + + (los, lom, noa, him, his) + } + + /// Returns the fences used to classify the outliers + pub fn fences(&self) -> (A, A, A, A) { + self.fences + } + + /// Returns an iterator over the labeled data + pub fn iter(&self) -> Iter<'a, A> { + Iter { + fences: self.fences, + iter: self.sample.iter(), + } + } +} + +impl Deref for LabeledSample<'_, A> +where + A: Float, +{ + type Target = Sample; + + fn deref(&self) -> &Sample { + self.sample + } +} + +// FIXME Use the `IndexGet` trait +impl Index for LabeledSample<'_, A> +where + A: Float, +{ + type Output = Label; + + fn index(&self, i: usize) -> &Label { + static LOW_SEVERE: Label = LowSevere; + static LOW_MILD: Label = LowMild; + static HIGH_MILD: Label = HighMild; + static HIGH_SEVERE: Label = HighSevere; + static NOT_AN_OUTLIER: Label = NotAnOutlier; + + let x = self.sample[i]; + let (lost, lomt, himt, hist) = self.fences; + + if x < lost { + &LOW_SEVERE + } else if x > hist { + &HIGH_SEVERE + } else if x < lomt { + &LOW_MILD + } else if x > himt { + &HIGH_MILD + } else { + &NOT_AN_OUTLIER + } + } +} + +impl<'a, A> IntoIterator for &LabeledSample<'a, A> 
+where + A: Float, +{ + type Item = (A, Label); + type IntoIter = Iter<'a, A>; + + fn into_iter(self) -> Iter<'a, A> { + self.iter() + } +} + +/// Iterator over the labeled data +pub struct Iter<'a, A> +where + A: Float, +{ + fences: (A, A, A, A), + iter: slice::Iter<'a, A>, +} + +impl Iterator for Iter<'_, A> +where + A: Float, +{ + type Item = (A, Label); + + fn next(&mut self) -> Option<(A, Label)> { + self.iter.next().map(|&x| { + let (lost, lomt, himt, hist) = self.fences; + + let label = if x < lost { + LowSevere + } else if x > hist { + HighSevere + } else if x < lomt { + LowMild + } else if x > himt { + HighMild + } else { + NotAnOutlier + }; + + (x, label) + }) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +/// Labels used to classify outliers +pub enum Label { + /// A "mild" outlier in the "high" spectrum + HighMild, + /// A "severe" outlier in the "high" spectrum + HighSevere, + /// A "mild" outlier in the "low" spectrum + LowMild, + /// A "severe" outlier in the "low" spectrum + LowSevere, + /// A normal data point + NotAnOutlier, +} + +impl Label { + /// Checks if the data point has an "unusually" high value + pub fn is_high(&self) -> bool { + matches!(*self, HighMild | HighSevere) + } + + /// Checks if the data point is labeled as a "mild" outlier + pub fn is_mild(&self) -> bool { + matches!(*self, HighMild | LowMild) + } + + /// Checks if the data point has an "unusually" low value + pub fn is_low(&self) -> bool { + matches!(*self, LowMild | LowSevere) + } + + /// Checks if the data point is labeled as an outlier + pub fn is_outlier(&self) -> bool { + !matches!(*self, NotAnOutlier) + } + + /// Checks if the data point is labeled as a "severe" outlier + pub fn is_severe(&self) -> bool { + matches!(*self, HighSevere | LowSevere) + } +} + +/// Classifies the sample, and returns a labeled sample. 
+/// +/// - Time: `O(N log N) where N = length` +pub fn classify(sample: &Sample) -> LabeledSample<'_, A> +where + A: Float, + usize: cast::From>, +{ + let (q1, _, q3) = sample.percentiles().quartiles(); + let iqr = q3 - q1; + + // Mild + let k_m = A::cast(1.5_f32); + // Severe + let k_s = A::cast(3); + + LabeledSample { + fences: ( + q1 - k_s * iqr, + q1 - k_m * iqr, + q3 + k_m * iqr, + q3 + k_s * iqr, + ), + sample, + } +} diff --git a/client/test/src/criterion/stats/univariate/percentiles.rs b/client/test/src/criterion/stats/univariate/percentiles.rs new file mode 100644 index 00000000..49583f5e --- /dev/null +++ b/client/test/src/criterion/stats/univariate/percentiles.rs @@ -0,0 +1,72 @@ +use alloc::boxed::Box; + +use cast::usize; + +use super::super::float::Float; + +/// A "view" into the percentiles of a sample +pub struct Percentiles(Box<[A]>) +where + A: Float; + +// TODO(rust-lang/rfcs#735) move this `impl` into a private percentiles module +impl Percentiles +where + A: Float, + usize: cast::From>, +{ + /// Returns the percentile at `p`% + /// + /// Safety: + /// + /// - Make sure that `p` is in the range `[0, 100]` + unsafe fn at_unchecked(&self, p: A) -> A { + unsafe { + let hundred = A::cast(100); + debug_assert!(p >= A::cast(0) && p <= hundred); + debug_assert!(!self.0.is_empty()); + let len = self.0.len() - 1; + + if p == hundred { + self.0[len] + } else { + let rank = (p / hundred) * A::cast(len); + let integer = rank.floor(); + let fraction = rank - integer; + let n = usize(integer).unwrap(); + let &floor = self.0.get_unchecked(n); + let &ceiling = self.0.get_unchecked(n + 1); + + floor + (ceiling - floor) * fraction + } + } + } + + /// Returns the percentile at `p`% + /// + /// # Panics + /// + /// Panics if `p` is outside the closed `[0, 100]` range + pub fn at(&self, p: A) -> A { + let zero = A::cast(0); + let hundred = A::cast(100); + + assert!(p >= zero && p <= hundred); + assert!(!self.0.is_empty()); + unsafe { self.at_unchecked(p) } + } + + 
/// Returns the 50th percentile + pub fn median(&self) -> A { + self.at(A::cast(50)) + } + + /// Returns the 25th, 50th and 75th percentiles + pub fn quartiles(&self) -> (A, A, A) { + ( + self.at(A::cast(25)), + self.at(A::cast(50)), + self.at(A::cast(75)), + ) + } +} diff --git a/client/test/src/criterion/stats/univariate/resamples.rs b/client/test/src/criterion/stats/univariate/resamples.rs new file mode 100644 index 00000000..08916db5 --- /dev/null +++ b/client/test/src/criterion/stats/univariate/resamples.rs @@ -0,0 +1,60 @@ +use alloc::vec::Vec; +use core::mem; + +use super::super::float::Float; +use super::super::rand_util::{Rng, new_rng}; +use super::super::univariate::Sample; + +pub struct Resamples<'a, A> +where + A: Float, +{ + rng: Rng, + sample: &'a [A], + stage: Option>, +} + +impl<'a, A> Resamples<'a, A> +where + A: 'a + Float, +{ + pub fn new(sample: &'a Sample) -> Self { + let slice = sample; + + Resamples { + rng: new_rng(), + sample: slice, + stage: None, + } + } + + pub fn next(&mut self) -> &Sample { + let n = self.sample.len(); + let rng = &mut self.rng; + + match self.stage { + None => { + let mut stage = Vec::with_capacity(n); + + for _ in 0..n { + let idx = rng.rand_range(0u64..(self.sample.len() as u64)); + stage.push(self.sample[idx as usize]); + } + + self.stage = Some(stage); + } + Some(ref mut stage) => { + for elem in stage.iter_mut() { + let idx = rng.rand_range(0u64..(self.sample.len() as u64)); + *elem = self.sample[idx as usize]; + } + } + } + + if let Some(ref v) = self.stage { + unsafe { mem::transmute::<&[A], &Sample>(v) } + } else { + unreachable!(); + } + } +} diff --git a/client/test/src/criterion/stats/univariate/sample.rs b/client/test/src/criterion/stats/univariate/sample.rs new file mode 100644 index 00000000..ee05a97b --- /dev/null +++ b/client/test/src/criterion/stats/univariate/sample.rs @@ -0,0 +1,178 @@ +use alloc::vec::Vec; +use core::{mem, ops}; + +use super::super::float::Float; +use super::super::sum; +use 
super::super::tuple::{Tuple, TupledDistributionsBuilder}; +use super::super::univariate::{Percentiles, Resamples}; + +/// A collection of data points drawn from a population +/// +/// Invariants: +/// +/// - The sample contains at least 2 data points +/// - The sample contains no `NaN`s +#[repr(transparent)] +pub struct Sample([A]); + +// TODO(rust-lang/rfcs#735) move this `impl` into a private percentiles module +impl Sample +where + A: Float, +{ + /// Creates a new sample from an existing slice + /// + /// # Panics + /// + /// Panics if `slice` contains any `NaN` or if `slice` has less than two + /// elements + pub fn new(slice: &[A]) -> &Self { + assert!(slice.len() > 1 && slice.iter().all(|x| !x.is_nan())); + unsafe { mem::transmute(slice) } + } + + /// Returns the arithmetic average of the sample + /// + /// - Time: `O(length)` + pub fn mean(&self) -> A { + let n = self.len(); + + self.sum() / A::cast(n) + } + + /// Returns the median absolute deviation + /// + /// The `median` can be optionally passed along to speed up (2X) the + /// computation + /// + /// - Time: `O(length)` + /// - Memory: `O(length)` + pub fn median_abs_dev(&self, median: Option) -> A + where + usize: cast::From>, + { + let median = median.unwrap_or_else(|| self.percentiles().median()); + + // NB Although this operation can be SIMD accelerated, the gain is negligible + // because the bottle neck is the sorting operation which is part of the + // computation of the median + let abs_devs = self.iter().map(|&x| (x - median).abs()).collect::>(); + + let abs_devs: &Self = Self::new(&abs_devs); + + abs_devs.percentiles().median() * A::cast(1.4826) + } + + /// Returns a "view" into the percentiles of the sample + /// + /// This "view" makes consecutive computations of percentiles much faster + /// (`O(1)`) + /// + /// - Time: `O(N log N) where N = length` + /// - Memory: `O(length)` + pub fn percentiles(&self) -> Percentiles + where + usize: cast::From>, + { + use core::cmp::Ordering; + + // NB 
This function assumes that there are no `NaN`s in the sample + fn cmp(a: &T, b: &T) -> Ordering + where + T: PartialOrd, + { + match a.partial_cmp(b) { + Some(o) => o, + // Arbitrary way to handle NaNs that should never happen + None => Ordering::Equal, + } + } + + let mut v = self.to_vec().into_boxed_slice(); + v.sort_unstable_by(cmp); + unsafe { mem::transmute(v) } + } + + /// Returns the standard deviation of the sample + /// + /// The `mean` can be optionally passed along to speed up (2X) the + /// computation + /// + /// - Time: `O(length)` + pub fn std_dev(&self, mean: Option) -> A { + self.var(mean).sqrt() + } + + /// Returns the sum of all the elements of the sample + /// + /// - Time: `O(length)` + pub fn sum(&self) -> A { + sum(self) + } + + /// Returns the t score between these two samples + /// + /// - Time: `O(length)` + pub fn t(&self, other: &Self) -> A { + let (x_bar, y_bar) = (self.mean(), other.mean()); + let (s2_x, s2_y) = (self.var(Some(x_bar)), other.var(Some(y_bar))); + let n_x = A::cast(self.len()); + let n_y = A::cast(other.len()); + let num = x_bar - y_bar; + let den = (s2_x / n_x + s2_y / n_y).sqrt(); + + num / den + } + + /// Returns the variance of the sample + /// + /// The `mean` can be optionally passed along to speed up (2X) the + /// computation + /// + /// - Time: `O(length)` + pub fn var(&self, mean: Option) -> A { + use core::ops::Add; + + let mean = mean.unwrap_or_else(|| self.mean()); + let slice = self; + + let sum = slice + .iter() + .map(|&x| (x - mean).powi(2)) + .fold(A::cast(0), Add::add); + + sum / A::cast(slice.len() - 1) + } + + // TODO Remove the `T` parameter in favor of `S::Output` + /// Returns the bootstrap distributions of the parameters estimated by the + /// 1-sample statistic + /// + /// - Multi-threaded + /// - Time: `O(nresamples)` + /// - Memory: `O(nresamples)` + pub fn bootstrap(&self, nresamples: usize, statistic: S) -> T::Distributions + where + S: Fn(&Self) -> T + Sync, + T: Tuple + Send, + 
T::Distributions: Send, + T::Builder: Send, + { + let mut resamples = Resamples::new(self); + (0..nresamples) + .map(|_| statistic(resamples.next())) + .fold(T::Builder::new(0), |mut sub_distributions, sample| { + sub_distributions.push(sample); + sub_distributions + }) + .complete() + } +} + +impl ops::Deref for Sample { + type Target = [A]; + + fn deref(&self) -> &[A] { + &self.0 + } +} diff --git a/client/test/src/lib.rs b/client/test/src/lib.rs index 57e40cd0..efaddb7a 100644 --- a/client/test/src/lib.rs +++ b/client/test/src/lib.rs @@ -1,8 +1,20 @@ +#![cfg_attr(target_feature = "atomics", feature(thread_local))] + +extern crate alloc; + +mod criterion; +mod time; + +use core::ops::Deref; +/// TODO: `no_std` support use std::panic::{self, PanicHookInfo}; use std::sync::Once; -pub use js_bindgen_test_macro::test; +pub use criterion::Criterion; +pub use js_bindgen_test_macro::{bench, test}; use js_sys::{JsString, js_sys}; +use once_cell::unsync::Lazy; +pub use time::{Instant, SystemTime, UNIX_EPOCH}; #[js_sys] extern "js-sys" { @@ -39,3 +51,62 @@ pub fn set_panic_hook() { })); }); } + +pub mod console { + use js_sys::{JsString, js_sys}; + + #[js_sys(namespace = "console")] + extern "js-sys" { + pub fn log(data: &JsString); + pub fn error(data: &JsString); + } + + #[macro_export] + macro_rules! console_log { + ($($t:tt)*) => ( + $crate::console::error( + &format_args!($($t)*).to_string().as_str().into() + ) + ) + } + + #[macro_export] + macro_rules! console_error { + ($($t:tt)*) => ( + $crate::console::error( + &format_args!($($t)*).to_string().as_str().into() + ) + ) + } +} + +/* TODO: Move the following code into `xxx-shared` crate. */ + +pub(crate) struct ThreadLocalWrapper(pub(crate) T); + +#[cfg(not(target_feature = "atomics"))] +// SAFETY: In wasm targets without atomics there is no cross-thread access, so +// treating this wrapper as `Sync` is equivalent to thread-local usage. 
+unsafe impl Sync for ThreadLocalWrapper {} + +#[cfg(not(target_feature = "atomics"))] +// SAFETY: In wasm targets without atomics there is no cross-thread transfer, so +// treating this wrapper as `Send` is equivalent to thread-local usage. +unsafe impl Send for ThreadLocalWrapper {} + +/// Wrapper around [`Lazy`] adding `Send + Sync` when `atomics` is not enabled. +pub(crate) struct LazyCell T>(ThreadLocalWrapper>); + +impl LazyCell { + pub const fn new(init: F) -> Self { + Self(ThreadLocalWrapper(Lazy::new(init))) + } +} + +impl Deref for LazyCell { + type Target = T; + + fn deref(&self) -> &T { + Lazy::force(&self.0.0) + } +} diff --git a/client/test/src/time/instant.rs b/client/test/src/time/instant.rs new file mode 100644 index 00000000..9cd2b9b5 --- /dev/null +++ b/client/test/src/time/instant.rs @@ -0,0 +1,133 @@ +//! Re-implementation of [`std::time::Instant`]. +//! +//! See . + +use core::ops::Sub; +use core::time::Duration; + +use super::performance; + +/// See [`std::time::Instant`]. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Instant(Duration); + +impl Instant { + /// See [`std::time::Instant::now()`]. + /// + /// # Panics + /// + /// This call will panic if the [`Performance` object] was not found, e.g. + /// calling from a [worklet]. + /// + /// [`Performance` object]: https://developer.mozilla.org/en-US/docs/Web/API/performance_property + /// [worklet]: https://developer.mozilla.org/en-US/docs/Web/API/Worklet + #[must_use] + pub fn now() -> Self { + #[cfg(not(target_feature = "atomics"))] + let now = performance().now(); + #[cfg(target_feature = "atomics")] + let now = performance().time_origin(); + + assert!( + now.is_sign_positive(), + "negative `DOMHighResTimeStamp`s are not supported" + ); + Self(time_stamp_to_duration(now)) + } + + /// See [`std::time::Instant::duration_since()`]. 
+ #[must_use] + pub fn duration_since(&self, earlier: Self) -> Duration { + self.checked_duration_since(earlier).unwrap_or_default() + } + + /// See [`std::time::Instant::checked_duration_since()`]. + #[must_use] + pub fn checked_duration_since(&self, earlier: Self) -> Option { + self.0.checked_sub(earlier.0) + } + + /// See [`std::time::Instant::elapsed()`]. + #[must_use] + pub fn elapsed(&self) -> Duration { + Self::now() - *self + } +} + +impl Sub for Instant { + type Output = Duration; + + /// Returns the amount of time elapsed from another instant to this one, + /// or zero duration if that instant is later than this one. + fn sub(self, rhs: Self) -> Duration { + self.duration_since(rhs) + } +} + +/// Converts a `DOMHighResTimeStamp` to a [`Duration`]. +/// +/// # Note +/// +/// Keep in mind that like [`Duration::from_secs_f64()`] this doesn't do perfect +/// rounding. +#[expect(clippy::pedantic, reason = "checked")] +fn time_stamp_to_duration(time_stamp: f64) -> Duration { + let time_stamp = F64(time_stamp); + + Duration::from_millis(time_stamp.trunc() as u64) + + Duration::from_nanos(F64(time_stamp.fract() * 1.0e6).internal_round_ties_even() as u64) +} + +/// [`f64`] `no_std` compatibility wrapper. +#[derive(Clone, Copy)] +struct F64(f64); + +impl F64 { + /// See [`f64::trunc()`]. + fn trunc(self) -> f64 { + libm::trunc(self.0) + } + + /// See [`f64::fract()`]. + fn fract(self) -> f64 { + self.0 - self.trunc() + } + + /// A specialized version of [`f64::round_ties_even()`]. [`f64`] must be + /// positive and have an exponent smaller than `52`. + /// + /// - We expect `DOMHighResTimeStamp` to always be positive. We check that + /// in [`Instant::now()`]. + /// - We only round the fractional part after multiplying it by `1e6`. A + /// fraction always has a negative exponent. `1e6` has an exponent of + /// `19`. Therefor the resulting exponent can at most be `19`. 
+ /// + /// [`f64::round_ties_even()`]: https://doc.rust-lang.org/1.83.0/std/primitive.f64.html#method.round_ties_even + fn internal_round_ties_even(self) -> f64 { + /// Put `debug_assert!` in a function to clap `coverage(off)` on it. + /// + /// See . + fn check(this: f64) { + debug_assert!(this.is_sign_positive(), "found negative input"); + debug_assert!( + { + let exponent: u64 = this.to_bits() >> 52 & 0x7ff; + exponent < 0x3ff + 52 + }, + "found number with exponent bigger than 51" + ); + } + + check(self.0); + + // See . + + let one_over_e = 1.0 / f64::EPSILON; + // REMOVED: We don't support numbers with exponents bigger than 51. + // REMOVED: We don't support negative numbers. + // REMOVED: We don't support numbers with exponents bigger than 51. + let xplusoneovere = self.0 + one_over_e; + xplusoneovere - one_over_e + // REMOVED: We don't support negative numbers. + } +} diff --git a/client/test/src/time/mod.rs b/client/test/src/time/mod.rs new file mode 100644 index 00000000..51c24604 --- /dev/null +++ b/client/test/src/time/mod.rs @@ -0,0 +1,36 @@ +mod instant; +mod system_time; + +pub use instant::Instant; +use js_sys::{js_bindgen, js_sys}; +pub use system_time::SystemTime; +pub const UNIX_EPOCH: SystemTime = SystemTime::UNIX_EPOCH; + +#[js_sys] +extern "js-sys" { + pub type Performance; + + #[js_sys(js_embed = "performance")] + pub fn performance() -> Performance; + + #[js_sys(js_name = "now")] + pub fn now(self: &Performance) -> f64; + + #[cfg(target_feature = "atomics")] + #[js_sys(property, js_name = "timeOrigin")] + pub fn time_origin(self: &Performance) -> f64; +} + +#[js_sys(namespace = "Date")] +extern "js-sys" { + #[js_sys(js_name = "now")] + pub fn date_now() -> f64; +} + +js_bindgen::embed_js!( + module = "js_bindgen_test", + name = "performance", + "() => {{ + return globalThis.performance + }}" +); diff --git a/client/test/src/time/system_time.rs b/client/test/src/time/system_time.rs new file mode 100644 index 00000000..6de4604f --- /dev/null 
+++ b/client/test/src/time/system_time.rs @@ -0,0 +1,36 @@ +//! Re-implementation of [`std::time::SystemTime`]. +//! +//! See . + +use core::time::Duration; + +use super::date_now; + +/// See [`std::time::SystemTime`]. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct SystemTime(pub(crate) Duration); + +impl SystemTime { + /// See [`std::time::SystemTime::UNIX_EPOCH`]. + pub const UNIX_EPOCH: Self = Self(Duration::ZERO); + + /// See [`std::time::SystemTime::now()`]. + #[must_use] + pub fn now() -> Self { + #[expect(clippy::cast_possible_truncation, reason = "checked")] + let ms = date_now() as i64; + let ms = ms.try_into().expect("found negative timestamp"); + + Self(Duration::from_millis(ms)) + } + + /// See [`std::time::SystemTime::duration_since()`]. + pub fn duration_since(&self, earlier: Self) -> Result { + // See . + self.0.checked_sub(earlier.0).ok_or(SystemTimeError) + } +} + +/// See [`std::time::SystemTimeError`]. +#[derive(Clone, Debug)] +pub struct SystemTimeError; diff --git a/host/runner/src/js/shared.mjs b/host/runner/src/js/shared.mjs index 8600ba8a..74cd304f 100644 --- a/host/runner/src/js/shared.mjs +++ b/host/runner/src/js/shared.mjs @@ -66,14 +66,6 @@ export async function runTests(module, report) { } const testFn = instance.exports[test.importName]; let result; - if (test.shouldPanic) { - report(0 /* Stream.Stdout */, [ - { text: `test ${test.name} - should panic ... `, color: 0 /* Color.Default */ }, - ]); - } - else { - report(0 /* Stream.Stdout */, [testText]); - } interceptFlag = true; try { testFn(); @@ -83,6 +75,14 @@ export async function runTests(module, report) { result = { success: false, stack: error.stack }; } interceptFlag = false; + if (test.shouldPanic) { + report(0 /* Stream.Stdout */, [ + { text: `test ${test.name} - should panic ... 
`, color: 0 /* Color.Default */ }, + ]); + } + else { + report(0 /* Stream.Stdout */, [testText]); + } if (test.shouldPanic) { if (result.success) { report(0 /* Stream.Stdout */, [failedText, newLineText]); diff --git a/host/runner/src/js/shared.mts b/host/runner/src/js/shared.mts index 5519371e..4686efb3 100644 --- a/host/runner/src/js/shared.mts +++ b/host/runner/src/js/shared.mts @@ -96,14 +96,6 @@ export async function runTests( const testFn = instance.exports[test.importName] as () => void let result: { success: true } | { success: false; stack: string } - if (test.shouldPanic) { - report(Stream.Stdout, [ - { text: `test ${test.name} - should panic ... `, color: Color.Default }, - ]) - } else { - report(Stream.Stdout, [testText]) - } - interceptFlag = true try { @@ -115,6 +107,14 @@ export async function runTests( interceptFlag = false + if (test.shouldPanic) { + report(Stream.Stdout, [ + { text: `test ${test.name} - should panic ... `, color: Color.Default }, + ]) + } else { + report(Stream.Stdout, [testText]) + } + if (test.shouldPanic) { if (result.success) { report(Stream.Stdout, [failedText, newLineText]) diff --git a/host/runner/src/js/test-data.d.json.ts b/host/runner/src/js/test-data.d.json.ts index ccd80bcf..d4a84c65 100644 --- a/host/runner/src/js/test-data.d.json.ts +++ b/host/runner/src/js/test-data.d.json.ts @@ -1,5 +1,6 @@ type TestData = { worker: WorkerKind + bench: boolean, noCapture: boolean filteredCount: number tests: TestEntry[] diff --git a/host/runner/src/main.rs b/host/runner/src/main.rs index e388c34c..1432055d 100644 --- a/host/runner/src/main.rs +++ b/host/runner/src/main.rs @@ -45,6 +45,9 @@ struct Cli { /// don't capture `console.*()` of each task, allow printing directly. #[arg(long, alias = "nocapture")] no_capture: bool, + /// Run benchmarks. + #[arg(long)] + bench: bool, /// Configure formatting of output. 
#[arg(long, value_enum)]
     format: Option<FormatSetting>,
@@ -81,32 +84,41 @@ fn main() -> Result<()> {
         .with_context(|| format!("failed to read Wasm file: {}", wasm_path.display()))?;
 
     let args = TestArgs::new(cli);
-    let (tests, filtered_count) = TestEntry::read(
+    let (tests, test_filtered_count) = TestEntry::read_tests(
         &wasm_bytes,
         args.filter.as_ref(),
         args.ignored_only,
         args.exact,
     )?;
+    let (benches, bench_filtered_count) =
+        TestEntry::read_benches(&wasm_bytes, args.filter.as_ref(), args.exact)?;
+
     if args.list_only {
         match args.list_format {
             Some(FormatSetting::Terse) => {
                 for test in &tests {
                     println!("{}: test", test.name);
                 }
+                for bench in &benches {
+                    println!("{}: benchmark", bench.name);
+                }
             }
             None => {
                 for test in &tests {
                     println!("{}: test", test.name);
                 }
+                for bench in &benches {
+                    println!("{}: benchmark", bench.name);
+                }
 
                 println!();
-                println!("{} tests, 0 benchmarks", tests.len());
+                println!("{} tests, {} benchmarks", tests.len(), benches.len());
             }
         }
 
         return Ok(());
     }
 
-    if tests.is_empty() {
+    if !args.bench && tests.is_empty() || args.bench && benches.is_empty() {
         const GREEN: &str = "\u{001b}[32m";
         const RESET: &str = "\u{001b}[0m";
@@ -115,16 +127,30 @@ fn main() -> Result<()> {
         println!();
         println!(
             "test result: {GREEN}ok{RESET}. 0 passed; 0 failed; 0 ignored; 0 measured; \
-             {filtered_count} filtered out; finished in 0.00s"
+             {} filtered out; finished in 0.00s",
+            if args.bench { bench_filtered_count } else { test_filtered_count }
         );
         println!();
 
         return Ok(());
     }
 
+    let filtered_count = if args.bench {
+        bench_filtered_count
+    } else {
+        test_filtered_count
+    };
+
+    let tests = if args.bench {
+        benches
+    } else {
+        tests
+    };
+
     // The JS file has the same name, just a different file extension.
let imports_path = wasm_path.with_extension("mjs"); let test_data = TestData { - no_capture: args.no_capture, + bench: args.bench, + no_capture: args.no_capture || args.bench, filtered_count, tests, }; @@ -154,6 +179,7 @@ struct TestArgs { list_format: Option, ignored_only: bool, exact: bool, + bench: bool, } impl TestArgs { @@ -165,6 +191,7 @@ impl TestArgs { list_format: cli.format, ignored_only: cli.ignored, exact: cli.exact, + bench: cli.bench, } } } @@ -172,6 +199,7 @@ impl TestArgs { #[derive(Serialize)] #[serde(rename_all = "camelCase")] struct TestData { + bench: bool, no_capture: bool, filtered_count: usize, tests: Vec, @@ -193,7 +221,67 @@ enum TestAttr { } impl TestEntry { - fn read( + fn read_benches( + wasm_bytes: &[u8], + filter: &[String], + exact: bool, + ) -> Result<(Vec, usize)> { + let mut tests = Vec::new(); + let mut total = 0; + + for payload in WasmParser::new(0).parse_all(wasm_bytes) { + if let Payload::CustomSection(section) = payload? + && section.name() == "js_bindgen.bench" + { + let mut data = section.data(); + + while !data.is_empty() { + let len = u32::from_le_bytes( + data.split_off(..4) + .context("invalid test encoding")? + .try_into()?, + ) as usize; + let data = data.split_off(..len).context("invalid test encoding")?; + + let import_name = str::from_utf8(data)?; + let name = import_name + .split_once("::") + .unwrap_or_else(|| panic!("unexpected test name: {import_name}")) + .1; + + total += 1; + + let matches_filter = filter.is_empty() + || filter.iter().any(|filter| { + if exact { + filter == name + } else { + name.contains(filter) + } + }); + + if matches_filter { + tests.push(Self { + name: name.to_string(), + import_name: import_name.to_string(), + ignore: TestAttr::None, + should_panic: TestAttr::None, + }); + } + } + + // Section with the same name can never appear again. 
+ break; + } + } + + tests.sort_unstable_by(|a, b| a.name.cmp(&b.name)); + let filtered_count = total - tests.len(); + + Ok((tests, filtered_count)) + } + + fn read_tests( wasm_bytes: &[u8], filter: &[String], ignored_only: bool, diff --git a/host/test-macro/src/lib.rs b/host/test-macro/src/lib.rs index 3c94b56b..165ff938 100644 --- a/host/test-macro/src/lib.rs +++ b/host/test-macro/src/lib.rs @@ -15,6 +15,16 @@ enum TestAttribute { WithText(String), } +#[proc_macro_attribute] +pub fn bench( + attr: proc_macro::TokenStream, + item: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + bench_internal(attr.into(), item.into()) + .unwrap_or_else(Error::into_compile_error) + .into() +} + #[proc_macro_attribute] pub fn test( attr: proc_macro::TokenStream, @@ -25,7 +35,7 @@ pub fn test( .into() } -fn test_internal(attr: TokenStream, item: TokenStream) -> Result { +fn parse_crate(attr: TokenStream) -> Result { let mut crate_: Option = None; meta::parser(|meta| { @@ -44,6 +54,68 @@ fn test_internal(attr: TokenStream, item: TokenStream) -> Result { let crate_ = crate_.unwrap_or_else(|| parse_quote!(::js_bindgen_test)); + Ok(crate_) +} + +fn bench_internal(attr: TokenStream, item: TokenStream) -> Result { + let crate_ = parse_crate(attr)?; + let function: ItemFn = syn::parse2(item)?; + + if let Some(asyncness) = function.sig.asyncness { + return Err(Error::new_spanned( + asyncness, + "`async` benchmark not supported", + )); + } + + if let ReturnType::Type(..) = function.sig.output { + return Err(Error::new_spanned( + function.sig.output, + "benchmark with return value not supported", + )); + } + + let ident = &function.sig.ident; + let foreign_bench = quote! { + ::core::concat!(::core::module_path!(), "::", ::core::stringify!(#ident)) + }; + + Ok(quote! 
{
+        #function
+
+        const _: () = {
+            const TEST: &::core::primitive::str = #foreign_bench;
+            const TEST_LEN: ::core::primitive::usize = ::core::primitive::str::len(TEST);
+            const TEST_PTR: *const ::core::primitive::u8 = ::core::primitive::str::as_ptr(TEST);
+            const TEST_ARR: [::core::primitive::u8; TEST_LEN] = unsafe { *(TEST_PTR as *const _) };
+
+            const LEN_ARR: [::core::primitive::u8; 4] = ::core::primitive::u32::to_le_bytes(TEST_LEN as ::core::primitive::u32);
+
+            #[repr(C)]
+            struct Layout(
+                [::core::primitive::u8; 4],
+                [::core::primitive::u8; TEST_LEN],
+            );
+
+            #[unsafe(link_section = "js_bindgen.bench")]
+            static CUSTOM_SECTION: Layout = Layout(LEN_ARR, TEST_ARR);
+        };
+
+        const _: () = {
+            #[unsafe(export_name = #foreign_bench)]
+            extern "C" fn __jbg_bench() {
+                #crate_::set_panic_hook();
+                let mut bencher = #crate_::Criterion::default()
+                    .with_location(::core::file!(), ::core::module_path!());
+                #ident(&mut bencher);
+            }
+        };
+    })
+}
+
+fn test_internal(attr: TokenStream, item: TokenStream) -> Result<TokenStream> {
+    let crate_ = parse_crate(attr)?;
     let mut function: ItemFn = syn::parse2(item)?;
     let span = function.span();
     let mut ignore = TestAttribute::None;