diff --git a/client/js-sys/benches/foo.rs b/client/js-sys/benches/foo.rs new file mode 100644 index 00000000..f6ac7cd9 --- /dev/null +++ b/client/js-sys/benches/foo.rs @@ -0,0 +1,17 @@ +use js_bindgen_test::{Criterion, bench}; +use js_sys::js_sys; + +js_bindgen::embed_js!(module = "foo", name = "bench", "(value) => value"); + +#[js_sys] +extern "js-sys" { + #[js_sys(js_embed = "bench")] + fn val(value: u128) -> u128; +} + +#[bench] +fn bench_foo(c: &mut Criterion) { + c.bench_function("i64,i64", |b| b.iter(|| { + assert_eq!(val(1), 1); + })); +} diff --git a/client/js-sys/src/numeric.rs b/client/js-sys/src/numeric.rs index 583436e5..1530fb58 100644 --- a/client/js-sys/src/numeric.rs +++ b/client/js-sys/src/numeric.rs @@ -182,13 +182,13 @@ unsafe impl Output for u128 { const ASM_TYPE: &str = ASM_PTR_TYPE; const ASM_CONV: Option = Some(OutputAsmConv { import: Some(const_concat!( - ".functype js_sys.numeric.128 (i32, i32, i32, i32, ", + ".functype js_sys.numeric.128 (i64, i64,", ASM_PTR_TYPE, ") -> ()" )), direct: false, conv: "call js_sys.numeric.128", - r#type: "i32, i32, i32, i32", + r#type: "i64,i64", }); const JS_CONV: Option = Some(OutputJsConv { embed: Some(("js_sys", "numeric.128.encode")), @@ -239,13 +239,13 @@ unsafe impl Output for i128 { const ASM_TYPE: &str = ASM_PTR_TYPE; const ASM_CONV: Option = Some(OutputAsmConv { import: Some(const_concat!( - ".functype js_sys.numeric.128 (i32, i32, i32, i32, ", + ".functype js_sys.numeric.128 (i64, i64,", ASM_PTR_TYPE, ") -> ()" )), direct: false, conv: "call js_sys.numeric.128", - r#type: "i32, i32, i32, i32", + r#type: "i64,i64", }); const JS_CONV: Option = Some(OutputJsConv { embed: Some(("js_sys", "numeric.128.encode")), @@ -270,35 +270,47 @@ const _: () = { js_bindgen::embed_js!( module = "js_sys", name = "numeric.128.encode", - "(value) => {{", - " const lo_lo = Number(value & 0xFFFFFFFFn)", - " const lo_hi = Number((value >> 32n) & 0xFFFFFFFFn)", - " const hi_lo = Number((value >> 64n) & 0xFFFFFFFFn)", - " 
const hi_hi = Number((value >> 96n) & 0xFFFFFFFFn)", - " return [lo_lo, lo_hi, hi_lo, hi_hi]", + "(value) => {{", + " const lo = BigInt.asIntN(64, value & 0xFFFFFFFFFFFFFFFFn)", + " const hi = BigInt.asIntN(64, value >> 64n)", + " return [lo, hi]", "}}", ); js_bindgen::unsafe_embed_asm!( ".globl js_sys.numeric.128", "js_sys.numeric.128:", - " .functype js_sys.numeric.128 (i32, i32, i32, i32, {}) -> ()", - " local.get 4", + " .functype js_sys.numeric.128 (i64, i64, {}) -> ()", + " local.get 2", " local.get 0", - " i32.store 0", - " local.get 4", - " local.get 1", - " i32.store 4", - " local.get 4", + " i64.store 0", " local.get 2", - " i32.store 8", - " local.get 4", - " local.get 3", - " i32.store 12", + " local.get 1", + " i64.store 8", " end_function", interpolate ASM_PTR_TYPE, ); +// js_bindgen::unsafe_embed_asm!( +// ".globl js_sys.numeric.128", +// "js_sys.numeric.128:", +// " .functype js_sys.numeric.128 (i32, i32, i32, i32, {}) -> ()", +// " local.get 4", +// " local.get 0", +// " i32.store 0", +// " local.get 4", +// " local.get 1", +// " i32.store 4", +// " local.get 4", +// " local.get 2", +// " i32.store 8", +// " local.get 4", +// " local.get 3", +// " i32.store 12", +// " end_function", +// interpolate ASM_PTR_TYPE, +// ); + #[cfg(target_arch = "wasm32")] delegate!(u32, *const T:); #[cfg(target_arch = "wasm64")] diff --git a/client/test/Cargo.toml b/client/test/Cargo.toml index 6c34b62a..b9c323f7 100644 --- a/client/test/Cargo.toml +++ b/client/test/Cargo.toml @@ -7,6 +7,17 @@ rust-version = "1.87" [dependencies] js-bindgen-test-macro = { workspace = true } js-sys = { workspace = true, features = ["macro"] } +web-sys = { workspace = true } + +async-trait = "0.1.89" +cast = "0.3" +libm = "0.2.11" +nu-ansi-term = { version = "0.50", default-features = false } +num-traits = { version = "0.2", default-features = false, features = ["libm"] } +once_cell = "1.21.4" +oorandom = "11.1.5" +serde = { version = "1.0", default-features = false, features = 
["derive"] } +serde_json = { version = "1.0", default-features = false, features = ["alloc"] } [lints] workspace = true diff --git a/client/test/src/criterion/analysis.rs b/client/test/src/criterion/analysis.rs new file mode 100644 index 00000000..e86ef234 --- /dev/null +++ b/client/test/src/criterion/analysis.rs @@ -0,0 +1,157 @@ +use alloc::vec::Vec; + +use super::benchmark::BenchmarkConfig; +use super::estimate::{ + ConfidenceInterval, Distributions, Estimate, Estimates, PointEstimates, build_estimates, +}; +use super::measurement::Measurement; +use super::report::{BenchmarkId, Report}; +use super::routine::Routine; +use super::stats::bivariate::Data; +use super::stats::bivariate::regression::Slope; +use super::stats::univariate::Sample; +use super::stats::{Distribution, Tails}; +use super::{Criterion, SavedSample, baseline, compare}; + +// Common analysis procedure +pub(crate) async fn common( + id: &BenchmarkId, + routine: &mut dyn Routine, + config: &BenchmarkConfig, + criterion: &Criterion, +) { + criterion.report.benchmark_start(id); + + let (sampling_mode, iters, times); + let sample = routine + .sample(&criterion.measurement, id, config, criterion) + .await; + sampling_mode = sample.0; + iters = sample.1; + times = sample.2; + + criterion.report.analysis(id); + + if times.contains(&0.0) { + return; + } + + let avg_times = iters + .iter() + .zip(times.iter()) + .map(|(&iters, &elapsed)| elapsed / iters) + .collect::>(); + let avg_times = Sample::new(&avg_times); + let labeled_sample = super::stats::univariate::outliers::tukey::classify(avg_times); + + let data = Data::new(&iters, ×); + let (mut distributions, mut estimates) = estimates(avg_times, config); + if sampling_mode.is_linear() { + let (distribution, slope) = regression(&data, config); + + estimates.slope = Some(slope); + distributions.slope = Some(distribution); + } + + let comparison = compare::common(id, avg_times, config).map( + |(t_value, t_distribution, relative_estimates, ..)| { + let 
p_value = t_distribution.p_value(t_value, Tails::Two); + super::report::ComparisonData { + p_value, + relative_estimates, + significance_threshold: config.significance_level, + noise_threshold: config.noise_threshold, + } + }, + ); + + let measurement_data = super::report::MeasurementData { + avg_times: labeled_sample, + absolute_estimates: estimates.clone(), + comparison, + }; + + criterion + .report + .measurement_complete(id, &measurement_data, criterion.measurement.formatter()); + + baseline::write( + id.desc(), + baseline::BenchmarkBaseline { + file: criterion.location.as_ref().map(|l| l.file.clone()), + module_path: criterion.location.as_ref().map(|l| l.module_path.clone()), + iters: data.x().as_ref().to_vec(), + times: data.y().as_ref().to_vec(), + sample: SavedSample { + sampling_mode, + iters: data.x().as_ref().to_vec(), + times: data.y().as_ref().to_vec(), + }, + estimates, + }, + ); +} + +// Performs a simple linear regression on the sample +fn regression( + data: &Data<'_, f64, f64>, + config: &BenchmarkConfig, +) -> (Distribution, Estimate) { + let cl = config.confidence_level; + + let distribution = data.bootstrap(config.nresamples, |d| (Slope::fit(&d).0,)).0; + + let point = Slope::fit(data); + let (lb, ub) = distribution.confidence_interval(config.confidence_level); + let se = distribution.std_dev(None); + + ( + distribution, + Estimate { + confidence_interval: ConfidenceInterval { + confidence_level: cl, + lower_bound: lb, + upper_bound: ub, + }, + point_estimate: point.0, + standard_error: se, + }, + ) +} + +// Estimates the statistics of the population from the sample +fn estimates(avg_times: &Sample, config: &BenchmarkConfig) -> (Distributions, Estimates) { + fn stats(sample: &Sample) -> (f64, f64, f64, f64) { + let mean = sample.mean(); + let std_dev = sample.std_dev(Some(mean)); + let median = sample.percentiles().median(); + let mad = sample.median_abs_dev(Some(median)); + + (mean, std_dev, median, mad) + } + + let cl = 
config.confidence_level; + let nresamples = config.nresamples; + + let (mean, std_dev, median, mad) = stats(avg_times); + let points = PointEstimates { + mean, + median, + std_dev, + median_abs_dev: mad, + }; + + let (dist_mean, dist_stddev, dist_median, dist_mad) = avg_times.bootstrap(nresamples, stats); + + let distributions = Distributions { + mean: dist_mean, + slope: None, + median: dist_median, + median_abs_dev: dist_mad, + std_dev: dist_stddev, + }; + + let estimates = build_estimates(&distributions, &points, cl); + + (distributions, estimates) +} diff --git a/client/test/src/criterion/baseline.rs b/client/test/src/criterion/baseline.rs new file mode 100644 index 00000000..b5292700 --- /dev/null +++ b/client/test/src/criterion/baseline.rs @@ -0,0 +1,61 @@ +//! Record previous benchmark data + +use alloc::collections::BTreeMap; +use alloc::string::String; +use alloc::vec::Vec; +use core::cell::RefCell; + +use serde::{Deserialize, Serialize}; + +use super::SavedSample; +use super::estimate::Estimates; +use crate::LazyCell; + +#[cfg_attr(target_feature = "atomics", thread_local)] +static BASELINE: LazyCell>> = + LazyCell::new(|| RefCell::new(BTreeMap::new())); + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub(crate) struct BenchmarkBaseline { + pub(crate) file: Option, + pub(crate) module_path: Option, + pub(crate) iters: Vec, + pub(crate) times: Vec, + pub(crate) sample: SavedSample, + pub(crate) estimates: Estimates, +} + +/// Write the corresponding benchmark ID and corresponding data into the table. +pub(crate) fn write(id: &str, baseline: BenchmarkBaseline) { + BASELINE.borrow_mut().insert(id.into(), baseline); +} + +/// Read the data corresponding to the benchmark ID from the table. +pub(crate) fn read(id: &str) -> Option { + BASELINE.borrow().get(id).cloned() +} + +// /// Used to write previous benchmark data before the benchmark, for later +// /// comparison. 
+// #[wasm_bindgen] +// pub fn __wbgbench_import(baseline: Vec) { +// match serde_json::from_slice(&baseline) { +// Ok(prev) => { +// *BASELINE.borrow_mut() = prev; +// } +// Err(e) => { +// console_log!("Failed to import previous benchmark {e:?}"); +// } +// } +// } +// +// /// Used to read benchmark data, and then the runner stores it on the local +// /// disk. +// #[wasm_bindgen] +// pub fn __wbgbench_dump() -> Option> { +// let baseline = BASELINE.borrow(); +// if baseline.is_empty() { +// return None; +// } +// serde_json::to_vec(&*baseline).ok() +// } diff --git a/client/test/src/criterion/bencher.rs b/client/test/src/criterion/bencher.rs new file mode 100644 index 00000000..57977cb1 --- /dev/null +++ b/client/test/src/criterion/bencher.rs @@ -0,0 +1,190 @@ +use core::future::Future; +use core::hint::black_box; +use core::time::Duration; + +use super::measurement::Measurement; +use crate::Instant; + +// ================================== MAINTENANCE NOTE +// ============================================= Any changes made to either +// Bencher or AsyncBencher will have to be replicated to the other! +// ================================== MAINTENANCE NOTE +// ============================================= + +/// Timer struct used to iterate a benchmarked function and measure the runtime. +/// +/// This struct provides different timing loops as methods. Each timing loop +/// provides a different way to time a routine and each has advantages and +/// disadvantages. +/// +/// * If you want to do the iteration and measurement yourself (eg. passing the +/// iteration count to a separate process), use [`iter_custom`]. +/// * If your routine requires no per-iteration setup and returns a value with +/// an expensive `drop` method, use [`iter_with_large_drop`]. +/// * If your routine requires some per-iteration setup that shouldn't be timed, +/// use [`iter_batched`] or [`iter_batched_ref`]. See [`BatchSize`] for a +/// discussion of batch sizes. 
If the setup value implements `Drop` and you +/// don't want to include the `drop` time in the measurement, use +/// [`iter_batched_ref`], otherwise use [`iter_batched`]. These methods are +/// also suitable for benchmarking routines which return a value with an +/// expensive `drop` method, but are more complex than +/// [`iter_with_large_drop`]. +/// * Otherwise, use [`iter`]. +/// +/// [`iter`]: Bencher::iter +/// [`iter_custom`]: Bencher::iter_custom +/// [`iter_future`]: Bencher::iter_future +/// [`iter_custom_future`]: Bencher::iter_custom_future +pub struct Bencher<'a, M: Measurement> { + pub(crate) iterated: bool, // Have we iterated this benchmark? + pub(crate) iters: u64, // Number of times to iterate this benchmark + pub(crate) value: Duration, // The measured value + pub(crate) measurement: &'a M, // Reference to the measurement object + pub(crate) elapsed_time: Duration, /* How much time did it take to perform the iteration? + * Used for the warmup period. */ +} + +impl Bencher<'_, M> { + /// Times a `routine` by executing it many times and timing the total + /// elapsed time. + /// + /// Prefer this timing loop when `routine` returns a value that doesn't have + /// a destructor. + /// + /// # Timing model + /// + /// Note that the `Bencher` also times the time required to destroy the + /// output of `routine()`. Therefore prefer this timing loop when the + /// runtime of `mem::drop(O)` is negligible compared to the runtime of the + /// `routine`. 
+ /// + /// ```text + /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next) + /// ``` + #[inline(never)] + pub fn iter(&mut self, mut routine: R) + where + R: FnMut() -> O, + { + self.iterated = true; + let start = self.measurement.start(); + for _ in 0..self.iters { + black_box(routine()); + } + let end = self.measurement.end(start); + self.value = end; + self.elapsed_time = end; + } + + /// Times a `routine` by executing it many times and relying on `routine` to + /// measure its own execution time. + /// + /// # Timing model + /// Custom, the timing model is whatever is returned as the [`Duration`] + /// from `routine`. + /// + /// # Example + /// ```rust + /// use wasm_bindgen_test::{Criterion, Instant, wasm_bindgen_bench}; + /// + /// fn foo() { + /// // ... + /// } + /// + /// #[wasm_bindgen_bench] + /// fn bench(c: &mut Criterion) { + /// c.bench_function("iter", move |b| { + /// b.iter_custom(|iters| { + /// let start = Instant::now(); + /// for _i in 0..iters { + /// std::hint::black_box(foo()); + /// } + /// start.elapsed() + /// }) + /// }); + /// } + /// ``` + #[inline(never)] + pub fn iter_custom(&mut self, mut routine: R) + where + R: FnMut(u64) -> Duration, + { + self.iterated = true; + let time_start = Instant::now(); + self.value = routine(self.iters); + self.elapsed_time = time_start.elapsed(); + } + + /// Times a `routine` by executing it many times and timing the total + /// elapsed time. + /// + /// Prefer this timing loop when `routine` returns a value that doesn't have + /// a destructor. + /// + /// # Timing model + /// + /// Note that the `Bencher` also times the time required to destroy the + /// output of `routine()`. Therefore prefer this timing loop when the + /// runtime of `mem::drop(O)` is negligible compared to the runtime of the + /// `routine`. 
+ /// + /// ```text + /// elapsed = Instant::now + iters * (routine + mem::drop(O) + Range::next) + /// ``` + #[inline(never)] + pub async fn iter_future(&mut self, mut routine: R) + where + R: FnMut() -> Fut, + Fut: Future, + { + self.iterated = true; + let start = self.measurement.start(); + for _ in 0..self.iters { + black_box(routine().await); + } + let end = self.measurement.end(start); + self.value = end; + self.elapsed_time = end; + } + + /// Times a `routine` by executing it many times and relying on `routine` to + /// measure its own execution time. + /// + /// # Timing model + /// Custom, the timing model is whatever is returned as the [`Duration`] + /// from `routine`. + /// + /// # Example + /// ```rust + /// use wasm_bindgen_test::{Criterion, Instant, wasm_bindgen_bench}; + /// + /// async fn foo() { + /// // ... + /// } + /// + /// #[wasm_bindgen_bench] + /// async fn bench(c: &mut Criterion) { + /// c.bench_async_function("iter", move |b| { + /// Box::pin(b.iter_custom_future(async |iters| { + /// let start = Instant::now(); + /// for _i in 0..iters { + /// std::hint::black_box(foo().await); + /// } + /// start.elapsed() + /// })) + /// }) + /// .await; + /// } + /// ``` + #[inline(never)] + pub async fn iter_custom_future(&mut self, mut routine: R) + where + R: FnMut(u64) -> Fut, + Fut: Future, + { + self.iterated = true; + let time_start = Instant::now(); + self.value = routine(self.iters).await; + self.elapsed_time = time_start.elapsed(); + } +} diff --git a/client/test/src/criterion/benchmark.rs b/client/test/src/criterion/benchmark.rs new file mode 100644 index 00000000..d81a6947 --- /dev/null +++ b/client/test/src/criterion/benchmark.rs @@ -0,0 +1,17 @@ +use core::time::Duration; + +use super::SamplingMode; + +// TODO: Move the benchmark config stuff to a separate module for easier use. + +/// Struct containing all of the configuration options for a benchmark. 
+pub struct BenchmarkConfig { + pub confidence_level: f64, + pub measurement_time: Duration, + pub noise_threshold: f64, + pub nresamples: usize, + pub sample_size: usize, + pub significance_level: f64, + pub warm_up_time: Duration, + pub sampling_mode: SamplingMode, +} diff --git a/client/test/src/criterion/compare.rs b/client/test/src/criterion/compare.rs new file mode 100644 index 00000000..93569051 --- /dev/null +++ b/client/test/src/criterion/compare.rs @@ -0,0 +1,111 @@ +use alloc::vec::Vec; + +use super::SavedSample; +use super::benchmark::BenchmarkConfig; +use super::estimate::{ + ChangeDistributions, ChangeEstimates, ChangePointEstimates, Estimates, build_change_estimates, +}; +use super::report::BenchmarkId; +use super::stats::Distribution; +use super::stats::univariate::{self, Sample, mixed}; + +type ComparisonResult = ( + f64, + Distribution, + ChangeEstimates, + ChangeDistributions, + Vec, + Vec, + Vec, + Estimates, +); + +// Common comparison procedure +pub(crate) fn common( + id: &BenchmarkId, + avg_times: &Sample, + config: &BenchmarkConfig, +) -> Option { + let prev = super::baseline::read(id.desc())?; + let SavedSample { iters, times, .. 
} = prev.sample; + let base_estimates: Estimates = prev.estimates; + + let base_avg_times: Vec = iters + .iter() + .zip(times.iter()) + .map(|(iters, elapsed)| elapsed / iters) + .collect(); + let base_avg_time_sample = Sample::new(&base_avg_times); + + let (t_statistic, t_distribution) = t_test(avg_times, base_avg_time_sample, config); + + let (estimates, relative_distributions) = estimates(avg_times, base_avg_time_sample, config); + Some(( + t_statistic, + t_distribution, + estimates, + relative_distributions, + iters, + times, + base_avg_times.clone(), + base_estimates, + )) +} + +// Performs a two sample t-test +fn t_test( + avg_times: &Sample, + base_avg_times: &Sample, + config: &BenchmarkConfig, +) -> (f64, Distribution) { + let nresamples = config.nresamples; + + let t_statistic = avg_times.t(base_avg_times); + let t_distribution = + mixed::bootstrap(avg_times, base_avg_times, nresamples, |a, b| (a.t(b),)).0; + + // HACK: Filter out non-finite numbers, which can happen sometimes when sample + // size is very small. Downstream code doesn't like non-finite values here. 
+ let t_distribution = Distribution::from( + t_distribution + .iter() + .filter(|a| a.is_finite()) + .copied() + .collect::>() + .into_boxed_slice(), + ); + + (t_statistic, t_distribution) +} + +// Estimates the relative change in the statistics of the population +fn estimates( + avg_times: &Sample, + base_avg_times: &Sample, + config: &BenchmarkConfig, +) -> (ChangeEstimates, ChangeDistributions) { + fn stats(a: &Sample, b: &Sample) -> (f64, f64) { + ( + a.mean() / b.mean() - 1., + a.percentiles().median() / b.percentiles().median() - 1., + ) + } + + let cl = config.confidence_level; + let nresamples = config.nresamples; + + let (dist_mean, dist_median) = + univariate::bootstrap(avg_times, base_avg_times, nresamples, stats); + + let distributions = ChangeDistributions { + mean: dist_mean, + median: dist_median, + }; + + let (mean, median) = stats(avg_times, base_avg_times); + let points = ChangePointEstimates { mean, median }; + + let estimates = build_change_estimates(&distributions, &points, cl); + + (estimates, distributions) +} diff --git a/client/test/src/criterion/estimate.rs b/client/test/src/criterion/estimate.rs new file mode 100644 index 00000000..d1b12e34 --- /dev/null +++ b/client/test/src/criterion/estimate.rs @@ -0,0 +1,118 @@ +use serde::{Deserialize, Serialize}; + +use super::stats::Distribution; + +#[derive(Clone, PartialEq, Deserialize, Serialize, Debug)] +pub struct ConfidenceInterval { + pub confidence_level: f64, + pub lower_bound: f64, + pub upper_bound: f64, +} + +#[derive(Clone, PartialEq, Deserialize, Serialize, Debug)] +pub struct Estimate { + /// The confidence interval for this estimate + pub confidence_interval: ConfidenceInterval, + /// The value of this estimate + pub point_estimate: f64, + /// The standard error of this estimate + pub standard_error: f64, +} + +pub fn build_estimates( + distributions: &Distributions, + points: &PointEstimates, + cl: f64, +) -> Estimates { + let to_estimate = |point_estimate, distribution: 
&Distribution| { + let (lb, ub) = distribution.confidence_interval(cl); + + Estimate { + confidence_interval: ConfidenceInterval { + confidence_level: cl, + lower_bound: lb, + upper_bound: ub, + }, + point_estimate, + standard_error: distribution.std_dev(None), + } + }; + + Estimates { + mean: to_estimate(points.mean, &distributions.mean), + median: to_estimate(points.median, &distributions.median), + median_abs_dev: to_estimate(points.median_abs_dev, &distributions.median_abs_dev), + slope: None, + std_dev: to_estimate(points.std_dev, &distributions.std_dev), + } +} + +pub fn build_change_estimates( + distributions: &ChangeDistributions, + points: &ChangePointEstimates, + cl: f64, +) -> ChangeEstimates { + let to_estimate = |point_estimate, distribution: &Distribution| { + let (lb, ub) = distribution.confidence_interval(cl); + + Estimate { + confidence_interval: ConfidenceInterval { + confidence_level: cl, + lower_bound: lb, + upper_bound: ub, + }, + point_estimate, + standard_error: distribution.std_dev(None), + } + }; + + ChangeEstimates { + mean: to_estimate(points.mean, &distributions.mean), + median: to_estimate(points.median, &distributions.median), + } +} + +pub struct PointEstimates { + pub mean: f64, + pub median: f64, + pub median_abs_dev: f64, + pub std_dev: f64, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Estimates { + pub mean: Estimate, + pub median: Estimate, + pub median_abs_dev: Estimate, + pub slope: Option, + pub std_dev: Estimate, +} +impl Estimates { + pub fn typical(&self) -> &Estimate { + self.slope.as_ref().unwrap_or(&self.mean) + } +} + +pub struct Distributions { + pub mean: Distribution, + pub median: Distribution, + pub median_abs_dev: Distribution, + pub slope: Option>, + pub std_dev: Distribution, +} + +pub struct ChangePointEstimates { + pub mean: f64, + pub median: f64, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct ChangeEstimates { + pub mean: Estimate, + pub median: Estimate, +} + +pub 
struct ChangeDistributions { + pub mean: Distribution, + pub median: Distribution, +} diff --git a/client/test/src/criterion/format.rs b/client/test/src/criterion/format.rs new file mode 100644 index 00000000..abd06ad1 --- /dev/null +++ b/client/test/src/criterion/format.rs @@ -0,0 +1,79 @@ +use alloc::format; +use alloc::string::String; + +use libm::{fabs, pow}; + +pub fn change(pct: f64, signed: bool) -> String { + if signed { + format!("{:>+6}%", signed_short(pct * 1e2)) + } else { + format!("{:>6}%", short(pct * 1e2)) + } +} + +pub fn time(ns: f64) -> String { + if ns < 1.0 { + format!("{:>6} ps", short(ns * 1e3)) + } else if ns < pow(10f64, 3f64) { + format!("{:>6} ns", short(ns)) + } else if ns < pow(10f64, 6f64) { + format!("{:>6} µs", short(ns / 1e3)) + } else if ns < pow(10f64, 9f64) { + format!("{:>6} ms", short(ns / 1e6)) + } else { + format!("{:>6} s", short(ns / 1e9)) + } +} + +pub fn short(n: f64) -> String { + if n < 10.0 { + format!("{n:.4}") + } else if n < 100.0 { + format!("{n:.3}") + } else if n < 1000.0 { + format!("{n:.2}") + } else if n < 10000.0 { + format!("{n:.1}") + } else { + format!("{n:.0}") + } +} + +fn signed_short(n: f64) -> String { + let n_abs = fabs(n); + + let sign = if n >= 0.0 { '+' } else { '\u{2212}' }; + if n_abs < 10.0 { + format!("{sign}{n_abs:.4}") + } else if n_abs < 100.0 { + format!("{sign}{n_abs:.3}") + } else if n_abs < 1000.0 { + format!("{sign}{n_abs:.2}") + } else if n_abs < 10000.0 { + format!("{sign}{n_abs:.1}") + } else { + format!("{sign}{n_abs:.0}") + } +} + +pub fn iter_count(iterations: u64) -> String { + if iterations < 10_000 { + format!("{iterations} iterations") + } else if iterations < 1_000_000 { + format!("{:.0}k iterations", (iterations as f64) / 1000.0) + } else if iterations < 10_000_000 { + format!("{:.1}M iterations", (iterations as f64) / (1000.0 * 1000.0)) + } else if iterations < 1_000_000_000 { + format!("{:.0}M iterations", (iterations as f64) / (1000.0 * 1000.0)) + } else if iterations < 
10_000_000_000 { + format!( + "{:.1}B iterations", + (iterations as f64) / (1000.0 * 1000.0 * 1000.0) + ) + } else { + format!( + "{:.0}B iterations", + (iterations as f64) / (1000.0 * 1000.0 * 1000.0) + ) + } +} diff --git a/client/test/src/criterion/measurement.rs b/client/test/src/criterion/measurement.rs new file mode 100644 index 00000000..7362b82c --- /dev/null +++ b/client/test/src/criterion/measurement.rs @@ -0,0 +1,275 @@ +//! This module defines a set of traits that can be used to plug different +//! measurements (eg. Unix's Processor Time, CPU or GPU performance counters, +//! etc.) into Criterion.rs. It also includes the [`WallTime`] struct which +//! defines the default wall-clock time measurement. +use alloc::format; +use alloc::string::String; +use core::time::Duration; + +use libm::pow; + +use super::Throughput; +use super::format::short; +use crate::Instant; + +/// Trait providing functions to format measured values to string so that they +/// can be displayed on the command line or in the reports. The functions of +/// this trait take measured values in f64 form; implementors can assume that +/// the values are of the same scale as those produced by the associated +/// [`Measurement`] (eg. if your measurement produces values in nanoseconds, the +/// values passed to the formatter will be in nanoseconds). +/// +/// Implementors are encouraged to format the values in a way that is intuitive +/// for humans and uses the SI prefix system. For example, the format used by +/// [`WallTime`] can display the value in units ranging from picoseconds to +/// seconds depending on the magnitude of the elapsed time in nanoseconds. +pub trait ValueFormatter { + /// Format the value (with appropriate unit) and return it as a string. + fn format_value(&self, value: f64) -> String { + let mut values = [value]; + let unit = self.scale_values(value, &mut values); + format!("{:>6} {}", short(values[0]), unit) + } + + /// Format the value as a throughput measurement. 
The value represents the + /// measurement value; the implementor will have to calculate bytes per + /// second, iterations per cycle, etc. + fn format_throughput(&self, throughput: &Throughput, value: f64) -> String { + let mut values = [value]; + let unit = self.scale_throughputs(value, throughput, &mut values); + format!("{:>6} {}", short(values[0]), unit) + } + + /// Scale the given values to some appropriate unit and return the unit + /// string. + /// + /// The given typical value should be used to choose the unit. This function + /// may be called multiple times with different datasets; the typical value + /// will remain the same to ensure that the units remain consistent within + /// a graph. The typical value will not be NaN. Values will not contain NaN + /// as input, and the transformed values must not contain NaN. + fn scale_values(&self, typical_value: f64, values: &mut [f64]) -> &'static str; + + /// Convert the given measured values into throughput numbers based on the + /// given throughput value, scale them to some appropriate unit, and return + /// the unit string. + /// + /// The given typical value should be used to choose the unit. This function + /// may be called multiple times with different datasets; the typical value + /// will remain the same to ensure that the units remain consistent within + /// a graph. The typical value will not be NaN. Values will not contain NaN + /// as input, and the transformed values must not contain NaN. + fn scale_throughputs( + &self, + typical_value: f64, + throughput: &Throughput, + values: &mut [f64], + ) -> &'static str; + + /// Scale the values and return a unit string designed for machines. + /// + /// For example, this is used for the CSV file output. Implementations + /// should modify the given values slice to apply the desired scaling (if + /// any) and return a string representing the unit the modified values are + /// in. 
+ fn scale_for_machines(&self, values: &mut [f64]) -> &'static str; +} + +/// Trait for all types which define something Criterion.rs can measure. The +/// only measurement currently provided is [`WallTime`], but third party crates +/// or benchmarks may define more. +/// +/// This trait defines two core methods, `start` and `end`. `start` is called at +/// the beginning of a measurement to produce some intermediate value (for +/// example, the wall-clock time at the start of that set of iterations) and +/// `end` is called at the end of the measurement with the value returned by +/// `start`. +pub trait Measurement { + /// This type represents an intermediate value for the measurements. It will + /// be produced by the start function and passed to the end function. An + /// example might be the wall-clock time as of the `start` call. + type Intermediate; + + /// Criterion.rs will call this before iterating the benchmark. + fn start(&self) -> Self::Intermediate; + + /// Criterion.rs will call this after iterating the benchmark to get the + /// measured value. + fn end(&self, i: Self::Intermediate) -> Duration; + + /// Combine two values. Criterion.rs sometimes needs to perform measurements + /// in multiple batches of iterations, so the value from one batch must be + /// added to the sum of the previous batches. + fn add(&self, v1: &Duration, v2: &Duration) -> Duration; + + /// Return a "zero" value for the Value type which can be added to another + /// value. + fn zero(&self) -> Duration; + + /// Converts the measured value to f64 so that it can be used in statistical + /// analysis. + fn to_f64(&self, value: &Duration) -> f64; + + /// Return a trait-object reference to the value formatter for this + /// measurement. 
+ fn formatter(&self) -> &dyn ValueFormatter; +} + +/// Default Formatter +pub(crate) struct DurationFormatter; + +impl DurationFormatter { + fn bytes_per_second(bytes: f64, typical: f64, values: &mut [f64]) -> &'static str { + let bytes_per_second = bytes * (1e9 / typical); + let (denominator, unit) = if bytes_per_second < 1024.0 { + (1.0, " B/s") + } else if bytes_per_second < 1024.0 * 1024.0 { + (1024.0, "KiB/s") + } else if bytes_per_second < 1024.0 * 1024.0 * 1024.0 { + (1024.0 * 1024.0, "MiB/s") + } else { + (1024.0 * 1024.0 * 1024.0, "GiB/s") + }; + + for val in values { + let bytes_per_second = bytes * (1e9 / *val); + *val = bytes_per_second / denominator; + } + + unit + } + + fn bytes_per_second_decimal(bytes: f64, typical: f64, values: &mut [f64]) -> &'static str { + let bytes_per_second = bytes * (1e9 / typical); + let (denominator, unit) = if bytes_per_second < 1000.0 { + (1.0, " B/s") + } else if bytes_per_second < 1000.0 * 1000.0 { + (1000.0, "KB/s") + } else if bytes_per_second < 1000.0 * 1000.0 * 1000.0 { + (1000.0 * 1000.0, "MB/s") + } else { + (1000.0 * 1000.0 * 1000.0, "GB/s") + }; + + for val in values { + let bytes_per_second = bytes * (1e9 / *val); + *val = bytes_per_second / denominator; + } + + unit + } + + fn elements_per_second(elems: f64, typical: f64, values: &mut [f64]) -> &'static str { + let elems_per_second = elems * (1e9 / typical); + let (denominator, unit) = if elems_per_second < 1000.0 { + (1.0, " elem/s") + } else if elems_per_second < 1000.0 * 1000.0 { + (1000.0, "Kelem/s") + } else if elems_per_second < 1000.0 * 1000.0 * 1000.0 { + (1000.0 * 1000.0, "Melem/s") + } else { + (1000.0 * 1000.0 * 1000.0, "Gelem/s") + }; + + for val in values { + let elems_per_second = elems * (1e9 / *val); + *val = elems_per_second / denominator; + } + + unit + } + + fn bits_per_second(bits: f64, typical: f64, values: &mut [f64]) -> &'static str { + let bits_per_second = bits * (1e9 / typical); + let (denominator, unit) = if bits_per_second < 
1000.0 { + (1.0, " b/s") + } else if bits_per_second < 1000.0 * 1000.0 { + (1000.0, "Kb/s") + } else if bits_per_second < 1000.0 * 1000.0 * 1000.0 { + (1000.0 * 1000.0, "Mb/s") + } else { + (1000.0 * 1000.0 * 1000.0, "Gb/s") + }; + + for val in values { + let bits_per_second = bits * (1e9 / *val); + *val = bits_per_second / denominator; + } + + unit + } +} +impl ValueFormatter for DurationFormatter { + fn scale_throughputs( + &self, + typical: f64, + throughput: &Throughput, + values: &mut [f64], + ) -> &'static str { + match *throughput { + Throughput::Bytes(bytes) => Self::bytes_per_second(bytes as f64, typical, values), + Throughput::BytesDecimal(bytes) => { + Self::bytes_per_second_decimal(bytes as f64, typical, values) + } + Throughput::Elements(elems) => Self::elements_per_second(elems as f64, typical, values), + Throughput::Bits(bits) => Self::bits_per_second(bits as f64, typical, values), + } + } + + fn scale_values(&self, ns: f64, values: &mut [f64]) -> &'static str { + let (factor, unit) = if ns < pow(10f64, 0f64) { + (pow(10f64, 3f64), "ps") + } else if ns < pow(10f64, 3f64) { + (pow(10f64, 0f64), "ns") + } else if ns < pow(10f64, 6f64) { + (pow(10f64, -3f64), "µs") + } else if ns < pow(10f64, 9f64) { + (pow(10f64, -6f64), "ms") + } else { + (pow(10f64, -9f64), "s") + }; + + for val in values { + *val *= factor; + } + + unit + } + + fn scale_for_machines(&self, _values: &mut [f64]) -> &'static str { + // no scaling is needed + "ns" + } +} + +/// `WallTime` is the default measurement in Criterion.rs. It measures the +/// elapsed time from the beginning of a series of iterations to the end. 
+pub struct WallTime; + +impl Measurement for WallTime { + type Intermediate = Instant; + + fn start(&self) -> Self::Intermediate { + Instant::now() + } + + fn end(&self, i: Self::Intermediate) -> Duration { + i.elapsed() + } + + fn add(&self, v1: &Duration, v2: &Duration) -> Duration { + *v1 + *v2 + } + + fn zero(&self) -> Duration { + Duration::from_secs(0) + } + + fn to_f64(&self, val: &Duration) -> f64 { + val.as_nanos() as f64 + } + + fn formatter(&self) -> &dyn ValueFormatter { + &DurationFormatter + } +} diff --git a/client/test/src/criterion/mod.rs b/client/test/src/criterion/mod.rs new file mode 100644 index 00000000..191085d3 --- /dev/null +++ b/client/test/src/criterion/mod.rs @@ -0,0 +1,560 @@ +//! A statistics-driven micro-benchmarking library written in Rust. +//! +//! This crate is a microbenchmarking library which aims to provide strong +//! statistical confidence in detecting and estimating the size of performance +//! improvements and regressions, while also being easy to use. +//! +//! See +//! [the user guide](https://bheisler.github.io/criterion.rs/book/index.html) +//! for examples as well as details on the measurement and analysis process, +//! and the output. +//! +//! ## Features: +//! * Collects detailed statistics, providing strong confidence that changes to +//! performance are real, not measurement noise. +//! * Produces detailed charts, providing thorough understanding of your code's +//! performance behavior. + +#![allow( + clippy::cast_possible_truncation, + clippy::cast_precision_loss, + clippy::cast_sign_loss, + clippy::iter_not_returning_iterator, + clippy::similar_names, + clippy::struct_field_names, + clippy::transmute_ptr_to_ptr, + clippy::undocumented_unsafe_blocks, + reason = "checked" +)] + +// Needs to be declared before other modules +// in order to be usable there. 
+mod analysis; +mod baseline; +mod bencher; +mod benchmark; +mod compare; +mod estimate; +mod format; +mod measurement; +mod report; +mod routine; +mod stats; + +use alloc::boxed::Box; +use alloc::string::String; +use alloc::vec; +use alloc::vec::Vec; +use core::future::Future; +use core::pin::Pin; +use core::task::{Context, Poll, Waker}; +use core::time::Duration; + +pub use bencher::Bencher; +use benchmark::BenchmarkConfig; +use libm::{ceil, sqrt}; +pub use measurement::Measurement; +use measurement::WallTime; +use report::WasmReport; +use serde::{Deserialize, Serialize}; + +use crate::console_error; + +/// The benchmark manager +/// +/// `Criterion` lets you configure and execute benchmarks +/// +/// Each benchmark consists of four phases: +/// +/// - **Warm-up**: The routine is repeatedly executed, to let the +/// CPU/OS/JIT/interpreter adapt to the new load +/// - **Measurement**: The routine is repeatedly executed, and timing +/// information is collected into a sample +/// - **Analysis**: The sample is analyzed and distilled into meaningful +/// statistics that get reported to stdout, stored in files, and plotted +/// - **Comparison**: The current sample is compared with the sample obtained in +/// the previous benchmark. 
pub struct Criterion<M: Measurement = WallTime> {
    // Per-run statistical configuration (sample size, times, thresholds).
    config: BenchmarkConfig,
    report: WasmReport,
    measurement: M,
    location: Option<Location>,
}

/// Source location (file and module path) of a benchmark, recorded for
/// external tooling such as codspeed.
pub(crate) struct Location {
    file: String,
    module_path: String,
}

impl Default for Criterion {
    /// Creates a benchmark manager with the following default settings:
    ///
    /// - Sample size: 100 measurements
    /// - Warm-up time: 3 s
    /// - Measurement time: 5 s
    /// - Bootstrap size: 100 000 resamples
    /// - Noise threshold: 0.01 (1%)
    /// - Confidence level: 0.95
    /// - Significance level: 0.05
    fn default() -> Self {
        Self {
            config: BenchmarkConfig {
                confidence_level: 0.95,
                measurement_time: Duration::from_secs(5),
                noise_threshold: 0.01,
                nresamples: 100_000,
                sample_size: 100,
                significance_level: 0.05,
                warm_up_time: Duration::from_secs(3),
                sampling_mode: SamplingMode::Auto,
            },
            report: WasmReport,
            measurement: WallTime,
            location: None,
        }
    }
}

impl<M: Measurement> Criterion<M> {
    /// Changes the measurement for the benchmarks run with this runner. See the
    /// [`Measurement`] trait for more details
    pub fn with_measurement<M2: Measurement>(self, m: M2) -> Criterion<M2> {
        // Can't use struct update syntax here because they're technically different
        // types.
        Criterion {
            config: self.config,
            report: self.report,
            measurement: m,
            location: self.location,
        }
    }

    /// Configure file and module paths for use with codspeed.
    #[must_use]
    pub fn with_location(self, file: &str, module_path: &str) -> Self {
        Self {
            location: Some(Location {
                file: file.into(),
                module_path: module_path.into(),
            }),
            ..self
        }
    }

    /// Changes the default size of the sample for benchmarks run with this
    /// runner.
    ///
    /// A bigger sample should yield more accurate results if paired with a
    /// sufficiently large measurement time.
    ///
    /// Sample size must be at least 10.
    ///
    /// # Panics
    ///
    /// Panics if n < 10
    #[must_use]
    pub fn sample_size(mut self, n: usize) -> Self {
        assert!(n >= 10);

        self.config.sample_size = n;
        self
    }

    /// Changes the default warm up time for benchmarks run with this runner.
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
    #[must_use]
    pub fn warm_up_time(mut self, dur: Duration) -> Self {
        assert!(dur.as_nanos() > 0);

        self.config.warm_up_time = dur;
        self
    }

    /// Changes the default measurement time for benchmarks run with this
    /// runner.
    ///
    /// With a longer time, the measurement will become more resilient to
    /// transitory peak loads caused by external programs
    ///
    /// **Note**: If the measurement time is too "low", Criterion will
    /// automatically increase it
    ///
    /// # Panics
    ///
    /// Panics if the input duration is zero
    #[must_use]
    pub fn measurement_time(mut self, dur: Duration) -> Self {
        assert!(dur.as_nanos() > 0);

        self.config.measurement_time = dur;
        self
    }

    /// Changes the default number of resamples for benchmarks run with this
    /// runner.
    ///
    /// Number of resamples to use for the
    /// [bootstrap](http://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Case_resampling)
    ///
    /// A larger number of resamples reduces the random sampling errors, which
    /// are inherent to the bootstrap method, but also increases the analysis
    /// time
    ///
    /// # Panics
    ///
    /// Panics if the number of resamples is set to zero
    #[must_use]
    pub fn nresamples(mut self, n: usize) -> Self {
        assert!(n > 0);
        if n <= 1000 {
            console_error!("\nWarning: It is not recommended to reduce nresamples below 1000.");
        }

        self.config.nresamples = n;
        self
    }

    /// Changes the default noise threshold for benchmarks run with this runner.
    /// The noise threshold is used to filter out small changes in performance,
    /// even if they are statistically significant. Sometimes benchmarking the
    /// same code twice will result in small but statistically significant
    /// differences solely because of noise. This provides a way to filter
    /// out some of these false positives at the cost of making it harder to
    /// detect small changes to the true performance of the benchmark.
    ///
    /// The default is 0.01, meaning that changes smaller than 1% will be
    /// ignored.
    ///
    /// # Panics
    ///
    /// Panics if the threshold is set to a negative value
    #[must_use]
    pub fn noise_threshold(mut self, threshold: f64) -> Self {
        assert!(threshold >= 0.0);

        self.config.noise_threshold = threshold;
        self
    }

    /// Changes the default confidence level for benchmarks run with this
    /// runner. The confidence level is the desired probability that the true
    /// runtime lies within the estimated [confidence interval](https://en.wikipedia.org/wiki/Confidence_interval). The default is
    /// 0.95, meaning that the confidence interval should capture the true value
    /// 95% of the time.
    ///
    /// # Panics
    ///
    /// Panics if the confidence level is set to a value outside the `(0, 1)`
    /// range
    #[must_use]
    pub fn confidence_level(mut self, cl: f64) -> Self {
        assert!(cl > 0.0 && cl < 1.0);
        if cl < 0.5 {
            console_error!(
                "\nWarning: It is not recommended to reduce confidence level below 0.5."
            );
        }

        self.config.confidence_level = cl;
        self
    }

    /// Changes the default [significance level](https://en.wikipedia.org/wiki/Statistical_significance)
    /// for benchmarks run with this runner. This is used to perform a
    /// [hypothesis test](https://en.wikipedia.org/wiki/Statistical_hypothesis_testing) to see if
    /// the measurements from this run are different from the measured
    /// performance of the last run. The significance level is the desired
    /// probability that two measurements of identical code will be considered
    /// 'different' due to noise in the measurements. The default value is 0.05,
    /// meaning that approximately 5% of identical benchmarks will register as
    /// different due to noise.
    ///
    /// This presents a trade-off. By setting the significance level closer to
    /// 0.0, you can increase the statistical robustness against noise, but it
    /// also weakens Criterion.rs' ability to detect small but real changes in
    /// the performance. By setting the significance level closer to 1.0,
    /// Criterion.rs will be more able to detect small true changes, but will
    /// also report more spurious differences.
    ///
    /// See also the noise threshold setting.
    ///
    /// # Panics
    ///
    /// Panics if the significance level is set to a value outside the `(0, 1)`
    /// range
    #[must_use]
    pub fn significance_level(mut self, sl: f64) -> Self {
        assert!(sl > 0.0 && sl < 1.0);

        self.config.significance_level = sl;
        self
    }
}

impl<M> Criterion<M>
where
    M: Measurement + 'static,
{
    /// Benchmarks a function.
    ///
    /// # Example
    ///
    /// ```rust
    /// use wasm_bindgen_test::{Criterion, wasm_bindgen_bench};
    ///
    /// #[wasm_bindgen_bench]
    /// fn bench(c: &mut Criterion) {
    ///     // Setup (construct data, allocate memory, etc)
    ///     c.bench_function("bench desc", |b| {
    ///         b.iter(|| {
    ///             // Code to benchmark goes here
    ///         })
    ///     });
    /// }
    /// ```
    pub fn bench_function<F>(&mut self, desc: &str, f: F) -> &mut Self
    where
        F: FnMut(&mut Bencher<'_, M>),
    {
        // A synchronous routine's analysis future completes on the first poll,
        // so a no-op waker is sufficient here; `bench_function` can never be
        // left pending.
        fn block_on(f: impl Future<Output = ()>) {
            let mut ctx = Context::from_waker(Waker::noop());
            match core::pin::pin!(f).poll(&mut ctx) {
                Poll::Ready(()) => (),
                // Synchronous routines cannot suspend.
                Poll::Pending => unreachable!(),
            }
        }

        let id = report::BenchmarkId::new(desc.into());
        block_on(analysis::common(
            &id,
            &mut routine::Function::new(f),
            &self.config,
            self,
        ));

        self
    }

    /// Benchmarks a future.
+ /// + /// # Example + /// + /// ```rust + /// use wasm_bindgen_test::{Criterion, wasm_bindgen_bench}; + /// + /// #[wasm_bindgen_bench] + /// async fn bench(c: &mut Criterion) { + /// // Setup (construct data, allocate memory, etc) + /// c.bench_async_function("bench desc", |b| { + /// Box::pin(b.iter_future(|| async { + /// // Code to benchmark goes here + /// })) + /// }) + /// .await; + /// } + /// ``` + pub async fn bench_async_function(&mut self, desc: &str, f: F) -> &mut Self + where + for<'b> F: FnMut(&'b mut Bencher<'_, M>) -> Pin + 'b>>, + { + let id = report::BenchmarkId::new(desc.into()); + analysis::common(&id, &mut routine::AsyncFunction::new(f), &self.config, self).await; + self + } +} + +/// Enum representing different ways of measuring the throughput of benchmarked +/// code. If the throughput setting is configured for a benchmark then the +/// estimated throughput will be reported as well as the time per iteration. +// TODO: Remove serialize/deserialize from the public API. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum Throughput { + /// Measure throughput in terms of bytes/second. The value should be the + /// number of bytes processed by one iteration of the benchmarked code. + /// Typically, this would be the length of an input string or `&[u8]`. + Bytes(u64), + + /// Equivalent to Bytes, but the value will be reported in terms of + /// kilobytes (1000 bytes) per second instead of kibibytes (1024 bytes) per + /// second, megabytes instead of mibibytes, and gigabytes instead of + /// gibibytes. + BytesDecimal(u64), + + /// Measure throughput in terms of elements/second. The value should be the + /// number of elements processed by one iteration of the benchmarked code. + /// Typically, this would be the size of a collection, but could also be + /// the number of lines of input text or the number of values to parse. + Elements(u64), + + /// Measure throughput in terms of bits/second. 
The value should be the + /// number of bits processed by one iteration of the benchmarked code. + /// Typically, this would be the number of bits transferred by a networking + /// function. + Bits(u64), +} + +/// This enum allows the user to control how Criterion.rs chooses the iteration +/// count when sampling. The default is `Auto`, which will choose a method +/// automatically based on the iteration time during the warm-up phase. +#[derive(Debug, Default, Clone, Copy)] +pub enum SamplingMode { + /// Criterion.rs should choose a sampling method automatically. This is the + /// default, and is recommended for most users and most benchmarks. + #[default] + Auto, +} + +impl SamplingMode { + pub(crate) fn choose_sampling_mode( + self, + warmup_mean_execution_time: f64, + sample_count: u64, + target_time: f64, + ) -> ActualSamplingMode { + match self { + Self::Auto => { + // Estimate execution time with linear sampling + let total_runs = sample_count * (sample_count + 1) / 2; + let d = ceil(target_time / warmup_mean_execution_time / total_runs as f64) as u64; + let expected_ns = total_runs as f64 * d as f64 * warmup_mean_execution_time; + + if expected_ns > (2.0 * target_time) { + ActualSamplingMode::Flat + } else { + ActualSamplingMode::Linear + } + } + } + } +} + +/// Enum to represent the sampling mode without Auto. +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub(crate) enum ActualSamplingMode { + Linear, + Flat, +} + +impl ActualSamplingMode { + pub(crate) fn iteration_counts( + self, + warmup_mean_execution_time: f64, + sample_count: u64, + target_time: &Duration, + ) -> Vec { + match self { + Self::Linear => { + let n = sample_count; + let met = warmup_mean_execution_time; + let m_ns = target_time.as_nanos(); + // Solve: [d + 2*d + 3*d + ... 
+ n*d] * met = m_ns + let total_runs = n * (n + 1) / 2; + let d = (ceil(m_ns as f64 / met / total_runs as f64) as u64).max(1); + let expected_ns = total_runs as f64 * d as f64 * met; + + if d == 1 { + let recommended_sample_size = + Self::recommend_linear_sample_size(m_ns as f64, met); + let actual_time = Duration::from_nanos(expected_ns as u64); + console_error!( + "\nWarning: Unable to complete {} samples in {:.1?}. You may wish to \ + increase target time to {:.1?}", + n, + target_time, + actual_time + ); + + if recommended_sample_size == n { + console_error!(" or enable flat sampling."); + } else { + console_error!( + ", enable flat sampling, or reduce sample count to {}.", + recommended_sample_size + ); + } + } + + (1..=n).map(|a| a * d).collect::>() + } + Self::Flat => { + let n = sample_count; + let met = warmup_mean_execution_time; + let m_ns = target_time.as_nanos() as f64; + let time_per_sample = m_ns / (n as f64); + // This is pretty simplistic; we could do something smarter to fit into the + // allotted time. + let iterations_per_sample = (ceil(time_per_sample / met) as u64).max(1); + + let expected_ns = met * (iterations_per_sample * n) as f64; + + if iterations_per_sample == 1 { + let recommended_sample_size = Self::recommend_flat_sample_size(m_ns, met); + let actual_time = Duration::from_nanos(expected_ns as u64); + console_error!( + "\nWarning: Unable to complete {} samples in {:.1?}. You may wish to \ + increase target time to {:.1?}", + n, + target_time, + actual_time + ); + + if recommended_sample_size == n { + console_error!("."); + } else { + console_error!(", or reduce sample count to {}.", recommended_sample_size); + } + } + + vec![iterations_per_sample; n as usize] + } + } + } + + fn is_linear(self) -> bool { + matches!(self, Self::Linear) + } + + fn recommend_linear_sample_size(target_time: f64, met: f64) -> u64 { + // Some math shows that n(n+1)/2 * d * met = target_time. d = 1, so it can be + // ignored. 
This leaves n(n+1) = (2*target_time)/met, or n^2 + n - + // (2*target_time)/met = 0 Which can be solved with the quadratic formula. + // Since A and B are constant 1, this simplifies to sample_size = (-1 +- + // sqrt(1 - 4C))/2, where C = (2*target_time)/met. We don't care about the + // negative solution. Experimentation shows that this actually tends to result + // in twice the desired execution time (probably because of the ceil used to + // calculate d) so instead I use c = target_time/met. + let c = target_time / met; + let sample_size = f64::midpoint(-1.0, sqrt(4.0 * c)); + let sample_size = sample_size as u64; + + // Round down to the nearest 10 to give a margin and avoid excessive precision + let sample_size = (sample_size / 10) * 10; + + // Clamp it to be at least 10, since criterion.rs doesn't allow sample sizes + // smaller than 10. + if sample_size < 10 { 10 } else { sample_size } + } + + fn recommend_flat_sample_size(target_time: f64, met: f64) -> u64 { + let sample_size = (target_time / met) as u64; + + // Round down to the nearest 10 to give a margin and avoid excessive precision + let sample_size = (sample_size / 10) * 10; + + // Clamp it to be at least 10, since criterion.rs doesn't allow sample sizes + // smaller than 10. 
+ if sample_size < 10 { 10 } else { sample_size } + } +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub(crate) struct SavedSample { + pub(crate) sampling_mode: ActualSamplingMode, + pub(crate) iters: Vec, + pub(crate) times: Vec, +} diff --git a/client/test/src/criterion/report.rs b/client/test/src/criterion/report.rs new file mode 100644 index 00000000..249e8a7d --- /dev/null +++ b/client/test/src/criterion/report.rs @@ -0,0 +1,255 @@ +use alloc::format; +use alloc::string::{String, ToString}; +use core::fmt; + +use nu_ansi_term::{Color, Style}; +use serde::{Deserialize, Serialize}; + +use super::estimate::{ChangeEstimates, Estimate, Estimates}; +use super::format; +use super::measurement::ValueFormatter; +use super::stats::univariate::outliers::tukey::LabeledSample; +use crate::console_log; + +pub struct ComparisonData { + pub p_value: f64, + pub relative_estimates: ChangeEstimates, + pub significance_threshold: f64, + pub noise_threshold: f64, +} + +pub struct MeasurementData<'a> { + pub avg_times: LabeledSample<'a, f64>, + pub absolute_estimates: Estimates, + pub comparison: Option, +} + +#[derive(Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct BenchmarkId { + desc: String, +} + +impl BenchmarkId { + pub fn new(desc: String) -> Self { + Self { desc } + } + + pub fn desc(&self) -> &str { + &self.desc + } +} +impl fmt::Display for BenchmarkId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.desc()) + } +} +impl fmt::Debug for BenchmarkId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "BenchmarkId {{ desc: \"{}\" }}", self.desc,) + } +} + +pub trait Report { + fn benchmark_start(&self, _id: &BenchmarkId) {} + fn warmup(&self, _id: &BenchmarkId, _warmup_ns: f64) {} + fn analysis(&self, _id: &BenchmarkId) {} + fn measurement_start( + &self, + _id: &BenchmarkId, + _sample_count: u64, + _estimate_ns: f64, + _iter_count: u64, + ) { + } + fn measurement_complete( + &self, + _id: 
&BenchmarkId, + _measurements: &MeasurementData, + _formatter: &dyn ValueFormatter, + ) { + } +} + +pub(crate) struct WasmReport; + +impl WasmReport { + fn print(s: &str) { + console_log!("{}", s); + } + + fn with_color(color: Color, s: &str) -> String { + color.paint(s).to_string() + } + + fn green(s: &str) -> String { + Self::with_color(Color::Green, s) + } + + fn yellow(s: &str) -> String { + Self::with_color(Color::Yellow, s) + } + + fn red(s: &str) -> String { + Self::with_color(Color::Red, s) + } + + fn bold(s: String) -> String { + Style::new().bold().paint(s).to_string() + } + + fn faint(s: String) -> String { + Style::new().dimmed().paint(s).to_string() + } + + pub fn outliers(sample: &LabeledSample<'_, f64>) { + let (los, lom, _, him, his) = sample.count(); + let noutliers = los + lom + him + his; + let sample_size = sample.len(); + + if noutliers == 0 { + return; + } + + let percent = |n: usize| 100. * n as f64 / sample_size as f64; + + console_log!( + "{}", + Self::yellow(&format!( + "Found {} outliers among {} measurements ({:.2}%)", + noutliers, + sample_size, + percent(noutliers) + )) + ); + + let print = |n, label| { + if n != 0 { + console_log!(" {} ({:.2}%) {}", n, percent(n), label); + } + }; + + print(los, "low severe"); + print(lom, "low mild"); + print(him, "high mild"); + print(his, "high severe"); + } +} + +impl Report for WasmReport { + fn warmup(&self, _id: &BenchmarkId, warmup_ns: f64) { + Self::print(&format!("Warming up for {}", format::time(warmup_ns))); + } + + fn measurement_start( + &self, + _id: &BenchmarkId, + sample_count: u64, + estimate_ns: f64, + iter_count: u64, + ) { + let iter_string = format::iter_count(iter_count); + + Self::print(&format!( + "Collecting {} samples in estimated {} ({})", + sample_count, + format::time(estimate_ns), + iter_string + )); + } + + fn measurement_complete( + &self, + id: &BenchmarkId, + meas: &MeasurementData, + formatter: &dyn ValueFormatter, + ) { + let typical_estimate = 
&meas.absolute_estimates.typical(); + + let mut id = id.desc().to_string(); + + if id.len() > 23 { + console_log!("{}", Self::green(&id)); + id.clear(); + } + let id_len = id.len(); + + console_log!( + "{}{}time: [{} {} {}]", + Self::green(&id), + " ".repeat(24 - id_len), + Self::faint(formatter.format_value(typical_estimate.confidence_interval.lower_bound)), + Self::bold(formatter.format_value(typical_estimate.point_estimate)), + Self::faint(formatter.format_value(typical_estimate.confidence_interval.upper_bound)) + ); + + if let Some(ref comp) = meas.comparison { + let different_mean = comp.p_value < comp.significance_threshold; + let mean_est = &comp.relative_estimates.mean; + let point_estimate = mean_est.point_estimate; + let mut point_estimate_str = format::change(point_estimate, true); + // The change in throughput is related to the change in timing. Reducing the + // timing by 50% increases the throughput by 100%. + let explanation_str: String; + + if different_mean { + let comparison = compare_to_threshold(mean_est, comp.noise_threshold); + match comparison { + ComparisonResult::Improved => { + point_estimate_str = Self::green(&Self::bold(point_estimate_str)); + explanation_str = format!("Performance has {}.", Self::green("improved")); + } + ComparisonResult::Regressed => { + point_estimate_str = Self::red(&Self::bold(point_estimate_str)); + explanation_str = format!("Performance has {}.", Self::red("regressed")); + } + ComparisonResult::NonSignificant => { + explanation_str = "Change within noise threshold.".to_string(); + } + } + } else { + explanation_str = "No change in performance detected.".to_string(); + } + + console_log!( + "{}change: [{} {} {}] (p = {:.2} {} {:.2})", + " ".repeat(24), + Self::faint(format::change( + mean_est.confidence_interval.lower_bound, + true + )), + point_estimate_str, + Self::faint(format::change( + mean_est.confidence_interval.upper_bound, + true + )), + comp.p_value, + if different_mean { "<" } else { ">" }, + 
comp.significance_threshold + ); + + console_log!("{}{}", " ".repeat(24), explanation_str); + } + + Self::outliers(&meas.avg_times); + } +} + +enum ComparisonResult { + Improved, + Regressed, + NonSignificant, +} + +fn compare_to_threshold(estimate: &Estimate, noise: f64) -> ComparisonResult { + let ci = &estimate.confidence_interval; + let lb = ci.lower_bound; + let ub = ci.upper_bound; + + if lb < -noise && ub < -noise { + ComparisonResult::Improved + } else if lb > noise && ub > noise { + ComparisonResult::Regressed + } else { + ComparisonResult::NonSignificant + } +} diff --git a/client/test/src/criterion/routine.rs b/client/test/src/criterion/routine.rs new file mode 100644 index 00000000..80e9bcd8 --- /dev/null +++ b/client/test/src/criterion/routine.rs @@ -0,0 +1,212 @@ +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::future::Future; +use core::marker::PhantomData; +use core::pin::Pin; +use core::time::Duration; + +use async_trait::async_trait; + +use super::benchmark::BenchmarkConfig; +use super::measurement::Measurement; +use super::report::{BenchmarkId, Report}; +use super::{ActualSamplingMode, Bencher, Criterion}; + +/// PRIVATE +#[async_trait(?Send)] +pub(crate) trait Routine { + /// PRIVATE + async fn bench(&mut self, m: &M, iters: &[u64]) -> Vec; + /// PRIVATE + async fn warm_up(&mut self, m: &M, how_long: Duration) -> (u64, u64); + + async fn sample( + &mut self, + measurement: &M, + id: &BenchmarkId, + config: &BenchmarkConfig, + criterion: &Criterion, + ) -> (ActualSamplingMode, Box<[f64]>, Box<[f64]>) { + let wu = config.warm_up_time; + let m_ns = config.measurement_time.as_nanos(); + + criterion.report.warmup(id, wu.as_nanos() as f64); + + let (wu_elapsed, wu_iters) = self.warm_up(measurement, wu).await; + + // Initial guess for the mean execution time + let met = wu_elapsed as f64 / wu_iters as f64; + + let n = config.sample_size as u64; + + let actual_sampling_mode = config + .sampling_mode + .choose_sampling_mode(met, n, m_ns as f64); 
+ + let m_iters = actual_sampling_mode.iteration_counts(met, n, &config.measurement_time); + + let expected_ns = m_iters + .iter() + .copied() + .map(|count| count as f64 * met) + .sum(); + + // Use saturating_add to handle overflow. + let mut total_iters = 0u64; + for count in m_iters.iter().copied() { + total_iters = total_iters.saturating_add(count); + } + + criterion + .report + .measurement_start(id, n, expected_ns, total_iters); + + let m_elapsed = self.bench(measurement, &m_iters).await; + + let m_iters_f: Vec = m_iters.iter().map(|&x| x as f64).collect(); + + ( + actual_sampling_mode, + m_iters_f.into_boxed_slice(), + m_elapsed.into_boxed_slice(), + ) + } +} + +pub struct AsyncFunction { + f: F, + phantom: PhantomData, +} + +impl AsyncFunction { + pub fn new(f: F) -> Self { + Self { + f, + phantom: PhantomData, + } + } +} + +#[async_trait(?Send)] +impl Routine for AsyncFunction +where + M: Measurement, + for<'b> F: FnMut(&'b mut Bencher<'_, M>) -> Pin + 'b>>, +{ + async fn bench(&mut self, m: &M, iters: &[u64]) -> Vec { + let f = &mut self.f; + + let mut b = Bencher { + iterated: false, + iters: 0, + value: m.zero(), + measurement: m, + elapsed_time: Duration::from_millis(0), + }; + + let mut result = Vec::with_capacity(iters.len()); + for iters in iters { + b.iters = *iters; + (*f)(&mut b).await; + result.push(m.to_f64(&b.value)); + } + result + } + + async fn warm_up(&mut self, m: &M, how_long: Duration) -> (u64, u64) { + let f = &mut self.f; + let mut b = Bencher { + iterated: false, + iters: 1, + value: m.zero(), + measurement: m, + elapsed_time: Duration::from_millis(0), + }; + + let mut total_iters = 0; + let mut elapsed_time = Duration::from_millis(0); + loop { + (*f)(&mut b).await; + + total_iters += b.iters; + elapsed_time += b.elapsed_time; + if elapsed_time > how_long { + return (elapsed_time.as_nanos() as u64, total_iters); + } + + b.iters = b.iters.wrapping_mul(2); + } + } +} + +pub struct Function +where + F: FnMut(&mut Bencher<'_, M>), +{ + 
f: F, + phantom: PhantomData, +} + +impl Function +where + F: FnMut(&mut Bencher<'_, M>), +{ + pub fn new(f: F) -> Self { + Self { + f, + phantom: PhantomData, + } + } +} + +#[async_trait(?Send)] +impl Routine for Function +where + F: FnMut(&mut Bencher<'_, M>), +{ + async fn bench(&mut self, m: &M, iters: &[u64]) -> Vec { + let f = &mut self.f; + + let mut b = Bencher { + iterated: false, + iters: 0, + value: m.zero(), + measurement: m, + elapsed_time: Duration::from_millis(0), + }; + + iters + .iter() + .map(|iters| { + b.iters = *iters; + (*f)(&mut b); + m.to_f64(&b.value) + }) + .collect() + } + + async fn warm_up(&mut self, m: &M, how_long: Duration) -> (u64, u64) { + let f = &mut self.f; + let mut b = Bencher { + iterated: false, + iters: 1, + value: m.zero(), + measurement: m, + elapsed_time: Duration::from_millis(0), + }; + + let mut total_iters = 0; + let mut elapsed_time = Duration::from_millis(0); + loop { + (*f)(&mut b); + + total_iters += b.iters; + elapsed_time += b.elapsed_time; + if elapsed_time > how_long { + return (elapsed_time.as_nanos() as u64, total_iters); + } + + b.iters = b.iters.wrapping_mul(2); + } + } +} diff --git a/client/test/src/criterion/stats/bivariate/mod.rs b/client/test/src/criterion/stats/bivariate/mod.rs new file mode 100644 index 00000000..1de4cec7 --- /dev/null +++ b/client/test/src/criterion/stats/bivariate/mod.rs @@ -0,0 +1,70 @@ +//! 
Bivariate analysis + +pub mod regression; +mod resamples; + +use super::bivariate::resamples::Resamples; +use super::float::Float; +use super::tuple::{Tuple, TupledDistributionsBuilder}; +use super::univariate::Sample; + +/// Bivariate `(X, Y)` data +/// +/// Invariants: +/// +/// - No `NaN`s in the data +/// - At least two data points in the set +#[derive(Clone, Copy)] +pub struct Data<'a, X, Y>(&'a [X], &'a [Y]); + +impl<'a, X, Y> Data<'a, X, Y> +where + X: Float, + Y: Float, +{ + /// Creates a new data set from two existing slices + pub fn new(xs: &'a [X], ys: &'a [Y]) -> Self { + assert!( + xs.len() == ys.len() + && xs.len() > 1 + && xs.iter().all(|x| !x.is_nan()) + && ys.iter().all(|y| !y.is_nan()) + ); + + Data(xs, ys) + } + + // TODO Remove the `T` parameter in favor of `S::Output` + /// Returns the bootstrap distributions of the parameters estimated by the + /// `statistic` + /// + /// - Multi-threaded + /// - Time: `O(nresamples)` + /// - Memory: `O(nresamples)` + pub fn bootstrap(&self, nresamples: usize, statistic: S) -> T::Distributions + where + S: Fn(Data) -> T + Sync, + T: Tuple + Send, + T::Distributions: Send, + T::Builder: Send, + { + let mut resamples = Resamples::new(*self); + (0..nresamples) + .map(|_| statistic(resamples.next())) + .fold(T::Builder::new(0), |mut sub_distributions, sample| { + sub_distributions.push(sample); + sub_distributions + }) + .complete() + } + + /// Returns a view into the `X` data + pub fn x(&self) -> &'a Sample { + Sample::new(self.0) + } + + /// Returns a view into the `Y` data + pub fn y(&self) -> &'a Sample { + Sample::new(self.1) + } +} diff --git a/client/test/src/criterion/stats/bivariate/regression.rs b/client/test/src/criterion/stats/bivariate/regression.rs new file mode 100644 index 00000000..83a0d026 --- /dev/null +++ b/client/test/src/criterion/stats/bivariate/regression.rs @@ -0,0 +1,30 @@ +//! 
Regression analysis + +use super::super::bivariate::Data; +use super::super::dot; +use super::super::float::Float; + +/// A straight line that passes through the origin `y = m * x` +#[derive(Clone, Copy)] +pub struct Slope(pub A) +where + A: Float; + +impl Slope +where + A: Float, +{ + /// Fits the data to a straight line that passes through the origin using + /// ordinary least squares + /// + /// - Time: `O(length)` + pub fn fit(data: &Data<'_, A, A>) -> Self { + let xs = data.0; + let ys = data.1; + + let xy = dot(xs, ys); + let x2 = dot(xs, xs); + + Self(xy / x2) + } +} diff --git a/client/test/src/criterion/stats/bivariate/resamples.rs b/client/test/src/criterion/stats/bivariate/resamples.rs new file mode 100644 index 00000000..9a37b14f --- /dev/null +++ b/client/test/src/criterion/stats/bivariate/resamples.rs @@ -0,0 +1,62 @@ +use alloc::vec::Vec; + +use super::super::bivariate::Data; +use super::super::float::Float; +use super::super::rand_util::{Rng, new_rng}; + +pub struct Resamples<'a, X, Y> +where + X: 'a + Float, + Y: 'a + Float, +{ + rng: Rng, + data: (&'a [X], &'a [Y]), + stage: Option<(Vec, Vec)>, +} + +impl<'a, X, Y> Resamples<'a, X, Y> +where + X: 'a + Float, + Y: 'a + Float, +{ + pub fn new(data: Data<'a, X, Y>) -> Self { + Resamples { + rng: new_rng(), + data: (data.x(), data.y()), + stage: None, + } + } + + pub fn next(&mut self) -> Data<'_, X, Y> { + let n = self.data.0.len(); + + match self.stage { + None => { + let mut stage = (Vec::with_capacity(n), Vec::with_capacity(n)); + + for _ in 0..n { + let i = self.rng.rand_range(0u64..(self.data.0.len() as u64)) as usize; + + stage.0.push(self.data.0[i]); + stage.1.push(self.data.1[i]); + } + + self.stage = Some(stage); + } + Some(ref mut stage) => { + for i in 0..n { + let j = self.rng.rand_range(0u64..(self.data.0.len() as u64)) as usize; + + stage.0[i] = self.data.0[j]; + stage.1[i] = self.data.1[j]; + } + } + } + + if let Some((ref x, ref y)) = self.stage { + Data(x, y) + } else { + 
unreachable!(); + } + } +} diff --git a/client/test/src/criterion/stats/float.rs b/client/test/src/criterion/stats/float.rs new file mode 100644 index 00000000..27f7b035 --- /dev/null +++ b/client/test/src/criterion/stats/float.rs @@ -0,0 +1,15 @@ +//! Float trait + +use cast::From; +use num_traits::float; + +/// This is an extension of `num_traits::float::Float` that adds safe +/// casting and Sync + Send. Once `num_traits` has these features this +/// can be removed. +pub trait Float: + float::Float + From + From + Sync + Send +{ +} + +impl Float for f32 {} +impl Float for f64 {} diff --git a/client/test/src/criterion/stats/mod.rs b/client/test/src/criterion/stats/mod.rs new file mode 100644 index 00000000..fc67e35c --- /dev/null +++ b/client/test/src/criterion/stats/mod.rs @@ -0,0 +1,112 @@ +//! [Criterion]'s statistics library. +//! +//! [Criterion]: https://github.com/bheisler/criterion.rs +//! +//! **WARNING** This library is criterion's implementation detail and there no +//! plans to stabilize it. In other words, the API may break at any time without +//! notice. + +pub mod bivariate; +pub mod tuple; +pub mod univariate; + +mod float; +mod rand_util; + +use alloc::boxed::Box; +use core::mem; +use core::ops::Deref; + +use float::Float; +use univariate::Sample; + +/// The bootstrap distribution of some parameter +#[derive(Clone)] +pub struct Distribution(Box<[A]>); + +impl Distribution +where + A: Float, +{ + /// Create a distribution from the given values + pub fn from(values: Box<[A]>) -> Self { + Self(values) + } + + /// Computes the confidence interval of the population parameter using + /// percentiles + /// + /// # Panics + /// + /// Panics if the `confidence_level` is not in the `(0, 1)` range. 
+ pub fn confidence_interval(&self, confidence_level: A) -> (A, A) + where + usize: cast::From>, + { + let zero = A::cast(0); + let one = A::cast(1); + let fifty = A::cast(50); + + assert!(confidence_level > zero && confidence_level < one); + + let percentiles = self.percentiles(); + + // FIXME(privacy) this should use the `at_unchecked()` method + ( + percentiles.at(fifty * (one - confidence_level)), + percentiles.at(fifty * (one + confidence_level)), + ) + } + + /// Computes the "likelihood" of seeing the value `t` or "more extreme" + /// values in the distribution. + pub fn p_value(&self, t: A, tails: Tails) -> A { + use core::cmp; + + let n = self.0.len(); + let hits = self.0.iter().filter(|&&x| x < t).count(); + + let tails = A::cast(match tails { + Tails::One => 1, + Tails::Two => 2, + }); + + A::cast(cmp::min(hits, n - hits)) / A::cast(n) * tails + } +} + +impl Deref for Distribution { + type Target = Sample; + + fn deref(&self) -> &Sample { + let slice: &[_] = &self.0; + unsafe { mem::transmute(slice) } + } +} + +/// Number of tails for significance testing +#[derive(Clone, Copy)] +pub enum Tails { + /// One tailed test + One, + /// Two tailed test + Two, +} + +fn dot(xs: &[A], ys: &[A]) -> A +where + A: Float, +{ + xs.iter() + .zip(ys) + .fold(A::cast(0), |acc, (&x, &y)| acc + x * y) +} + +fn sum(xs: &[A]) -> A +where + A: Float, +{ + use core::ops::Add; + + xs.iter().copied().fold(A::cast(0), Add::add) +} diff --git a/client/test/src/criterion/stats/rand_util.rs b/client/test/src/criterion/stats/rand_util.rs new file mode 100644 index 00000000..1ce1cc55 --- /dev/null +++ b/client/test/src/criterion/stats/rand_util.rs @@ -0,0 +1,23 @@ +use core::cell::RefCell; + +use oorandom::Rand64; + +use crate::{LazyCell, SystemTime, UNIX_EPOCH}; + +pub type Rng = Rand64; + +#[cfg_attr(target_feature = "atomics", thread_local)] +static SEED_RAND: LazyCell> = LazyCell::new(|| { + RefCell::new(Rand64::new( + SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("Time 
went backwards") + .as_millis(), + )) +}); + +pub fn new_rng() -> Rng { + let mut r = SEED_RAND.borrow_mut(); + let seed = (u128::from(r.rand_u64()) << 64) | u128::from(r.rand_u64()); + Rand64::new(seed) +} diff --git a/client/test/src/criterion/stats/tuple.rs b/client/test/src/criterion/stats/tuple.rs new file mode 100644 index 00000000..13d32784 --- /dev/null +++ b/client/test/src/criterion/stats/tuple.rs @@ -0,0 +1,256 @@ +//! Helper traits for tupling/untupling + +use alloc::vec::Vec; + +use super::Distribution; + +/// Any tuple: `(A, B, ..)` +pub trait Tuple: Sized { + /// A tuple of distributions associated with this tuple + type Distributions: TupledDistributions; + + /// A tuple of vectors associated with this tuple + type Builder: TupledDistributionsBuilder; +} + +/// A tuple of distributions: `(Distribution, Distribution, ..)` +pub trait TupledDistributions: Sized { + /// A tuple that can be pushed/inserted into the tupled distributions + type Item: Tuple; +} + +/// A tuple of vecs used to build distributions. 
+pub trait TupledDistributionsBuilder: Sized { + /// A tuple that can be pushed/inserted into the tupled distributions + type Item: Tuple; + + /// Creates a new tuple of vecs + fn new(size: usize) -> Self; + + /// Push one element into each of the vecs + fn push(&mut self, tuple: Self::Item); + + /// Append one tuple of vecs to this one, leaving the vecs in the other + /// tuple empty + fn extend(&mut self, other: &mut Self); + + /// Convert the tuple of vectors into a tuple of distributions + fn complete(self) -> ::Distributions; +} + +impl Tuple for (A,) +where + A: Copy, +{ + type Distributions = (Distribution,); + type Builder = (Vec,); +} + +impl TupledDistributions for (Distribution,) +where + A: Copy, +{ + type Item = (A,); +} +impl TupledDistributionsBuilder for (Vec,) +where + A: Copy, +{ + type Item = (A,); + + fn new(size: usize) -> (Vec,) { + (Vec::with_capacity(size),) + } + + fn push(&mut self, tuple: (A,)) { + (self.0).push(tuple.0); + } + + fn extend(&mut self, other: &mut (Vec,)) { + (self.0).append(&mut other.0); + } + + fn complete(self) -> (Distribution,) { + (Distribution(self.0.into_boxed_slice()),) + } +} + +impl Tuple for (A, B) +where + A: Copy, + B: Copy, +{ + type Distributions = (Distribution, Distribution); + type Builder = (Vec, Vec); +} + +impl TupledDistributions for (Distribution, Distribution) +where + A: Copy, + B: Copy, +{ + type Item = (A, B); +} +impl TupledDistributionsBuilder for (Vec, Vec) +where + A: Copy, + B: Copy, +{ + type Item = (A, B); + + fn new(size: usize) -> (Vec, Vec) { + (Vec::with_capacity(size), Vec::with_capacity(size)) + } + + fn push(&mut self, tuple: (A, B)) { + (self.0).push(tuple.0); + (self.1).push(tuple.1); + } + + fn extend(&mut self, other: &mut (Vec, Vec)) { + (self.0).append(&mut other.0); + (self.1).append(&mut other.1); + } + + fn complete(self) -> (Distribution, Distribution) { + ( + Distribution(self.0.into_boxed_slice()), + Distribution(self.1.into_boxed_slice()), + ) + } +} + +impl Tuple for 
(A, B, C) +where + A: Copy, + B: Copy, + C: Copy, +{ + type Distributions = (Distribution, Distribution, Distribution); + type Builder = (Vec, Vec, Vec); +} + +impl TupledDistributions for (Distribution, Distribution, Distribution) +where + A: Copy, + B: Copy, + C: Copy, +{ + type Item = (A, B, C); +} +impl TupledDistributionsBuilder for (Vec, Vec, Vec) +where + A: Copy, + B: Copy, + C: Copy, +{ + type Item = (A, B, C); + + fn new(size: usize) -> (Vec, Vec, Vec) { + ( + Vec::with_capacity(size), + Vec::with_capacity(size), + Vec::with_capacity(size), + ) + } + + fn push(&mut self, tuple: (A, B, C)) { + (self.0).push(tuple.0); + (self.1).push(tuple.1); + (self.2).push(tuple.2); + } + + fn extend(&mut self, other: &mut (Vec, Vec, Vec)) { + (self.0).append(&mut other.0); + (self.1).append(&mut other.1); + (self.2).append(&mut other.2); + } + + fn complete(self) -> (Distribution, Distribution, Distribution) { + ( + Distribution(self.0.into_boxed_slice()), + Distribution(self.1.into_boxed_slice()), + Distribution(self.2.into_boxed_slice()), + ) + } +} + +impl Tuple for (A, B, C, D) +where + A: Copy, + B: Copy, + C: Copy, + D: Copy, +{ + type Distributions = ( + Distribution, + Distribution, + Distribution, + Distribution, + ); + type Builder = (Vec, Vec, Vec, Vec); +} + +impl TupledDistributions + for ( + Distribution, + Distribution, + Distribution, + Distribution, + ) +where + A: Copy, + B: Copy, + C: Copy, + D: Copy, +{ + type Item = (A, B, C, D); +} +impl TupledDistributionsBuilder for (Vec, Vec, Vec, Vec) +where + A: Copy, + B: Copy, + C: Copy, + D: Copy, +{ + type Item = (A, B, C, D); + + fn new(size: usize) -> (Vec, Vec, Vec, Vec) { + ( + Vec::with_capacity(size), + Vec::with_capacity(size), + Vec::with_capacity(size), + Vec::with_capacity(size), + ) + } + + fn push(&mut self, tuple: (A, B, C, D)) { + (self.0).push(tuple.0); + (self.1).push(tuple.1); + (self.2).push(tuple.2); + (self.3).push(tuple.3); + } + + fn extend(&mut self, other: &mut (Vec, Vec, Vec, Vec)) 
{ + (self.0).append(&mut other.0); + (self.1).append(&mut other.1); + (self.2).append(&mut other.2); + (self.3).append(&mut other.3); + } + + fn complete( + self, + ) -> ( + Distribution, + Distribution, + Distribution, + Distribution, + ) { + ( + Distribution(self.0.into_boxed_slice()), + Distribution(self.1.into_boxed_slice()), + Distribution(self.2.into_boxed_slice()), + Distribution(self.3.into_boxed_slice()), + ) + } +} diff --git a/client/test/src/criterion/stats/univariate/mixed.rs b/client/test/src/criterion/stats/univariate/mixed.rs new file mode 100644 index 00000000..cb53f140 --- /dev/null +++ b/client/test/src/criterion/stats/univariate/mixed.rs @@ -0,0 +1,44 @@ +//! Mixed bootstrap + +use alloc::vec::Vec; + +use super::super::float::Float; +use super::super::tuple::{Tuple, TupledDistributionsBuilder}; +use super::{Resamples, Sample}; + +/// Performs a *mixed* two-sample bootstrap +pub fn bootstrap( + a: &Sample, + b: &Sample, + nresamples: usize, + statistic: S, +) -> T::Distributions +where + A: Float, + S: Fn(&Sample, &Sample) -> T + Sync, + T: Tuple + Send, + T::Distributions: Send, + T::Builder: Send, +{ + let n_a = a.len(); + let n_b = b.len(); + let mut c = Vec::with_capacity(n_a + n_b); + c.extend_from_slice(a); + c.extend_from_slice(b); + let c = Sample::new(&c); + + let mut resamples = Resamples::new(c); + (0..nresamples) + .map(|_| { + let resample = resamples.next(); + let a: &Sample = Sample::new(&resample[..n_a]); + let b: &Sample = Sample::new(&resample[n_a..]); + + statistic(a, b) + }) + .fold(T::Builder::new(0), |mut sub_distributions, sample| { + sub_distributions.push(sample); + sub_distributions + }) + .complete() +} diff --git a/client/test/src/criterion/stats/univariate/mod.rs b/client/test/src/criterion/stats/univariate/mod.rs new file mode 100644 index 00000000..5c7e71a9 --- /dev/null +++ b/client/test/src/criterion/stats/univariate/mod.rs @@ -0,0 +1,62 @@ +//! 
Univariate analysis + +pub mod mixed; +pub mod outliers; +mod percentiles; +mod resamples; +mod sample; + +use core::cmp; + +use libm::{ceil, sqrt}; +pub use percentiles::Percentiles; +use resamples::Resamples; +pub use sample::Sample; + +use super::float::Float; +use super::tuple::{Tuple, TupledDistributionsBuilder}; + +/// Performs a two-sample bootstrap +/// +/// - Multithreaded +/// - Time: `O(nresamples)` +/// - Memory: `O(nresamples)` +pub fn bootstrap( + a: &Sample, + b: &Sample, + nresamples: usize, + statistic: S, +) -> T::Distributions +where + A: Float, + B: Float, + S: Fn(&Sample, &Sample) -> T + Sync, + T: Tuple + Send, + T::Distributions: Send, + T::Builder: Send, +{ + let nresamples_sqrt = ceil(sqrt(nresamples as f64)) as usize; + let per_chunk = nresamples.div_ceil(nresamples_sqrt); + + let mut a_resamples = Resamples::new(a); + let mut b_resamples = Resamples::new(b); + (0..nresamples_sqrt) + .map(|i| { + let start = i * per_chunk; + let end = cmp::min((i + 1) * per_chunk, nresamples); + let a_resample = a_resamples.next(); + + let mut sub_distributions: T::Builder = TupledDistributionsBuilder::new(end - start); + + for _ in start..end { + let b_resample = b_resamples.next(); + sub_distributions.push(statistic(a_resample, b_resample)); + } + sub_distributions + }) + .fold(T::Builder::new(0), |mut a, mut b| { + a.extend(&mut b); + a + }) + .complete() +} diff --git a/client/test/src/criterion/stats/univariate/outliers/mod.rs b/client/test/src/criterion/stats/univariate/outliers/mod.rs new file mode 100644 index 00000000..afff2d4c --- /dev/null +++ b/client/test/src/criterion/stats/univariate/outliers/mod.rs @@ -0,0 +1,7 @@ +//! Classification of outliers +//! +//! WARNING: There's no formal/mathematical definition of what an outlier +//! actually is. Therefore, all outlier classifiers are *subjective*, however +//! some classifiers that have become *de facto* standard are provided here. 
+ +pub mod tukey; diff --git a/client/test/src/criterion/stats/univariate/outliers/tukey.rs b/client/test/src/criterion/stats/univariate/outliers/tukey.rs new file mode 100644 index 00000000..1d05f7dd --- /dev/null +++ b/client/test/src/criterion/stats/univariate/outliers/tukey.rs @@ -0,0 +1,274 @@ +//! Tukey's method +//! +//! The original method uses two "fences" to classify the data. All the +//! observations "inside" the fences are considered "normal", and the rest are +//! considered outliers. +//! +//! The fences are computed from the quartiles of the sample, according to the +//! following formula: +//! +//! ``` ignore +//! // q1, q3 are the first and third quartiles +//! let iqr = q3 - q1; // The interquartile range +//! let (f1, f2) = (q1 - 1.5 * iqr, q3 + 1.5 * iqr); // the "fences" +//! +//! let is_outlier = |x| if x > f1 && x < f2 { true } else { false }; +//! ``` +//! +//! The classifier provided here adds two extra outer fences: +//! +//! ``` ignore +//! let (f3, f4) = (q1 - 3 * iqr, q3 + 3 * iqr); // the outer "fences" +//! ``` +//! +//! The extra fences add a sense of "severity" to the classification. Data +//! points outside of the outer fences are considered "severe" outliers, whereas +//! points outside the inner fences are just "mild" outliers, and, as the +//! original method, everything inside the inner fences is considered +//! "normal" data. +//! +//! Some ASCII art for the visually oriented people: +//! +//! ``` ignore +//! LOW-ish NORMAL-ish HIGH-ish +//! x | + | o o o o o o o | + | x +//! f3 f1 f2 f4 +//! +//! Legend: +//! o: "normal" data (not an outlier) +//! +: "mild" outlier +//! x: "severe" outlier +//! ``` + +use core::ops::{Deref, Index}; +use core::slice; + +use self::Label::{HighMild, HighSevere, LowMild, LowSevere, NotAnOutlier}; +use super::super::super::float::Float; +use super::super::Sample; + +/// A classified/labeled sample. +/// +/// The labeled data can be accessed using the indexing operator. 
The order of +/// the data points is retained. +/// +/// NOTE: Due to limitations in the indexing traits, only the label is returned. +/// Once the `IndexGet` trait lands in stdlib, the indexing operation will +/// return a `(data_point, label)` pair. +#[derive(Clone, Copy)] +pub struct LabeledSample<'a, A> +where + A: Float, +{ + fences: (A, A, A, A), + sample: &'a Sample, +} + +impl<'a, A> LabeledSample<'a, A> +where + A: Float, +{ + /// Returns the number of data points per label + /// + /// - Time: `O(length)` + pub fn count(&self) -> (usize, usize, usize, usize, usize) { + let (mut los, mut lom, mut noa, mut him, mut his) = (0, 0, 0, 0, 0); + + for (_, label) in self { + match label { + LowSevere => { + los += 1; + } + LowMild => { + lom += 1; + } + NotAnOutlier => { + noa += 1; + } + HighMild => { + him += 1; + } + HighSevere => { + his += 1; + } + } + } + + (los, lom, noa, him, his) + } + + /// Returns the fences used to classify the outliers + pub fn fences(&self) -> (A, A, A, A) { + self.fences + } + + /// Returns an iterator over the labeled data + pub fn iter(&self) -> Iter<'a, A> { + Iter { + fences: self.fences, + iter: self.sample.iter(), + } + } +} + +impl Deref for LabeledSample<'_, A> +where + A: Float, +{ + type Target = Sample; + + fn deref(&self) -> &Sample { + self.sample + } +} + +// FIXME Use the `IndexGet` trait +impl Index for LabeledSample<'_, A> +where + A: Float, +{ + type Output = Label; + + fn index(&self, i: usize) -> &Label { + static LOW_SEVERE: Label = LowSevere; + static LOW_MILD: Label = LowMild; + static HIGH_MILD: Label = HighMild; + static HIGH_SEVERE: Label = HighSevere; + static NOT_AN_OUTLIER: Label = NotAnOutlier; + + let x = self.sample[i]; + let (lost, lomt, himt, hist) = self.fences; + + if x < lost { + &LOW_SEVERE + } else if x > hist { + &HIGH_SEVERE + } else if x < lomt { + &LOW_MILD + } else if x > himt { + &HIGH_MILD + } else { + &NOT_AN_OUTLIER + } + } +} + +impl<'a, A> IntoIterator for &LabeledSample<'a, A> 
+where + A: Float, +{ + type Item = (A, Label); + type IntoIter = Iter<'a, A>; + + fn into_iter(self) -> Iter<'a, A> { + self.iter() + } +} + +/// Iterator over the labeled data +pub struct Iter<'a, A> +where + A: Float, +{ + fences: (A, A, A, A), + iter: slice::Iter<'a, A>, +} + +impl Iterator for Iter<'_, A> +where + A: Float, +{ + type Item = (A, Label); + + fn next(&mut self) -> Option<(A, Label)> { + self.iter.next().map(|&x| { + let (lost, lomt, himt, hist) = self.fences; + + let label = if x < lost { + LowSevere + } else if x > hist { + HighSevere + } else if x < lomt { + LowMild + } else if x > himt { + HighMild + } else { + NotAnOutlier + }; + + (x, label) + }) + } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +/// Labels used to classify outliers +pub enum Label { + /// A "mild" outlier in the "high" spectrum + HighMild, + /// A "severe" outlier in the "high" spectrum + HighSevere, + /// A "mild" outlier in the "low" spectrum + LowMild, + /// A "severe" outlier in the "low" spectrum + LowSevere, + /// A normal data point + NotAnOutlier, +} + +impl Label { + /// Checks if the data point has an "unusually" high value + pub fn is_high(&self) -> bool { + matches!(*self, HighMild | HighSevere) + } + + /// Checks if the data point is labeled as a "mild" outlier + pub fn is_mild(&self) -> bool { + matches!(*self, HighMild | LowMild) + } + + /// Checks if the data point has an "unusually" low value + pub fn is_low(&self) -> bool { + matches!(*self, LowMild | LowSevere) + } + + /// Checks if the data point is labeled as an outlier + pub fn is_outlier(&self) -> bool { + !matches!(*self, NotAnOutlier) + } + + /// Checks if the data point is labeled as a "severe" outlier + pub fn is_severe(&self) -> bool { + matches!(*self, HighSevere | LowSevere) + } +} + +/// Classifies the sample, and returns a labeled sample. 
+/// +/// - Time: `O(N log N) where N = length` +pub fn classify(sample: &Sample) -> LabeledSample<'_, A> +where + A: Float, + usize: cast::From>, +{ + let (q1, _, q3) = sample.percentiles().quartiles(); + let iqr = q3 - q1; + + // Mild + let k_m = A::cast(1.5_f32); + // Severe + let k_s = A::cast(3); + + LabeledSample { + fences: ( + q1 - k_s * iqr, + q1 - k_m * iqr, + q3 + k_m * iqr, + q3 + k_s * iqr, + ), + sample, + } +} diff --git a/client/test/src/criterion/stats/univariate/percentiles.rs b/client/test/src/criterion/stats/univariate/percentiles.rs new file mode 100644 index 00000000..49583f5e --- /dev/null +++ b/client/test/src/criterion/stats/univariate/percentiles.rs @@ -0,0 +1,72 @@ +use alloc::boxed::Box; + +use cast::usize; + +use super::super::float::Float; + +/// A "view" into the percentiles of a sample +pub struct Percentiles(Box<[A]>) +where + A: Float; + +// TODO(rust-lang/rfcs#735) move this `impl` into a private percentiles module +impl Percentiles +where + A: Float, + usize: cast::From>, +{ + /// Returns the percentile at `p`% + /// + /// Safety: + /// + /// - Make sure that `p` is in the range `[0, 100]` + unsafe fn at_unchecked(&self, p: A) -> A { + unsafe { + let hundred = A::cast(100); + debug_assert!(p >= A::cast(0) && p <= hundred); + debug_assert!(!self.0.is_empty()); + let len = self.0.len() - 1; + + if p == hundred { + self.0[len] + } else { + let rank = (p / hundred) * A::cast(len); + let integer = rank.floor(); + let fraction = rank - integer; + let n = usize(integer).unwrap(); + let &floor = self.0.get_unchecked(n); + let &ceiling = self.0.get_unchecked(n + 1); + + floor + (ceiling - floor) * fraction + } + } + } + + /// Returns the percentile at `p`% + /// + /// # Panics + /// + /// Panics if `p` is outside the closed `[0, 100]` range + pub fn at(&self, p: A) -> A { + let zero = A::cast(0); + let hundred = A::cast(100); + + assert!(p >= zero && p <= hundred); + assert!(!self.0.is_empty()); + unsafe { self.at_unchecked(p) } + } + + 
/// Returns the 50th percentile + pub fn median(&self) -> A { + self.at(A::cast(50)) + } + + /// Returns the 25th, 50th and 75th percentiles + pub fn quartiles(&self) -> (A, A, A) { + ( + self.at(A::cast(25)), + self.at(A::cast(50)), + self.at(A::cast(75)), + ) + } +} diff --git a/client/test/src/criterion/stats/univariate/resamples.rs b/client/test/src/criterion/stats/univariate/resamples.rs new file mode 100644 index 00000000..08916db5 --- /dev/null +++ b/client/test/src/criterion/stats/univariate/resamples.rs @@ -0,0 +1,60 @@ +use alloc::vec::Vec; +use core::mem; + +use super::super::float::Float; +use super::super::rand_util::{Rng, new_rng}; +use super::super::univariate::Sample; + +pub struct Resamples<'a, A> +where + A: Float, +{ + rng: Rng, + sample: &'a [A], + stage: Option>, +} + +impl<'a, A> Resamples<'a, A> +where + A: 'a + Float, +{ + pub fn new(sample: &'a Sample) -> Self { + let slice = sample; + + Resamples { + rng: new_rng(), + sample: slice, + stage: None, + } + } + + pub fn next(&mut self) -> &Sample { + let n = self.sample.len(); + let rng = &mut self.rng; + + match self.stage { + None => { + let mut stage = Vec::with_capacity(n); + + for _ in 0..n { + let idx = rng.rand_range(0u64..(self.sample.len() as u64)); + stage.push(self.sample[idx as usize]); + } + + self.stage = Some(stage); + } + Some(ref mut stage) => { + for elem in stage.iter_mut() { + let idx = rng.rand_range(0u64..(self.sample.len() as u64)); + *elem = self.sample[idx as usize]; + } + } + } + + if let Some(ref v) = self.stage { + unsafe { mem::transmute::<&[A], &Sample>(v) } + } else { + unreachable!(); + } + } +} diff --git a/client/test/src/criterion/stats/univariate/sample.rs b/client/test/src/criterion/stats/univariate/sample.rs new file mode 100644 index 00000000..ee05a97b --- /dev/null +++ b/client/test/src/criterion/stats/univariate/sample.rs @@ -0,0 +1,178 @@ +use alloc::vec::Vec; +use core::{mem, ops}; + +use super::super::float::Float; +use super::super::sum; +use 
super::super::tuple::{Tuple, TupledDistributionsBuilder}; +use super::super::univariate::{Percentiles, Resamples}; + +/// A collection of data points drawn from a population +/// +/// Invariants: +/// +/// - The sample contains at least 2 data points +/// - The sample contains no `NaN`s +#[repr(transparent)] +pub struct Sample([A]); + +// TODO(rust-lang/rfcs#735) move this `impl` into a private percentiles module +impl Sample +where + A: Float, +{ + /// Creates a new sample from an existing slice + /// + /// # Panics + /// + /// Panics if `slice` contains any `NaN` or if `slice` has less than two + /// elements + pub fn new(slice: &[A]) -> &Self { + assert!(slice.len() > 1 && slice.iter().all(|x| !x.is_nan())); + unsafe { mem::transmute(slice) } + } + + /// Returns the arithmetic average of the sample + /// + /// - Time: `O(length)` + pub fn mean(&self) -> A { + let n = self.len(); + + self.sum() / A::cast(n) + } + + /// Returns the median absolute deviation + /// + /// The `median` can be optionally passed along to speed up (2X) the + /// computation + /// + /// - Time: `O(length)` + /// - Memory: `O(length)` + pub fn median_abs_dev(&self, median: Option) -> A + where + usize: cast::From>, + { + let median = median.unwrap_or_else(|| self.percentiles().median()); + + // NB Although this operation can be SIMD accelerated, the gain is negligible + // because the bottle neck is the sorting operation which is part of the + // computation of the median + let abs_devs = self.iter().map(|&x| (x - median).abs()).collect::>(); + + let abs_devs: &Self = Self::new(&abs_devs); + + abs_devs.percentiles().median() * A::cast(1.4826) + } + + /// Returns a "view" into the percentiles of the sample + /// + /// This "view" makes consecutive computations of percentiles much faster + /// (`O(1)`) + /// + /// - Time: `O(N log N) where N = length` + /// - Memory: `O(length)` + pub fn percentiles(&self) -> Percentiles + where + usize: cast::From>, + { + use core::cmp::Ordering; + + // NB 
This function assumes that there are no `NaN`s in the sample + fn cmp(a: &T, b: &T) -> Ordering + where + T: PartialOrd, + { + match a.partial_cmp(b) { + Some(o) => o, + // Arbitrary way to handle NaNs that should never happen + None => Ordering::Equal, + } + } + + let mut v = self.to_vec().into_boxed_slice(); + v.sort_unstable_by(cmp); + unsafe { mem::transmute(v) } + } + + /// Returns the standard deviation of the sample + /// + /// The `mean` can be optionally passed along to speed up (2X) the + /// computation + /// + /// - Time: `O(length)` + pub fn std_dev(&self, mean: Option) -> A { + self.var(mean).sqrt() + } + + /// Returns the sum of all the elements of the sample + /// + /// - Time: `O(length)` + pub fn sum(&self) -> A { + sum(self) + } + + /// Returns the t score between these two samples + /// + /// - Time: `O(length)` + pub fn t(&self, other: &Self) -> A { + let (x_bar, y_bar) = (self.mean(), other.mean()); + let (s2_x, s2_y) = (self.var(Some(x_bar)), other.var(Some(y_bar))); + let n_x = A::cast(self.len()); + let n_y = A::cast(other.len()); + let num = x_bar - y_bar; + let den = (s2_x / n_x + s2_y / n_y).sqrt(); + + num / den + } + + /// Returns the variance of the sample + /// + /// The `mean` can be optionally passed along to speed up (2X) the + /// computation + /// + /// - Time: `O(length)` + pub fn var(&self, mean: Option) -> A { + use core::ops::Add; + + let mean = mean.unwrap_or_else(|| self.mean()); + let slice = self; + + let sum = slice + .iter() + .map(|&x| (x - mean).powi(2)) + .fold(A::cast(0), Add::add); + + sum / A::cast(slice.len() - 1) + } + + // TODO Remove the `T` parameter in favor of `S::Output` + /// Returns the bootstrap distributions of the parameters estimated by the + /// 1-sample statistic + /// + /// - Multi-threaded + /// - Time: `O(nresamples)` + /// - Memory: `O(nresamples)` + pub fn bootstrap(&self, nresamples: usize, statistic: S) -> T::Distributions + where + S: Fn(&Self) -> T + Sync, + T: Tuple + Send, + 
T::Distributions: Send, + T::Builder: Send, + { + let mut resamples = Resamples::new(self); + (0..nresamples) + .map(|_| statistic(resamples.next())) + .fold(T::Builder::new(0), |mut sub_distributions, sample| { + sub_distributions.push(sample); + sub_distributions + }) + .complete() + } +} + +impl ops::Deref for Sample { + type Target = [A]; + + fn deref(&self) -> &[A] { + &self.0 + } +} diff --git a/client/test/src/lib.rs b/client/test/src/lib.rs index 57e40cd0..efaddb7a 100644 --- a/client/test/src/lib.rs +++ b/client/test/src/lib.rs @@ -1,8 +1,20 @@ +#![cfg_attr(target_feature = "atomics", feature(thread_local))] + +extern crate alloc; + +mod criterion; +mod time; + +use core::ops::Deref; +/// TODO: `no_std` support use std::panic::{self, PanicHookInfo}; use std::sync::Once; -pub use js_bindgen_test_macro::test; +pub use criterion::Criterion; +pub use js_bindgen_test_macro::{bench, test}; use js_sys::{JsString, js_sys}; +use once_cell::unsync::Lazy; +pub use time::{Instant, SystemTime, UNIX_EPOCH}; #[js_sys] extern "js-sys" { @@ -39,3 +51,62 @@ pub fn set_panic_hook() { })); }); } + +pub mod console { + use js_sys::{JsString, js_sys}; + + #[js_sys(namespace = "console")] + extern "js-sys" { + pub fn log(data: &JsString); + pub fn error(data: &JsString); + } + + #[macro_export] + macro_rules! console_log { + ($($t:tt)*) => ( + $crate::console::error( + &format_args!($($t)*).to_string().as_str().into() + ) + ) + } + + #[macro_export] + macro_rules! console_error { + ($($t:tt)*) => ( + $crate::console::error( + &format_args!($($t)*).to_string().as_str().into() + ) + ) + } +} + +/* TODO: Move the following code into `xxx-shared` crate. */ + +pub(crate) struct ThreadLocalWrapper(pub(crate) T); + +#[cfg(not(target_feature = "atomics"))] +// SAFETY: In wasm targets without atomics there is no cross-thread access, so +// treating this wrapper as `Sync` is equivalent to thread-local usage. 
+unsafe impl Sync for ThreadLocalWrapper {} + +#[cfg(not(target_feature = "atomics"))] +// SAFETY: In wasm targets without atomics there is no cross-thread transfer, so +// treating this wrapper as `Send` is equivalent to thread-local usage. +unsafe impl Send for ThreadLocalWrapper {} + +/// Wrapper around [`Lazy`] adding `Send + Sync` when `atomics` is not enabled. +pub(crate) struct LazyCell T>(ThreadLocalWrapper>); + +impl LazyCell { + pub const fn new(init: F) -> Self { + Self(ThreadLocalWrapper(Lazy::new(init))) + } +} + +impl Deref for LazyCell { + type Target = T; + + fn deref(&self) -> &T { + Lazy::force(&self.0.0) + } +} diff --git a/client/test/src/time/instant.rs b/client/test/src/time/instant.rs new file mode 100644 index 00000000..9cd2b9b5 --- /dev/null +++ b/client/test/src/time/instant.rs @@ -0,0 +1,133 @@ +//! Re-implementation of [`std::time::Instant`]. +//! +//! See . + +use core::ops::Sub; +use core::time::Duration; + +use super::performance; + +/// See [`std::time::Instant`]. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Instant(Duration); + +impl Instant { + /// See [`std::time::Instant::now()`]. + /// + /// # Panics + /// + /// This call will panic if the [`Performance` object] was not found, e.g. + /// calling from a [worklet]. + /// + /// [`Performance` object]: https://developer.mozilla.org/en-US/docs/Web/API/performance_property + /// [worklet]: https://developer.mozilla.org/en-US/docs/Web/API/Worklet + #[must_use] + pub fn now() -> Self { + #[cfg(not(target_feature = "atomics"))] + let now = performance().now(); + #[cfg(target_feature = "atomics")] + let now = performance().time_origin(); + + assert!( + now.is_sign_positive(), + "negative `DOMHighResTimeStamp`s are not supported" + ); + Self(time_stamp_to_duration(now)) + } + + /// See [`std::time::Instant::duration_since()`]. 
+ #[must_use] + pub fn duration_since(&self, earlier: Self) -> Duration { + self.checked_duration_since(earlier).unwrap_or_default() + } + + /// See [`std::time::Instant::checked_duration_since()`]. + #[must_use] + pub fn checked_duration_since(&self, earlier: Self) -> Option { + self.0.checked_sub(earlier.0) + } + + /// See [`std::time::Instant::elapsed()`]. + #[must_use] + pub fn elapsed(&self) -> Duration { + Self::now() - *self + } +} + +impl Sub for Instant { + type Output = Duration; + + /// Returns the amount of time elapsed from another instant to this one, + /// or zero duration if that instant is later than this one. + fn sub(self, rhs: Self) -> Duration { + self.duration_since(rhs) + } +} + +/// Converts a `DOMHighResTimeStamp` to a [`Duration`]. +/// +/// # Note +/// +/// Keep in mind that like [`Duration::from_secs_f64()`] this doesn't do perfect +/// rounding. +#[expect(clippy::pedantic, reason = "checked")] +fn time_stamp_to_duration(time_stamp: f64) -> Duration { + let time_stamp = F64(time_stamp); + + Duration::from_millis(time_stamp.trunc() as u64) + + Duration::from_nanos(F64(time_stamp.fract() * 1.0e6).internal_round_ties_even() as u64) +} + +/// [`f64`] `no_std` compatibility wrapper. +#[derive(Clone, Copy)] +struct F64(f64); + +impl F64 { + /// See [`f64::trunc()`]. + fn trunc(self) -> f64 { + libm::trunc(self.0) + } + + /// See [`f64::fract()`]. + fn fract(self) -> f64 { + self.0 - self.trunc() + } + + /// A specialized version of [`f64::round_ties_even()`]. [`f64`] must be + /// positive and have an exponent smaller than `52`. + /// + /// - We expect `DOMHighResTimeStamp` to always be positive. We check that + /// in [`Instant::now()`]. + /// - We only round the fractional part after multiplying it by `1e6`. A + /// fraction always has a negative exponent. `1e6` has an exponent of + /// `19`. Therefor the resulting exponent can at most be `19`. 
+ /// + /// [`f64::round_ties_even()`]: https://doc.rust-lang.org/1.83.0/std/primitive.f64.html#method.round_ties_even + fn internal_round_ties_even(self) -> f64 { + /// Put `debug_assert!` in a function to clap `coverage(off)` on it. + /// + /// See . + fn check(this: f64) { + debug_assert!(this.is_sign_positive(), "found negative input"); + debug_assert!( + { + let exponent: u64 = this.to_bits() >> 52 & 0x7ff; + exponent < 0x3ff + 52 + }, + "found number with exponent bigger than 51" + ); + } + + check(self.0); + + // See . + + let one_over_e = 1.0 / f64::EPSILON; + // REMOVED: We don't support numbers with exponents bigger than 51. + // REMOVED: We don't support negative numbers. + // REMOVED: We don't support numbers with exponents bigger than 51. + let xplusoneovere = self.0 + one_over_e; + xplusoneovere - one_over_e + // REMOVED: We don't support negative numbers. + } +} diff --git a/client/test/src/time/mod.rs b/client/test/src/time/mod.rs new file mode 100644 index 00000000..51c24604 --- /dev/null +++ b/client/test/src/time/mod.rs @@ -0,0 +1,36 @@ +mod instant; +mod system_time; + +pub use instant::Instant; +use js_sys::{js_bindgen, js_sys}; +pub use system_time::SystemTime; +pub const UNIX_EPOCH: SystemTime = SystemTime::UNIX_EPOCH; + +#[js_sys] +extern "js-sys" { + pub type Performance; + + #[js_sys(js_embed = "performance")] + pub fn performance() -> Performance; + + #[js_sys(js_name = "now")] + pub fn now(self: &Performance) -> f64; + + #[cfg(target_feature = "atomics")] + #[js_sys(property, js_name = "timeOrigin")] + pub fn time_origin(self: &Performance) -> f64; +} + +#[js_sys(namespace = "Date")] +extern "js-sys" { + #[js_sys(js_name = "now")] + pub fn date_now() -> f64; +} + +js_bindgen::embed_js!( + module = "js_bindgen_test", + name = "performance", + "() => {{ + return globalThis.performance + }}" +); diff --git a/client/test/src/time/system_time.rs b/client/test/src/time/system_time.rs new file mode 100644 index 00000000..6de4604f --- /dev/null 
+++ b/client/test/src/time/system_time.rs @@ -0,0 +1,36 @@ +//! Re-implementation of [`std::time::SystemTime`]. +//! +//! See . + +use core::time::Duration; + +use super::date_now; + +/// See [`std::time::SystemTime`]. +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct SystemTime(pub(crate) Duration); + +impl SystemTime { + /// See [`std::time::SystemTime::UNIX_EPOCH`]. + pub const UNIX_EPOCH: Self = Self(Duration::ZERO); + + /// See [`std::time::SystemTime::now()`]. + #[must_use] + pub fn now() -> Self { + #[expect(clippy::cast_possible_truncation, reason = "checked")] + let ms = date_now() as i64; + let ms = ms.try_into().expect("found negative timestamp"); + + Self(Duration::from_millis(ms)) + } + + /// See [`std::time::SystemTime::duration_since()`]. + pub fn duration_since(&self, earlier: Self) -> Result { + // See . + self.0.checked_sub(earlier.0).ok_or(SystemTimeError) + } +} + +/// See [`std::time::SystemTimeError`]. +#[derive(Clone, Debug)] +pub struct SystemTimeError; diff --git a/host/runner/src/js/shared.mjs b/host/runner/src/js/shared.mjs index 8600ba8a..74cd304f 100644 --- a/host/runner/src/js/shared.mjs +++ b/host/runner/src/js/shared.mjs @@ -66,14 +66,6 @@ export async function runTests(module, report) { } const testFn = instance.exports[test.importName]; let result; - if (test.shouldPanic) { - report(0 /* Stream.Stdout */, [ - { text: `test ${test.name} - should panic ... `, color: 0 /* Color.Default */ }, - ]); - } - else { - report(0 /* Stream.Stdout */, [testText]); - } interceptFlag = true; try { testFn(); @@ -83,6 +75,14 @@ export async function runTests(module, report) { result = { success: false, stack: error.stack }; } interceptFlag = false; + if (test.shouldPanic) { + report(0 /* Stream.Stdout */, [ + { text: `test ${test.name} - should panic ... 
`, color: 0 /* Color.Default */ }, + ]); + } + else { + report(0 /* Stream.Stdout */, [testText]); + } if (test.shouldPanic) { if (result.success) { report(0 /* Stream.Stdout */, [failedText, newLineText]); diff --git a/host/runner/src/js/shared.mts b/host/runner/src/js/shared.mts index 5519371e..4686efb3 100644 --- a/host/runner/src/js/shared.mts +++ b/host/runner/src/js/shared.mts @@ -96,14 +96,6 @@ export async function runTests( const testFn = instance.exports[test.importName] as () => void let result: { success: true } | { success: false; stack: string } - if (test.shouldPanic) { - report(Stream.Stdout, [ - { text: `test ${test.name} - should panic ... `, color: Color.Default }, - ]) - } else { - report(Stream.Stdout, [testText]) - } - interceptFlag = true try { @@ -115,6 +107,14 @@ export async function runTests( interceptFlag = false + if (test.shouldPanic) { + report(Stream.Stdout, [ + { text: `test ${test.name} - should panic ... `, color: Color.Default }, + ]) + } else { + report(Stream.Stdout, [testText]) + } + if (test.shouldPanic) { if (result.success) { report(Stream.Stdout, [failedText, newLineText]) diff --git a/host/runner/src/js/test-data.d.json.ts b/host/runner/src/js/test-data.d.json.ts index ccd80bcf..d4a84c65 100644 --- a/host/runner/src/js/test-data.d.json.ts +++ b/host/runner/src/js/test-data.d.json.ts @@ -1,5 +1,6 @@ type TestData = { worker: WorkerKind + bench: boolean, noCapture: boolean filteredCount: number tests: TestEntry[] diff --git a/host/runner/src/main.rs b/host/runner/src/main.rs index e388c34c..1432055d 100644 --- a/host/runner/src/main.rs +++ b/host/runner/src/main.rs @@ -45,6 +45,9 @@ struct Cli { /// don't capture `console.*()` of each task, allow printing directly. #[arg(long, alias = "nocapture")] no_capture: bool, + /// Run benchmarks. + #[arg(long)] + bench: bool, /// Configure formatting of output. 
#[arg(long, value_enum)]
     format: Option<FormatSetting>,
@@ -81,32 +84,41 @@ fn main() -> Result<()> {
         .with_context(|| format!("failed to read Wasm file: {}", wasm_path.display()))?;
 
     let args = TestArgs::new(cli);
-    let (tests, filtered_count) = TestEntry::read(
+    let (tests, test_filtered_count) = TestEntry::read_tests(
         &wasm_bytes,
         args.filter.as_ref(),
         args.ignored_only,
         args.exact,
     )?;
+    let (benches, bench_filtered_count) =
+        TestEntry::read_benches(&wasm_bytes, args.filter.as_ref(), args.exact)?;
+
     if args.list_only {
         match args.list_format {
             Some(FormatSetting::Terse) => {
                 for test in &tests {
                     println!("{}: test", test.name);
                 }
+                for bench in &benches {
+                    println!("{}: benchmark", bench.name);
+                }
             }
             None => {
                 for test in &tests {
                     println!("{}: test", test.name);
                 }
+                for bench in &benches {
+                    println!("{}: benchmark", bench.name);
+                }
 
                 println!();
-                println!("{} tests, 0 benchmarks", tests.len());
+                println!("{} tests, {} benchmarks", tests.len(), benches.len());
             }
         }
 
         return Ok(());
     }
 
-    if tests.is_empty() {
+    if !args.bench && tests.is_empty() || args.bench && benches.is_empty() {
         const GREEN: &str = "\u{001b}[32m";
         const RESET: &str = "\u{001b}[0m";
@@ -115,16 +127,30 @@ fn main() -> Result<()> {
         println!();
         println!(
             "test result: {GREEN}ok{RESET}. 0 passed; 0 failed; 0 ignored; 0 measured; \
-             {filtered_count} filtered out; finished in 0.00s"
+             {} filtered out; finished in 0.00s",
+            if args.bench { bench_filtered_count } else { test_filtered_count }
         );
         println!();
 
         return Ok(());
     }
 
+    let filtered_count = if args.bench {
+        bench_filtered_count
+    } else {
+        test_filtered_count
+    };
+
+    let tests = if args.bench {
+        benches
+    } else {
+        tests
+    };
+
     // The JS file has the same name, just a different file extension.
let imports_path = wasm_path.with_extension("mjs"); let test_data = TestData { - no_capture: args.no_capture, + bench: args.bench, + no_capture: args.no_capture || args.bench, filtered_count, tests, }; @@ -154,6 +179,7 @@ struct TestArgs { list_format: Option, ignored_only: bool, exact: bool, + bench: bool, } impl TestArgs { @@ -165,6 +191,7 @@ impl TestArgs { list_format: cli.format, ignored_only: cli.ignored, exact: cli.exact, + bench: cli.bench, } } } @@ -172,6 +199,7 @@ impl TestArgs { #[derive(Serialize)] #[serde(rename_all = "camelCase")] struct TestData { + bench: bool, no_capture: bool, filtered_count: usize, tests: Vec, @@ -193,7 +221,67 @@ enum TestAttr { } impl TestEntry { - fn read( + fn read_benches( + wasm_bytes: &[u8], + filter: &[String], + exact: bool, + ) -> Result<(Vec, usize)> { + let mut tests = Vec::new(); + let mut total = 0; + + for payload in WasmParser::new(0).parse_all(wasm_bytes) { + if let Payload::CustomSection(section) = payload? + && section.name() == "js_bindgen.bench" + { + let mut data = section.data(); + + while !data.is_empty() { + let len = u32::from_le_bytes( + data.split_off(..4) + .context("invalid test encoding")? + .try_into()?, + ) as usize; + let data = data.split_off(..len).context("invalid test encoding")?; + + let import_name = str::from_utf8(data)?; + let name = import_name + .split_once("::") + .unwrap_or_else(|| panic!("unexpected test name: {import_name}")) + .1; + + total += 1; + + let matches_filter = filter.is_empty() + || filter.iter().any(|filter| { + if exact { + filter == name + } else { + name.contains(filter) + } + }); + + if matches_filter { + tests.push(Self { + name: name.to_string(), + import_name: import_name.to_string(), + ignore: TestAttr::None, + should_panic: TestAttr::None, + }); + } + } + + // Section with the same name can never appear again. 
+ break; + } + } + + tests.sort_unstable_by(|a, b| a.name.cmp(&b.name)); + let filtered_count = total - tests.len(); + + Ok((tests, filtered_count)) + } + + fn read_tests( wasm_bytes: &[u8], filter: &[String], ignored_only: bool, diff --git a/host/test-macro/src/lib.rs b/host/test-macro/src/lib.rs index 3c94b56b..165ff938 100644 --- a/host/test-macro/src/lib.rs +++ b/host/test-macro/src/lib.rs @@ -15,6 +15,16 @@ enum TestAttribute { WithText(String), } +#[proc_macro_attribute] +pub fn bench( + attr: proc_macro::TokenStream, + item: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + bench_internal(attr.into(), item.into()) + .unwrap_or_else(Error::into_compile_error) + .into() +} + #[proc_macro_attribute] pub fn test( attr: proc_macro::TokenStream, @@ -25,7 +35,7 @@ pub fn test( .into() } -fn test_internal(attr: TokenStream, item: TokenStream) -> Result { +fn parse_crate(attr: TokenStream) -> Result { let mut crate_: Option = None; meta::parser(|meta| { @@ -44,6 +54,68 @@ fn test_internal(attr: TokenStream, item: TokenStream) -> Result { let crate_ = crate_.unwrap_or_else(|| parse_quote!(::js_bindgen_test)); + Ok(crate_) +} + +fn bench_internal(attr: TokenStream, item: TokenStream) -> Result { + let crate_ = parse_crate(attr)?; + let function: ItemFn = syn::parse2(item)?; + + if let Some(asyncness) = function.sig.asyncness { + return Err(Error::new_spanned( + asyncness, + "`async` benchmark not supported", + )); + } + + if let ReturnType::Type(..) = function.sig.output { + return Err(Error::new_spanned( + function.sig.output, + "benchmark with return value not supported", + )); + } + + let ident = &function.sig.ident; + let foreign_bench = quote! { + ::core::concat!(::core::module_path!(), "::", ::core::stringify!(#ident)) + }; + + Ok(quote! 
{
+        #function
+
+        const _: () = {
+            const TEST: &::core::primitive::str = #foreign_bench;
+            const TEST_LEN: ::core::primitive::usize = ::core::primitive::str::len(TEST);
+            const TEST_PTR: *const ::core::primitive::u8 = ::core::primitive::str::as_ptr(TEST);
+            const TEST_ARR: [::core::primitive::u8; TEST_LEN] = unsafe { *(TEST_PTR as *const _) };
+
+            const LEN_ARR: [::core::primitive::u8; 4] = ::core::primitive::u32::to_le_bytes(TEST_LEN as ::core::primitive::u32);
+
+            #[repr(C)]
+            struct Layout(
+                [::core::primitive::u8; 4],
+                [::core::primitive::u8; TEST_LEN],
+            );
+
+            #[unsafe(link_section = "js_bindgen.bench")]
+            static CUSTOM_SECTION: Layout = Layout(LEN_ARR, TEST_ARR);
+        };
+
+        const _: () = {
+            #[unsafe(export_name = #foreign_bench)]
+            extern "C" fn __jbg_bench() {
+                #crate_::set_panic_hook();
+                let mut bencher = #crate_::Criterion::default()
+                    .with_location(::core::file!(), ::core::module_path!());
+                #ident(&mut bencher);
+            }
+        };
+    })
+}
+
+fn test_internal(attr: TokenStream, item: TokenStream) -> Result<TokenStream> {
+    let crate_ = parse_crate(attr)?;
     let mut function: ItemFn = syn::parse2(item)?;
     let span = function.span();
     let mut ignore = TestAttribute::None;