From 4fdfbbb02bca8696f53a72d57aaf1f02c658c572 Mon Sep 17 00:00:00 2001
From: Boshen
Date: Sat, 16 Mar 2024 16:07:11 +0800
Subject: [PATCH] codspeed

---
 .github/workflows/ci.yaml       |   3 +
 Cargo.toml                      |   3 +
 src/analysis/compare.rs         |   3 +-
 src/analysis/mod.rs             |   3 +-
 src/benchmark_group.rs          |   7 +-
 src/codspeed/bencher.rs         | 260 +++++++++
 src/codspeed/benchmark_group.rs | 174 ++++++
 src/codspeed/criterion.rs       | 158 ++++++
 src/codspeed/mod.rs             |   3 +
 src/criterion.rs                | 898 +++++++++++++++++++++++++++++++
 src/lib.rs                      | 920 ++------------------------------
 src/macros_codspeed.rs          |  44 ++
 src/routine.rs                  |   9 +-
 13 files changed, 1590 insertions(+), 895 deletions(-)
 create mode 100644 src/codspeed/bencher.rs
 create mode 100644 src/codspeed/benchmark_group.rs
 create mode 100644 src/codspeed/criterion.rs
 create mode 100644 src/codspeed/mod.rs
 create mode 100644 src/criterion.rs
 create mode 100644 src/macros_codspeed.rs

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 60dd573..e5f5744 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -49,6 +49,9 @@ jobs:
       - name: Test with minimal features
         run: cargo test --no-default-features
 
+      - name: Check codspeed
+        run: cargo check --features codspeed
+
       - name: Check for non-standard formatting
         if: ${{ matrix.rust == 'stable' }}
         run: cargo fmt --all -- --check
diff --git a/Cargo.toml b/Cargo.toml
index 09484a5..42c3fd5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -64,6 +64,8 @@ plotters = { version = "^0.3.5", default-features = false, features = [
     "area_series",
     "line_series",
 ], optional = true }
+codspeed = { version = "2.4.0", optional = true }
+colored = { version = "2.0.0", optional = true }
 
 [dev-dependencies]
 tempfile = "3.10.1"
@@ -82,6 +84,7 @@ stable = [
     "async_std",
 ]
 default = ["rayon", "plotters", "cargo_bench_support"]
+codspeed = ["dep:codspeed", "dep:colored"]
 
 # Enable use of the nightly-only test::black_box function to discourage compiler optimizations.
 real_blackbox = []
diff --git a/src/analysis/compare.rs b/src/analysis/compare.rs
index 53e570f..408fe53 100644
--- a/src/analysis/compare.rs
+++ b/src/analysis/compare.rs
@@ -3,13 +3,14 @@ use crate::stats::univariate::{self, mixed};
 use crate::stats::Distribution;
 
 use crate::benchmark::BenchmarkConfig;
+use crate::criterion::Criterion;
 use crate::error::Result;
 use crate::estimate::{
     build_change_estimates, ChangeDistributions, ChangeEstimates, ChangePointEstimates, Estimates,
 };
 use crate::measurement::Measurement;
 use crate::report::BenchmarkId;
-use crate::{fs, Criterion, SavedSample};
+use crate::{fs, SavedSample};
 
 // Common comparison procedure
 #[cfg_attr(feature = "cargo-clippy", allow(clippy::type_complexity))]
diff --git a/src/analysis/mod.rs b/src/analysis/mod.rs
index 5fe4fcc..87601a1 100644
--- a/src/analysis/mod.rs
+++ b/src/analysis/mod.rs
@@ -8,6 +8,7 @@ use crate::stats::{Distribution, Tails};
 
 use crate::benchmark::BenchmarkConfig;
 use crate::connection::OutgoingMessage;
+use crate::criterion::Criterion;
 use crate::estimate::{
     build_estimates, ConfidenceInterval, Distributions, Estimate, Estimates, PointEstimates,
 };
@@ -15,7 +16,7 @@ use crate::fs;
 use crate::measurement::Measurement;
 use crate::report::{BenchmarkId, Report, ReportContext};
 use crate::routine::Routine;
-use crate::{Baseline, Criterion, SavedSample, Throughput};
+use crate::{Baseline, SavedSample, Throughput};
 
 macro_rules!
elapsed { ($msg:expr, $block:expr) => {{ diff --git a/src/benchmark_group.rs b/src/benchmark_group.rs index 3c3b67c..06c424b 100644 --- a/src/benchmark_group.rs +++ b/src/benchmark_group.rs @@ -1,13 +1,16 @@ +use std::time::Duration; + use crate::analysis; +use crate::bencher::Bencher; use crate::benchmark::PartialBenchmarkConfig; use crate::connection::OutgoingMessage; +use crate::criterion::Criterion; use crate::measurement::Measurement; use crate::report::BenchmarkId as InternalBenchmarkId; use crate::report::Report; use crate::report::ReportContext; use crate::routine::{Function, Routine}; -use crate::{Bencher, Criterion, Mode, PlotConfiguration, SamplingMode, Throughput}; -use std::time::Duration; +use crate::{Mode, PlotConfiguration, SamplingMode, Throughput}; /// Structure used to group together a set of related benchmarks, along with custom configuration /// settings for groups of benchmarks. All benchmarks performed using a benchmark group will be diff --git a/src/codspeed/bencher.rs b/src/codspeed/bencher.rs new file mode 100644 index 0000000..a42f8d2 --- /dev/null +++ b/src/codspeed/bencher.rs @@ -0,0 +1,260 @@ +#[cfg(feature = "async")] +use std::future::Future; +use std::{cell::RefCell, rc::Rc}; + +use codspeed::codspeed::{black_box, CodSpeed}; +use colored::Colorize; + +#[cfg(feature = "async")] +use crate::async_executor::AsyncExecutor; + +use crate::{BatchSize, Measurement, WallTime}; + +pub struct Bencher<'a, M: Measurement = WallTime> { + codspeed: Rc>, + uri: String, + _marker: std::marker::PhantomData<&'a M>, +} + +impl<'a> Bencher<'a> { + pub fn new(codspeed: Rc>, uri: String) -> Self { + Bencher { codspeed, uri, _marker: std::marker::PhantomData } + } + + #[inline(never)] + pub fn iter(&mut self, mut routine: R) + where + R: FnMut() -> O, + { + let mut codspeed = self.codspeed.borrow_mut(); + // NOTE: this structure hardens our benchmark against dead code elimination + // https://godbolt.org/z/KnYeKMd1o + for i in 0..codspeed::codspeed::WARMUP_RUNS + 1 { + if i < codspeed::codspeed::WARMUP_RUNS { + black_box(routine()); + } else { + codspeed.start_benchmark(self.uri.as_str()); + black_box(routine()); + codspeed.end_benchmark(); + } + } + } + + #[inline(never)] + pub fn iter_custom(&mut self, mut _routine: R) + where + R: FnMut(u64) -> MV, + { + println!( + "{} {} (CodSpeed doesn't support custom iterations)", + "Skipping:".to_string().yellow(), + self.uri.yellow(), + ); + } + + #[inline(never)] + pub fn iter_batched(&mut self, mut setup: S, mut routine: R, _size: BatchSize) + where + S: FnMut() -> I, + R: FnMut(I) -> O, + { + let mut codspeed = self.codspeed.borrow_mut(); + + for i in 0..codspeed::codspeed::WARMUP_RUNS + 1 { + let input = black_box(setup()); + let output = if i < codspeed::codspeed::WARMUP_RUNS { + black_box(routine(input)) + } else { + let input = black_box(setup()); + codspeed.start_benchmark(self.uri.as_str()); + let output = black_box(routine(input)); + codspeed.end_benchmark(); + output + }; + drop(black_box(output)); + } + } + + pub fn iter_with_setup(&mut self, setup: S, routine: R) + where + S: FnMut() -> I, + R: FnMut(I) -> O, + { + self.iter_batched(setup, routine, BatchSize::PerIteration); + } + + pub fn iter_with_large_drop(&mut self, mut routine: R) + where + R: FnMut() -> O, + { + self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput); + } + + pub fn iter_with_large_setup(&mut self, setup: S, routine: R) + where + S: FnMut() -> I, + R: FnMut(I) -> O, + { + self.iter_batched(setup, routine, BatchSize::NumBatches(1)); + } + + 
#[inline(never)] + pub fn iter_batched_ref(&mut self, mut setup: S, mut routine: R, _size: BatchSize) + where + S: FnMut() -> I, + R: FnMut(&mut I) -> O, + { + let mut codspeed = self.codspeed.borrow_mut(); + + for i in 0..codspeed::codspeed::WARMUP_RUNS + 1 { + let mut input = black_box(setup()); + let output = if i < codspeed::codspeed::WARMUP_RUNS { + black_box(routine(&mut input)) + } else { + codspeed.start_benchmark(self.uri.as_str()); + let output = black_box(routine(&mut input)); + codspeed.end_benchmark(); + output + }; + drop(black_box(output)); + drop(black_box(input)); + } + } + + #[cfg(feature = "async")] + pub fn to_async<'b, A: AsyncExecutor>(&'b mut self, runner: A) -> AsyncBencher<'a, 'b, A> { + AsyncBencher { b: self, runner } + } +} + +#[cfg(feature = "async")] +pub struct AsyncBencher<'a, 'b, A: AsyncExecutor> { + b: &'b mut Bencher<'a>, + runner: A, +} + +#[cfg(feature = "async")] +impl<'a, 'b, A: AsyncExecutor> AsyncBencher<'a, 'b, A> { + #[allow(clippy::await_holding_refcell_ref)] + #[inline(never)] + pub fn iter(&mut self, mut routine: R) + where + R: FnMut() -> F, + F: Future, + { + let AsyncBencher { b, runner } = self; + runner.block_on(async { + let mut codspeed = b.codspeed.borrow_mut(); + for i in 0..codspeed::codspeed::WARMUP_RUNS + 1 { + if i < codspeed::codspeed::WARMUP_RUNS { + black_box(routine().await); + } else { + codspeed.start_benchmark(b.uri.as_str()); + black_box(routine().await); + codspeed.end_benchmark(); + } + } + }); + } + + #[inline(never)] + pub fn iter_custom(&mut self, mut _routine: R) + where + R: FnMut(u64) -> F, + F: Future, + { + let AsyncBencher { b, .. } = self; + println!( + "{} {} (CodSpeed doesn't support custom iterations)", + "Skipping:".to_string().yellow(), + b.uri.yellow(), + ); + } + + #[doc(hidden)] + pub fn iter_with_setup(&mut self, setup: S, routine: R) + where + S: FnMut() -> I, + R: FnMut(I) -> F, + F: Future, + { + self.iter_batched(setup, routine, BatchSize::PerIteration); + } + + pub fn iter_with_large_drop(&mut self, mut routine: R) + where + R: FnMut() -> F, + F: Future, + { + self.iter_batched(|| (), |_| routine(), BatchSize::SmallInput); + } + + #[doc(hidden)] + pub fn iter_with_large_setup(&mut self, setup: S, routine: R) + where + S: FnMut() -> I, + R: FnMut(I) -> F, + F: Future, + { + self.iter_batched(setup, routine, BatchSize::NumBatches(1)); + } + + #[allow(clippy::await_holding_refcell_ref)] + #[inline(never)] + pub fn iter_batched(&mut self, mut setup: S, mut routine: R, _size: BatchSize) + where + S: FnMut() -> I, + R: FnMut(I) -> F, + F: Future, + { + let AsyncBencher { b, runner } = self; + runner.block_on(async { + let mut codspeed = b.codspeed.borrow_mut(); + + for i in 0..codspeed::codspeed::WARMUP_RUNS + 1 { + let input = black_box(setup()); + let output = if i < codspeed::codspeed::WARMUP_RUNS { + black_box(routine(input).await) + } else { + codspeed.start_benchmark(b.uri.as_str()); + let output = black_box(routine(input).await); + codspeed.end_benchmark(); + output + }; + drop(black_box(output)); + } + }) + } + + #[allow(clippy::await_holding_refcell_ref)] + #[inline(never)] + pub fn iter_batched_ref( + &mut self, + mut setup: S, + mut routine: R, + _size: BatchSize, + ) where + S: FnMut() -> I, + R: FnMut(&mut I) -> F, + F: Future, + { + let AsyncBencher { b, runner } = self; + runner.block_on(async { + let mut codspeed = b.codspeed.borrow_mut(); + + for i in 0..codspeed::codspeed::WARMUP_RUNS + 1 { + let mut input = black_box(setup()); + let output = if i < codspeed::codspeed::WARMUP_RUNS { + 
black_box(routine(&mut input).await) + } else { + codspeed.start_benchmark(b.uri.as_str()); + let output = black_box(routine(&mut input).await); + codspeed.end_benchmark(); + output + }; + drop(black_box(output)); + drop(black_box(input)); + } + }); + } +} diff --git a/src/codspeed/benchmark_group.rs b/src/codspeed/benchmark_group.rs new file mode 100644 index 0000000..3f3dc50 --- /dev/null +++ b/src/codspeed/benchmark_group.rs @@ -0,0 +1,174 @@ +use std::marker::PhantomData; +use std::{cell::RefCell, rc::Rc, time::Duration}; + +use codspeed::{codspeed::CodSpeed, utils::get_git_relative_path}; + +use crate::measurement::WallTime; +use crate::{measurement::Measurement, PlotConfiguration, SamplingMode, Throughput}; + +use super::bencher::Bencher; +use super::criterion::Criterion; + +/// Deprecated: using the default measurement will be removed in the next major version. +/// Defaulting to WallTime differs from the original BenchmarkGroup implementation but avoids creating a breaking change +pub struct BenchmarkGroup<'a, M: Measurement = WallTime> { + codspeed: Rc>, + current_file: String, + macro_group: String, + group_name: String, + _marker: PhantomData<&'a M>, +} + +impl<'a, M: Measurement> BenchmarkGroup<'a, M> { + pub fn new(criterion: &mut Criterion, group_name: String) -> BenchmarkGroup { + BenchmarkGroup:: { + codspeed: criterion + .codspeed + .as_ref() + .expect("non instrumented codspeed interface") + .clone(), + current_file: criterion.current_file.clone(), + macro_group: criterion.macro_group.clone(), + group_name, + _marker: PhantomData, + } + } + + pub fn bench_function(&mut self, id: ID, mut f: F) -> &mut Self + where + F: FnMut(&mut Bencher), + { + self.run_bench(id.into_benchmark_id(), &(), |b, _| f(b)); + self + } + + pub fn bench_with_input( + &mut self, + id: ID, + input: &I, + f: F, + ) -> &mut Self + where + F: FnMut(&mut Bencher, &I), + I: ?Sized, + { + self.run_bench(id.into_benchmark_id(), input, f); + self + } + + fn run_bench(&mut self, id: BenchmarkId, input: &I, mut f: F) + where + F: FnMut(&mut Bencher, &I), + I: ?Sized, + { + let git_relative_file_path = get_git_relative_path(&self.current_file); + let mut uri = format!( + "{}::{}::{}", + git_relative_file_path.to_string_lossy(), + self.macro_group, + self.group_name, + ); + if let Some(function_name) = id.function_name { + uri = format!("{}::{}", uri, function_name); + } + if let Some(parameter) = id.parameter { + uri = format!("{}[{}]", uri, parameter); + } + let mut b = Bencher::new(self.codspeed.clone(), uri); + f(&mut b, input); + } +} + +// Dummy methods +#[allow(unused_variables)] +impl<'a, M: Measurement> BenchmarkGroup<'a, M> { + pub fn sample_size(&mut self, n: usize) -> &mut Self { + self + } + pub fn warm_up_time(&mut self, dur: Duration) -> &mut Self { + self + } + pub fn measurement_time(&mut self, dur: Duration) -> &mut Self { + self + } + pub fn nresamples(&mut self, n: usize) -> &mut Self { + self + } + pub fn noise_threshold(&mut self, threshold: f64) -> &mut Self { + self + } + pub fn confidence_level(&mut self, cl: f64) -> &mut Self { + self + } + pub fn significance_level(&mut self, sl: f64) -> &mut Self { + self + } + pub fn throughput(&mut self, throughput: Throughput) -> &mut Self { + self + } + pub fn sampling_mode(&mut self, new_mode: SamplingMode) -> &mut Self { + self + } + pub fn plot_config(&mut self, new_config: PlotConfiguration) -> &mut Self { + self + } + pub fn finish(self) {} +} + +// BenchmarkId is a copy of the BenchmarkId struct from criterion.rs allowing private fields to 
+// be used in this crate. +#[derive(Clone, Eq, PartialEq, Hash)] +pub struct BenchmarkId { + pub(crate) function_name: Option, + pub(crate) parameter: Option, +} + +impl BenchmarkId { + pub fn new, P: ::std::fmt::Display>( + function_name: S, + parameter: P, + ) -> BenchmarkId { + BenchmarkId { + function_name: Some(function_name.into()), + parameter: Some(format!("{}", parameter)), + } + } + + /// Construct a new benchmark ID from just a parameter value. Use this when benchmarking a + /// single function with a variety of different inputs. + pub fn from_parameter(parameter: P) -> BenchmarkId { + BenchmarkId { function_name: None, parameter: Some(format!("{}", parameter)) } + } + + pub(crate) fn no_function() -> BenchmarkId { + BenchmarkId { function_name: None, parameter: None } + } + + pub(crate) fn no_function_with_input(parameter: P) -> BenchmarkId { + BenchmarkId { function_name: None, parameter: Some(format!("{}", parameter)) } + } +} + +mod private { + pub trait Sealed {} + impl Sealed for super::BenchmarkId {} + impl> Sealed for S {} +} + +/// Sealed trait which allows users to automatically convert strings to benchmark IDs. +pub trait IntoBenchmarkId: private::Sealed { + fn into_benchmark_id(self) -> BenchmarkId; +} +impl IntoBenchmarkId for BenchmarkId { + fn into_benchmark_id(self) -> BenchmarkId { + self + } +} +impl> IntoBenchmarkId for S { + fn into_benchmark_id(self) -> BenchmarkId { + let function_name = self.into(); + assert!(!function_name.is_empty(), "Function name must not be empty."); + + BenchmarkId { function_name: Some(function_name), parameter: None } + } +} diff --git a/src/codspeed/criterion.rs b/src/codspeed/criterion.rs new file mode 100644 index 0000000..9bfc125 --- /dev/null +++ b/src/codspeed/criterion.rs @@ -0,0 +1,158 @@ +use std::{cell::RefCell, marker::PhantomData, rc::Rc, time::Duration}; + +use crate::{ + measurement::{Measurement, WallTime}, + profiler::Profiler, + PlottingBackend, +}; +use codspeed::codspeed::CodSpeed; + +use crate::{Bencher, BenchmarkGroup, BenchmarkId}; + +pub struct Criterion { + pub codspeed: Option>>, + pub current_file: String, + pub macro_group: String, + phantom: PhantomData<*const M>, +} + +#[doc(hidden)] +impl Criterion { + pub fn new_instrumented() -> Self { + println!("Harness: codspeed-criterion-compat v{}", env!("CARGO_PKG_VERSION"),); + Criterion { + codspeed: Some(Rc::new(RefCell::new(CodSpeed::new()))), + current_file: String::new(), + macro_group: String::new(), + phantom: PhantomData, + } + } + + pub fn with_patched_measurement(&mut self, _: Criterion) -> Criterion { + Criterion { + codspeed: self.codspeed.clone(), + current_file: self.current_file.clone(), + macro_group: self.macro_group.clone(), + phantom: PhantomData, + } + } +} + +impl Criterion { + #[doc(hidden)] + pub fn set_current_file(&mut self, file: impl Into) { + self.current_file = file.into(); + } + + #[doc(hidden)] + pub fn set_macro_group(&mut self, macro_group: impl Into) { + self.macro_group = macro_group.into(); + } + + pub fn bench_function(&mut self, id: &str, f: F) -> &mut Criterion + where + F: FnMut(&mut Bencher), + { + self.benchmark_group(id).bench_function(BenchmarkId::no_function(), f); + self + } + + pub fn bench_with_input(&mut self, id: BenchmarkId, input: &I, f: F) -> &mut Criterion + where + F: FnMut(&mut Bencher, &I), + { + let group_name = id.function_name.expect( + "Cannot use BenchmarkId::from_parameter with Criterion::bench_with_input. 
\ + Consider using a BenchmarkGroup or BenchmarkId::new instead.", + ); + let parameter = id.parameter.unwrap(); + self.benchmark_group(group_name).bench_with_input( + BenchmarkId::no_function_with_input(parameter), + input, + f, + ); + self + } + + pub fn benchmark_group>(&mut self, group_name: S) -> BenchmarkGroup { + BenchmarkGroup::::new(self, group_name.into()) + } +} + +// Dummy methods +#[allow(clippy::derivable_impls)] +impl Default for Criterion { + // Dummy method creating an empty Criterion helper useful to mock the configuration + fn default() -> Self { + Criterion { + codspeed: None, + current_file: String::new(), + macro_group: String::new(), + phantom: PhantomData, + } + } +} + +#[allow(dead_code, unused_variables, unused_mut)] +impl Criterion { + pub fn with_measurement(self, m: M2) -> Criterion { + Criterion { + codspeed: self.codspeed, + current_file: self.current_file, + macro_group: self.macro_group, + phantom: PhantomData::<*const M2>, + } + } + pub fn with_profiler(self, p: P) -> Criterion { + self + } + pub fn plotting_backend(mut self, backend: PlottingBackend) -> Criterion { + self + } + pub fn sample_size(mut self, n: usize) -> Criterion { + self + } + pub fn warm_up_time(mut self, dur: Duration) -> Criterion { + self + } + pub fn measurement_time(mut self, dur: Duration) -> Criterion { + self + } + pub fn nresamples(mut self, n: usize) -> Criterion { + self + } + pub fn noise_threshold(mut self, threshold: f64) -> Criterion { + self + } + pub fn confidence_level(mut self, cl: f64) -> Criterion { + self + } + pub fn significance_level(mut self, sl: f64) -> Criterion { + self + } + pub fn with_plots(mut self) -> Criterion { + self + } + pub fn without_plots(mut self) -> Criterion { + self + } + pub fn can_plot(&self) -> bool { + true + } + pub fn save_baseline(mut self, baseline: String) -> Criterion { + self + } + pub fn retain_baseline(mut self, baseline: String) -> Criterion { + self + } + pub fn with_filter>(mut self, filter: S) -> Criterion { + //FIXME: Implement + self + } + pub fn with_output_color(mut self, enabled: bool) -> Criterion { + self + } + pub fn configure_from_args(mut self) -> Criterion { + self + } +} diff --git a/src/codspeed/mod.rs b/src/codspeed/mod.rs new file mode 100644 index 0000000..bfa92ca --- /dev/null +++ b/src/codspeed/mod.rs @@ -0,0 +1,3 @@ +pub mod bencher; +pub mod benchmark_group; +pub mod criterion; diff --git a/src/criterion.rs b/src/criterion.rs new file mode 100644 index 0000000..d243f49 --- /dev/null +++ b/src/criterion.rs @@ -0,0 +1,898 @@ +use std::cell::RefCell; +use std::collections::HashSet; +use std::io::{stdout, IsTerminal}; +use std::path::{Path, PathBuf}; +use std::sync::MutexGuard; +use std::time::Duration; + +use regex::Regex; + +use crate::bencher::Bencher; +use crate::benchmark_group::{BenchmarkGroup, BenchmarkId}; +use crate::{ + debug_enabled, Baseline, BencherReport, BenchmarkConfig, BenchmarkFilter, CliReport, + CliVerbosity, Connection, ExternalProfiler, Html, ListFormat, Measurement, Mode, + OutgoingMessage, PlotConfiguration, PlottingBackend, Profiler, Report, ReportContext, Reports, + SamplingMode, WallTime, CARGO_CRITERION_CONNECTION, DEFAULT_OUTPUT_DIRECTORY, + DEFAULT_PLOTTING_BACKEND, GNUPLOT_VERSION, +}; + +/// The benchmark manager +/// +/// `Criterion` lets you configure and execute benchmarks +/// +/// Each benchmark consists of four phases: +/// +/// - **Warm-up**: The routine is repeatedly executed, to let the CPU/OS/JIT/interpreter adapt to +/// the new load +/// - **Measurement**: The 
routine is repeatedly executed, and timing information is collected into +/// a sample +/// - **Analysis**: The sample is analyzed and distilled into meaningful statistics that get +/// reported to stdout, stored in files, and plotted +/// - **Comparison**: The current sample is compared with the sample obtained in the previous +/// benchmark. +pub struct Criterion { + pub(crate) config: BenchmarkConfig, + pub(crate) filter: BenchmarkFilter, + pub(crate) report: Reports, + pub(crate) output_directory: PathBuf, + pub(crate) baseline_directory: String, + pub(crate) baseline: Baseline, + pub(crate) load_baseline: Option, + pub(crate) all_directories: HashSet, + pub(crate) all_titles: HashSet, + pub(crate) measurement: M, + pub(crate) profiler: Box>, + pub(crate) connection: Option>, + pub(crate) mode: Mode, +} + +impl Default for Criterion { + /// Creates a benchmark manager with the following default settings: + /// + /// - Sample size: 100 measurements + /// - Warm-up time: 3 s + /// - Measurement time: 5 s + /// - Bootstrap size: 100 000 resamples + /// - Noise threshold: 0.01 (1%) + /// - Confidence level: 0.95 + /// - Significance level: 0.05 + /// - Plotting: enabled, using gnuplot if available or plotters if gnuplot is not available + /// - No filter + fn default() -> Criterion { + let reports = Reports { + cli_enabled: true, + cli: CliReport::new(false, false, CliVerbosity::Normal), + bencher_enabled: false, + bencher: BencherReport, + html: DEFAULT_PLOTTING_BACKEND.create_plotter().map(Html::new), + csv_enabled: cfg!(feature = "csv_output"), + }; + + let mut criterion = Criterion { + config: BenchmarkConfig { + confidence_level: 0.95, + measurement_time: Duration::from_secs(5), + noise_threshold: 0.01, + nresamples: 100_000, + sample_size: 100, + significance_level: 0.05, + warm_up_time: Duration::from_secs(3), + sampling_mode: SamplingMode::Auto, + quick_mode: false, + }, + filter: BenchmarkFilter::AcceptAll, + report: reports, + baseline_directory: "base".to_owned(), + baseline: Baseline::Save, + load_baseline: None, + output_directory: DEFAULT_OUTPUT_DIRECTORY.clone(), + all_directories: HashSet::new(), + all_titles: HashSet::new(), + measurement: WallTime, + profiler: Box::new(RefCell::new(ExternalProfiler)), + connection: CARGO_CRITERION_CONNECTION.as_ref().map(|mtx| mtx.lock().unwrap()), + mode: Mode::Benchmark, + }; + + if criterion.connection.is_some() { + // disable all reports when connected to cargo-criterion; it will do the reporting. + criterion.report.cli_enabled = false; + criterion.report.bencher_enabled = false; + criterion.report.csv_enabled = false; + criterion.report.html = None; + } + criterion + } +} + +impl Criterion { + /// Changes the measurement for the benchmarks run with this runner. See the + /// Measurement trait for more details + pub fn with_measurement(self, m: M2) -> Criterion { + // Can't use struct update syntax here because they're technically different types. + Criterion { + config: self.config, + filter: self.filter, + report: self.report, + baseline_directory: self.baseline_directory, + baseline: self.baseline, + load_baseline: self.load_baseline, + output_directory: self.output_directory, + all_directories: self.all_directories, + all_titles: self.all_titles, + measurement: m, + profiler: self.profiler, + connection: self.connection, + mode: self.mode, + } + } + + #[must_use] + /// Changes the internal profiler for benchmarks run with this runner. See + /// the Profiler trait for more details. 
+    pub fn with_profiler<P: Profiler + 'static>(self, p: P) -> Criterion<M> {
+        Criterion { profiler: Box::new(RefCell::new(p)), ..self }
+    }
+
+    #[must_use]
+    /// Set the plotting backend. By default, Criterion will use gnuplot if available, or plotters
+    /// if not.
+    ///
+    /// Panics if `backend` is `PlottingBackend::Gnuplot` and gnuplot is not available.
+    pub fn plotting_backend(mut self, backend: PlottingBackend) -> Criterion<M> {
+        if let PlottingBackend::Gnuplot = backend {
+            assert!(
+                !GNUPLOT_VERSION.is_err(),
+                "Gnuplot plotting backend was requested, but gnuplot is not available. \
+                 To continue, either install Gnuplot or allow Criterion.rs to fall back \
+                 to using plotters."
+            );
+        }
+
+        self.report.html = backend.create_plotter().map(Html::new);
+        self
+    }
+
+    #[must_use]
+    /// Changes the default size of the sample for benchmarks run with this runner.
+    ///
+    /// A bigger sample should yield more accurate results if paired with a sufficiently large
+    /// measurement time.
+    ///
+    /// Sample size must be at least 10.
+    ///
+    /// # Panics
+    ///
+    /// Panics if n < 10
+    pub fn sample_size(mut self, n: usize) -> Criterion<M> {
+        assert!(n >= 10);
+
+        self.config.sample_size = n;
+        self
+    }
+
+    #[must_use]
+    /// Changes the default warm up time for benchmarks run with this runner.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the input duration is zero
+    pub fn warm_up_time(mut self, dur: Duration) -> Criterion<M> {
+        assert!(dur.as_nanos() > 0);
+
+        self.config.warm_up_time = dur;
+        self
+    }
+
+    #[must_use]
+    /// Changes the default measurement time for benchmarks run with this runner.
+    ///
+    /// With a longer time, the measurement will become more resilient to transitory peak loads
+    /// caused by external programs
+    ///
+    /// **Note**: If the measurement time is too "low", Criterion will automatically increase it
+    ///
+    /// # Panics
+    ///
+    /// Panics if the input duration is zero
+    pub fn measurement_time(mut self, dur: Duration) -> Criterion<M> {
+        assert!(dur.as_nanos() > 0);
+
+        self.config.measurement_time = dur;
+        self
+    }
+
+    #[must_use]
+    /// Changes the default number of resamples for benchmarks run with this runner.
+    ///
+    /// Number of resamples to use for the
+    /// [bootstrap](http://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Case_resampling)
+    ///
+    /// A larger number of resamples reduces the random sampling errors, which are inherent to the
+    /// bootstrap method, but also increases the analysis time
+    ///
+    /// # Panics
+    ///
+    /// Panics if the number of resamples is set to zero
+    pub fn nresamples(mut self, n: usize) -> Criterion<M> {
+        assert!(n > 0);
+        if n <= 1000 {
+            eprintln!("\nWarning: It is not recommended to reduce nresamples below 1000.");
+        }
+
+        self.config.nresamples = n;
+        self
+    }
+
+    #[must_use]
+    /// Changes the default noise threshold for benchmarks run with this runner. The noise threshold
+    /// is used to filter out small changes in performance, even if they are statistically
+    /// significant. Sometimes benchmarking the same code twice will result in small but
+    /// statistically significant differences solely because of noise. This provides a way to filter
+    /// out some of these false positives at the cost of making it harder to detect small changes
+    /// to the true performance of the benchmark.
+    ///
+    /// The default is 0.01, meaning that changes smaller than 1% will be ignored.
+ /// + /// # Panics + /// + /// Panics if the threshold is set to a negative value + pub fn noise_threshold(mut self, threshold: f64) -> Criterion { + assert!(threshold >= 0.0); + + self.config.noise_threshold = threshold; + self + } + + #[must_use] + /// Changes the default confidence level for benchmarks run with this runner. The confidence + /// level is the desired probability that the true runtime lies within the estimated + /// [confidence interval](https://en.wikipedia.org/wiki/Confidence_interval). The default is + /// 0.95, meaning that the confidence interval should capture the true value 95% of the time. + /// + /// # Panics + /// + /// Panics if the confidence level is set to a value outside the `(0, 1)` range + pub fn confidence_level(mut self, cl: f64) -> Criterion { + assert!(cl > 0.0 && cl < 1.0); + if cl < 0.5 { + eprintln!("\nWarning: It is not recommended to reduce confidence level below 0.5."); + } + + self.config.confidence_level = cl; + self + } + + #[must_use] + /// Changes the default [significance level](https://en.wikipedia.org/wiki/Statistical_significance) + /// for benchmarks run with this runner. This is used to perform a + /// [hypothesis test](https://en.wikipedia.org/wiki/Statistical_hypothesis_testing) to see if + /// the measurements from this run are different from the measured performance of the last run. + /// The significance level is the desired probability that two measurements of identical code + /// will be considered 'different' due to noise in the measurements. The default value is 0.05, + /// meaning that approximately 5% of identical benchmarks will register as different due to + /// noise. + /// + /// This presents a trade-off. By setting the significance level closer to 0.0, you can increase + /// the statistical robustness against noise, but it also weakens Criterion.rs' ability to + /// detect small but real changes in the performance. By setting the significance level + /// closer to 1.0, Criterion.rs will be more able to detect small true changes, but will also + /// report more spurious differences. + /// + /// See also the noise threshold setting. + /// + /// # Panics + /// + /// Panics if the significance level is set to a value outside the `(0, 1)` range + pub fn significance_level(mut self, sl: f64) -> Criterion { + assert!(sl > 0.0 && sl < 1.0); + + self.config.significance_level = sl; + self + } + + #[must_use] + /// Enables plotting + pub fn with_plots(mut self) -> Criterion { + // If running under cargo-criterion then don't re-enable the reports; let it do the reporting. + if self.connection.is_none() && self.report.html.is_none() { + let default_backend = DEFAULT_PLOTTING_BACKEND.create_plotter(); + if let Some(backend) = default_backend { + self.report.html = Some(Html::new(backend)); + } else { + panic!("Cannot find a default plotting backend!"); + } + } + self + } + + #[must_use] + /// Disables plotting + pub fn without_plots(mut self) -> Criterion { + self.report.html = None; + self + } + + #[must_use] + /// Names an explicit baseline and enables overwriting the previous results. + pub fn save_baseline(mut self, baseline: String) -> Criterion { + self.baseline_directory = baseline; + self.baseline = Baseline::Save; + self + } + + #[must_use] + /// Names an explicit baseline and disables overwriting the previous results. 
+ pub fn retain_baseline(mut self, baseline: String, strict: bool) -> Criterion { + self.baseline_directory = baseline; + self.baseline = if strict { Baseline::CompareStrict } else { Baseline::CompareLenient }; + self + } + + #[must_use] + /// Filters the benchmarks. Only benchmarks with names that contain the + /// given string will be executed. + /// + /// This overwrites [`Self::with_benchmark_filter`]. + pub fn with_filter>(mut self, filter: S) -> Criterion { + let filter_text = filter.into(); + let filter = Regex::new(&filter_text).unwrap_or_else(|err| { + panic!("Unable to parse '{}' as a regular expression: {}", filter_text, err) + }); + self.filter = BenchmarkFilter::Regex(filter); + + self + } + + /// Only run benchmarks specified by the given filter. + /// + /// This overwrites [`Self::with_filter`]. + pub fn with_benchmark_filter(mut self, filter: BenchmarkFilter) -> Criterion { + self.filter = filter; + + self + } + + #[must_use] + /// Override whether the CLI output will be colored or not. Usually you would use the `--color` + /// CLI argument, but this is available for programmmatic use as well. + pub fn with_output_color(mut self, enabled: bool) -> Criterion { + self.report.cli.enable_text_coloring = enabled; + self + } + + /// Set the output directory (currently for testing only) + #[must_use] + #[doc(hidden)] + pub fn output_directory(mut self, path: &Path) -> Criterion { + self.output_directory = path.to_owned(); + + self + } + + /// Set the profile time (currently for testing only) + #[must_use] + #[doc(hidden)] + pub fn profile_time(mut self, profile_time: Option) -> Criterion { + match profile_time { + Some(time) => self.mode = Mode::Profile(time), + None => self.mode = Mode::Benchmark, + } + + self + } + + /// Generate the final summary at the end of a run. + #[doc(hidden)] + pub fn final_summary(&self) { + if !self.mode.is_benchmark() { + return; + } + + let report_context = ReportContext { + output_directory: self.output_directory.clone(), + plot_config: PlotConfiguration::default(), + }; + + self.report.final_summary(&report_context); + } + + /// Configure this criterion struct based on the command-line arguments to + /// this process. + #[must_use] + #[cfg_attr(feature = "cargo-clippy", allow(clippy::cognitive_complexity))] + pub fn configure_from_args(mut self) -> Criterion { + use clap::{value_parser, Arg, Command}; + let matches = Command::new("Criterion Benchmark") + .arg(Arg::new("FILTER") + .help("Skip benchmarks whose names do not contain FILTER.") + .index(1)) + .arg(Arg::new("color") + .short('c') + .long("color") + .alias("colour") + .value_parser(["auto", "always", "never"]) + .default_value("auto") + .help("Configure coloring of output. 
always = always colorize output, never = never colorize output, auto = colorize output if output is a tty and compiled for unix.")) + .arg(Arg::new("verbose") + .short('v') + .long("verbose") + .num_args(0) + .help("Print additional statistical information.")) + .arg(Arg::new("quiet") + .long("quiet") + .num_args(0) + .conflicts_with("verbose") + .help("Print only the benchmark results.")) + .arg(Arg::new("noplot") + .short('n') + .long("noplot") + .num_args(0) + .help("Disable plot and HTML generation.")) + .arg(Arg::new("save-baseline") + .short('s') + .long("save-baseline") + .default_value("base") + .help("Save results under a named baseline.")) + .arg(Arg::new("discard-baseline") + .long("discard-baseline") + .num_args(0) + .conflicts_with_all(["save-baseline", "baseline", "baseline-lenient"]) + .help("Discard benchmark results.")) + .arg(Arg::new("baseline") + .short('b') + .long("baseline") + .conflicts_with_all(["save-baseline", "baseline-lenient"]) + .help("Compare to a named baseline. If any benchmarks do not have the specified baseline this command fails.")) + .arg(Arg::new("baseline-lenient") + .long("baseline-lenient") + .conflicts_with_all(["save-baseline", "baseline"]) + .help("Compare to a named baseline. If any benchmarks do not have the specified baseline then just those benchmarks are not compared against the baseline while every other benchmark is compared against the baseline.")) + .arg(Arg::new("list") + .long("list") + .num_args(0) + .help("List all benchmarks") + .conflicts_with_all(["test", "profile-time"])) + .arg(Arg::new("format") + .long("format") + .value_parser(["pretty", "terse"]) + .default_value("pretty") + // Note that libtest's --format also works during test execution, but criterion + // doesn't support that at the moment. + .help("Output formatting")) + .arg(Arg::new("ignored") + .long("ignored") + .num_args(0) + .help("List or run ignored benchmarks (currently means skip all benchmarks)")) + .arg(Arg::new("exact") + .long("exact") + .num_args(0) + .help("Run benchmarks that exactly match the provided filter")) + .arg(Arg::new("profile-time") + .long("profile-time") + .value_parser(value_parser!(f64)) + .help("Iterate each benchmark for approximately the given number of seconds, doing no analysis and without storing the results. Useful for running the benchmarks in a profiler.") + .conflicts_with_all(["test", "list"])) + .arg(Arg::new("load-baseline") + .long("load-baseline") + .conflicts_with("profile-time") + .requires("baseline") + .help("Load a previous baseline instead of sampling new data.")) + .arg(Arg::new("sample-size") + .long("sample-size") + .value_parser(value_parser!(usize)) + .help(format!("Changes the default size of the sample for this run. [default: {}]", self.config.sample_size))) + .arg(Arg::new("warm-up-time") + .long("warm-up-time") + .value_parser(value_parser!(f64)) + .help(format!("Changes the default warm up time for this run. [default: {}]", self.config.warm_up_time.as_secs()))) + .arg(Arg::new("measurement-time") + .long("measurement-time") + .value_parser(value_parser!(f64)) + .help(format!("Changes the default measurement time for this run. [default: {}]", self.config.measurement_time.as_secs()))) + .arg(Arg::new("nresamples") + .long("nresamples") + .value_parser(value_parser!(usize)) + .help(format!("Changes the default number of resamples for this run. 
[default: {}]", self.config.nresamples))) + .arg(Arg::new("noise-threshold") + .long("noise-threshold") + .value_parser(value_parser!(f64)) + .help(format!("Changes the default noise threshold for this run. [default: {}]", self.config.noise_threshold))) + .arg(Arg::new("confidence-level") + .long("confidence-level") + .value_parser(value_parser!(f64)) + .help(format!("Changes the default confidence level for this run. [default: {}]", self.config.confidence_level))) + .arg(Arg::new("significance-level") + .long("significance-level") + .value_parser(value_parser!(f64)) + .help(format!("Changes the default significance level for this run. [default: {}]", self.config.significance_level))) + .arg(Arg::new("quick") + .long("quick") + .num_args(0) + .conflicts_with("sample-size") + .help(format!("Benchmark only until the significance level has been reached [default: {}]", self.config.quick_mode))) + .arg(Arg::new("test") + .hide(true) + .long("test") + .num_args(0) + .help("Run the benchmarks once, to verify that they execute successfully, but do not measure or report the results.") + .conflicts_with_all(["list", "profile-time"])) + .arg(Arg::new("bench") + .hide(true) + .long("bench") + .num_args(0)) + .arg(Arg::new("plotting-backend") + .long("plotting-backend") + .value_parser(["gnuplot", "plotters"]) + .help("Set the plotting backend. By default, Criterion.rs will use the gnuplot backend if gnuplot is available, or the plotters backend if it isn't.")) + .arg(Arg::new("output-format") + .long("output-format") + .value_parser(["criterion", "bencher"]) + .default_value("criterion") + .help("Change the CLI output format. By default, Criterion.rs will use its own format. If output format is set to 'bencher', Criterion.rs will print output in a format that resembles the 'bencher' crate.")) + .arg(Arg::new("nocapture") + .long("nocapture") + .num_args(0) + .hide(true) + .help("Ignored, but added for compatibility with libtest.")) + .arg(Arg::new("show-output") + .long("show-output") + .num_args(0) + .hide(true) + .help("Ignored, but added for compatibility with libtest.")) + .arg(Arg::new("include-ignored") + .long("include-ignored") + .num_args(0) + .hide(true) + .help("Ignored, but added for compatibility with libtest.")) + .arg(Arg::new("version") + .hide(true) + .short('V') + .long("version") + .num_args(0)) + .after_help(" +This executable is a Criterion.rs benchmark. +See https://github.com/bheisler/criterion.rs for more details. + +To enable debug output, define the environment variable CRITERION_DEBUG. +Criterion.rs will output more debug information and will save the gnuplot +scripts alongside the generated plots. + +To test that the benchmarks work, run `cargo test --benches` + +NOTE: If you see an 'unrecognized option' error using any of the options above, see: +https://bheisler.github.io/criterion.rs/book/faq.html +") + .get_matches(); + + if self.connection.is_some() { + if let Some(color) = matches.get_one::("color") { + if color != "auto" { + eprintln!("Warning: --color will be ignored when running with cargo-criterion. Use `cargo criterion --color {} -- ` instead.", color); + } + } + if matches.get_flag("verbose") { + eprintln!("Warning: --verbose will be ignored when running with cargo-criterion. Use `cargo criterion --output-format verbose -- ` instead."); + } + if matches.get_flag("noplot") { + eprintln!("Warning: --noplot will be ignored when running with cargo-criterion. 
Use `cargo criterion --plotting-backend disabled -- ` instead."); + } + if let Some(backend) = matches.get_one::("plotting-backend") { + eprintln!("Warning: --plotting-backend will be ignored when running with cargo-criterion. Use `cargo criterion --plotting-backend {} -- ` instead.", backend); + } + if let Some(format) = matches.get_one::("output-format") { + if format != "criterion" { + eprintln!("Warning: --output-format will be ignored when running with cargo-criterion. Use `cargo criterion --output-format {} -- ` instead.", format); + } + } + + if matches.contains_id("baseline") + || matches.get_one::("save-baseline").map_or(false, |base| base != "base") + || matches.contains_id("load-baseline") + { + eprintln!("Error: baselines are not supported when running with cargo-criterion."); + std::process::exit(1); + } + } + + let bench = matches.get_flag("bench"); + let test = matches.get_flag("test"); + let test_mode = match (bench, test) { + (true, true) => true, // cargo bench -- --test should run tests + (true, false) => false, // cargo bench should run benchmarks + (false, _) => true, // cargo test --benches should run tests + }; + + self.mode = if matches.get_flag("list") { + let list_format = match matches + .get_one::("format") + .expect("a default value was provided for this") + .as_str() + { + "pretty" => ListFormat::Pretty, + "terse" => ListFormat::Terse, + other => unreachable!( + "unrecognized value for --format that isn't part of possible-values: {}", + other + ), + }; + Mode::List(list_format) + } else if test_mode { + Mode::Test + } else if let Some(&num_seconds) = matches.get_one("profile-time") { + if num_seconds < 1.0 { + eprintln!("Profile time must be at least one second."); + std::process::exit(1); + } + + Mode::Profile(Duration::from_secs_f64(num_seconds)) + } else { + Mode::Benchmark + }; + + // This is kind of a hack, but disable the connection to the runner if we're not benchmarking. + if !self.mode.is_benchmark() { + self.connection = None; + } + + let filter = if matches.get_flag("ignored") { + // --ignored overwrites any name-based filters passed in. + BenchmarkFilter::RejectAll + } else if let Some(filter) = matches.get_one::("FILTER") { + if matches.get_flag("exact") { + BenchmarkFilter::Exact(filter.to_owned()) + } else { + let regex = Regex::new(filter).unwrap_or_else(|err| { + panic!("Unable to parse '{}' as a regular expression: {}", filter, err) + }); + BenchmarkFilter::Regex(regex) + } + } else { + BenchmarkFilter::AcceptAll + }; + self = self.with_benchmark_filter(filter); + + match matches.get_one("plotting-backend").map(String::as_str) { + // Use plotting_backend() here to re-use the panic behavior if Gnuplot is not available. 
+ Some("gnuplot") => self = self.plotting_backend(PlottingBackend::Gnuplot), + Some("plotters") => self = self.plotting_backend(PlottingBackend::Plotters), + Some(val) => panic!("Unexpected plotting backend '{}'", val), + None => {} + } + + if matches.get_flag("noplot") { + self = self.without_plots(); + } + + if let Some(dir) = matches.get_one::("save-baseline") { + self.baseline = Baseline::Save; + self.baseline_directory = dir.to_owned() + } + if matches.get_flag("discard-baseline") { + self.baseline = Baseline::Discard; + } + if let Some(dir) = matches.get_one::("baseline") { + self.baseline = Baseline::CompareStrict; + self.baseline_directory = dir.to_owned(); + } + if let Some(dir) = matches.get_one::("baseline-lenient") { + self.baseline = Baseline::CompareLenient; + self.baseline_directory = dir.to_owned(); + } + + if self.connection.is_some() { + // disable all reports when connected to cargo-criterion; it will do the reporting. + self.report.cli_enabled = false; + self.report.bencher_enabled = false; + self.report.csv_enabled = false; + self.report.html = None; + } else { + match matches.get_one("output-format").map(String::as_str) { + Some("bencher") => { + self.report.bencher_enabled = true; + self.report.cli_enabled = false; + } + _ => { + let verbose = matches.get_flag("verbose"); + let verbosity = if verbose { + CliVerbosity::Verbose + } else if matches.get_flag("quiet") { + CliVerbosity::Quiet + } else { + CliVerbosity::Normal + }; + let stdout_isatty = stdout().is_terminal(); + let mut enable_text_overwrite = stdout_isatty && !verbose && !debug_enabled(); + let enable_text_coloring; + match matches.get_one("color").map(String::as_str) { + Some("always") => { + enable_text_coloring = true; + } + Some("never") => { + enable_text_coloring = false; + enable_text_overwrite = false; + } + _ => enable_text_coloring = stdout_isatty, + }; + self.report.bencher_enabled = false; + self.report.cli_enabled = true; + self.report.cli = + CliReport::new(enable_text_overwrite, enable_text_coloring, verbosity); + } + }; + } + + if let Some(dir) = matches.get_one::("load-baseline") { + self.load_baseline = Some(dir.to_owned()); + } + + if let Some(&num_size) = matches.get_one("sample-size") { + assert!(num_size >= 10); + self.config.sample_size = num_size; + } + if let Some(&num_seconds) = matches.get_one("warm-up-time") { + let dur = std::time::Duration::from_secs_f64(num_seconds); + assert!(dur.as_nanos() > 0); + + self.config.warm_up_time = dur; + } + if let Some(&num_seconds) = matches.get_one("measurement-time") { + let dur = std::time::Duration::from_secs_f64(num_seconds); + assert!(dur.as_nanos() > 0); + + self.config.measurement_time = dur; + } + if let Some(&num_resamples) = matches.get_one("nresamples") { + assert!(num_resamples > 0); + + self.config.nresamples = num_resamples; + } + if let Some(&num_noise_threshold) = matches.get_one("noise-threshold") { + assert!(num_noise_threshold > 0.0); + + self.config.noise_threshold = num_noise_threshold; + } + if let Some(&num_confidence_level) = matches.get_one("confidence-level") { + assert!(num_confidence_level > 0.0 && num_confidence_level < 1.0); + + self.config.confidence_level = num_confidence_level; + } + if let Some(&num_significance_level) = matches.get_one("significance-level") { + assert!(num_significance_level > 0.0 && num_significance_level < 1.0); + + self.config.significance_level = num_significance_level; + } + + if matches.get_flag("quick") { + self.config.quick_mode = true; + } + + self + } + + pub(crate) fn 
filter_matches(&self, id: &str) -> bool { + match &self.filter { + BenchmarkFilter::AcceptAll => true, + BenchmarkFilter::Regex(regex) => regex.is_match(id), + BenchmarkFilter::Exact(exact) => id == exact, + BenchmarkFilter::RejectAll => false, + } + } + + /// Returns true iff we should save the benchmark results in + /// json files on the local disk. + pub(crate) fn should_save_baseline(&self) -> bool { + self.connection.is_none() + && self.load_baseline.is_none() + && !matches!(self.baseline, Baseline::Discard) + } + + /// Return a benchmark group. All benchmarks performed using a benchmark group will be + /// grouped together in the final report. + /// + /// # Examples: + /// + /// ```rust + /// use self::criterion2::*; + /// + /// fn bench_simple(c: &mut Criterion) { + /// let mut group = c.benchmark_group("My Group"); + /// + /// // Now we can perform benchmarks with this group + /// group.bench_function("Bench 1", |b| b.iter(|| 1 )); + /// group.bench_function("Bench 2", |b| b.iter(|| 2 )); + /// + /// group.finish(); + /// } + /// criterion_group!(benches, bench_simple); + /// criterion_main!(benches); + /// ``` + /// # Panics: + /// Panics if the group name is empty + pub fn benchmark_group>(&mut self, group_name: S) -> BenchmarkGroup<'_, M> { + let group_name = group_name.into(); + assert!(!group_name.is_empty(), "Group name must not be empty."); + + if let Some(conn) = &self.connection { + conn.send(&OutgoingMessage::BeginningBenchmarkGroup { group: &group_name }).unwrap(); + } + + BenchmarkGroup::new(self, group_name) + } +} +impl Criterion +where + M: Measurement + 'static, +{ + /// Benchmarks a function. For comparing multiple functions, see `benchmark_group`. + /// + /// # Example + /// + /// ```rust + /// use self::criterion2::*; + /// + /// fn bench(c: &mut Criterion) { + /// // Setup (construct data, allocate memory, etc) + /// c.bench_function( + /// "function_name", + /// |b| b.iter(|| { + /// // Code to benchmark goes here + /// }), + /// ); + /// } + /// + /// criterion_group!(benches, bench); + /// criterion_main!(benches); + /// ``` + pub fn bench_function(&mut self, id: &str, f: F) -> &mut Criterion + where + F: FnMut(&mut Bencher<'_, M>), + { + self.benchmark_group(id).bench_function(BenchmarkId::no_function(), f); + self + } + + /// Benchmarks a function with an input. For comparing multiple functions or multiple inputs, + /// see `benchmark_group`. + /// + /// # Example + /// + /// ```rust + /// use self::criterion2::*; + /// + /// fn bench(c: &mut Criterion) { + /// // Setup (construct data, allocate memory, etc) + /// let input = 5u64; + /// c.bench_with_input( + /// BenchmarkId::new("function_name", input), &input, + /// |b, i| b.iter(|| { + /// // Code to benchmark using input `i` goes here + /// }), + /// ); + /// } + /// + /// criterion_group!(benches, bench); + /// criterion_main!(benches); + /// ``` + pub fn bench_with_input(&mut self, id: BenchmarkId, input: &I, f: F) -> &mut Criterion + where + F: FnMut(&mut Bencher<'_, M>, &I), + { + // It's possible to use BenchmarkId::from_parameter to create a benchmark ID with no function + // name. That's intended for use with BenchmarkGroups where the function name isn't necessary, + // but here it is. + let group_name = id.function_name.expect( + "Cannot use BenchmarkId::from_parameter with Criterion::bench_with_input. 
\ + Consider using a BenchmarkGroup or BenchmarkId::new instead.", + ); + // Guaranteed safe because external callers can't create benchmark IDs without a parameter + let parameter = id.parameter.unwrap(); + self.benchmark_group(group_name).bench_with_input( + BenchmarkId::no_function_with_input(parameter), + input, + f, + ); + self + } +} diff --git a/src/lib.rs b/src/lib.rs index 8cb8074..fab83a4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,6 @@ //! * Produces detailed charts, providing thorough understanding of your code's //! performance behavior. -#![warn(missing_docs)] #![warn(bare_trait_objects)] #![cfg_attr( feature = "cargo-clippy", @@ -25,6 +24,7 @@ clippy::manual_non_exhaustive, // Remove when MSRV bumped above 1.40 ) )] +#![cfg_attr(feature = "codspeed", allow(unused))] #[cfg(all(feature = "rayon", target_arch = "wasm32"))] compile_error!("Rayon cannot be used when targeting wasi32. Try disabling default features."); @@ -42,9 +42,13 @@ mod analysis; mod benchmark; #[macro_use] mod benchmark_group; +#[cfg(feature = "codspeed")] +#[macro_use] +pub mod codspeed; pub mod async_executor; mod bencher; mod connection; +mod criterion; #[cfg(feature = "csv_output")] mod csv_report; mod error; @@ -53,7 +57,6 @@ mod format; mod fs; mod html; mod kde; -mod macros; pub mod measurement; mod plot; pub mod profiler; @@ -61,15 +64,19 @@ mod report; mod routine; mod stats; -use std::cell::RefCell; -use std::collections::HashSet; +#[cfg(not(feature = "codspeed"))] +#[macro_use] +mod macros; +#[cfg(feature = "codspeed")] +#[macro_use] +mod macros_codspeed; + use std::default::Default; use std::env; -use std::io::{stdout, IsTerminal}; use std::net::TcpStream; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::process::Command; -use std::sync::{Mutex, MutexGuard}; +use std::sync::Mutex; use std::time::Duration; use criterion_plot::{Version, VersionError}; @@ -87,9 +94,26 @@ use crate::profiler::{ExternalProfiler, Profiler}; use crate::report::{BencherReport, CliReport, CliVerbosity, Report, ReportContext, Reports}; #[cfg(feature = "async")] +#[cfg(not(feature = "codspeed"))] pub use crate::bencher::AsyncBencher; +#[cfg(feature = "async")] +#[cfg(feature = "codspeed")] +pub use crate::codspeed::bencher::AsyncBencher; + +#[cfg(not(feature = "codspeed"))] pub use crate::bencher::Bencher; +#[cfg(feature = "codspeed")] +pub use crate::codspeed::bencher::Bencher; + +#[cfg(not(feature = "codspeed"))] pub use crate::benchmark_group::{BenchmarkGroup, BenchmarkId}; +#[cfg(feature = "codspeed")] +pub use crate::codspeed::benchmark_group::{BenchmarkGroup, BenchmarkId}; + +#[cfg(feature = "codspeed")] +pub use crate::codspeed::criterion::Criterion; +#[cfg(not(feature = "codspeed"))] +pub use crate::criterion::Criterion; static DEBUG_ENABLED: Lazy = Lazy::new(|| std::env::var_os("CRITERION_DEBUG").is_some()); static GNUPLOT_VERSION: Lazy> = Lazy::new(criterion_plot::version); @@ -330,36 +354,6 @@ pub enum BenchmarkFilter { RejectAll, } -/// The benchmark manager -/// -/// `Criterion` lets you configure and execute benchmarks -/// -/// Each benchmark consists of four phases: -/// -/// - **Warm-up**: The routine is repeatedly executed, to let the CPU/OS/JIT/interpreter adapt to -/// the new load -/// - **Measurement**: The routine is repeatedly executed, and timing information is collected into -/// a sample -/// - **Analysis**: The sample is analyzed and distilled into meaningful statistics that get -/// reported to stdout, stored in files, and plotted -/// - **Comparison**: The current 
sample is compared with the sample obtained in the previous -/// benchmark. -pub struct Criterion { - config: BenchmarkConfig, - filter: BenchmarkFilter, - report: Reports, - output_directory: PathBuf, - baseline_directory: String, - baseline: Baseline, - load_baseline: Option, - all_directories: HashSet, - all_titles: HashSet, - measurement: M, - profiler: Box>, - connection: Option>, - mode: Mode, -} - /// Returns the Cargo target directory, possibly calling `cargo metadata` to /// figure it out. fn cargo_target_directory() -> Option { @@ -378,856 +372,6 @@ fn cargo_target_directory() -> Option { }) } -impl Default for Criterion { - /// Creates a benchmark manager with the following default settings: - /// - /// - Sample size: 100 measurements - /// - Warm-up time: 3 s - /// - Measurement time: 5 s - /// - Bootstrap size: 100 000 resamples - /// - Noise threshold: 0.01 (1%) - /// - Confidence level: 0.95 - /// - Significance level: 0.05 - /// - Plotting: enabled, using gnuplot if available or plotters if gnuplot is not available - /// - No filter - fn default() -> Criterion { - let reports = Reports { - cli_enabled: true, - cli: CliReport::new(false, false, CliVerbosity::Normal), - bencher_enabled: false, - bencher: BencherReport, - html: DEFAULT_PLOTTING_BACKEND.create_plotter().map(Html::new), - csv_enabled: cfg!(feature = "csv_output"), - }; - - let mut criterion = Criterion { - config: BenchmarkConfig { - confidence_level: 0.95, - measurement_time: Duration::from_secs(5), - noise_threshold: 0.01, - nresamples: 100_000, - sample_size: 100, - significance_level: 0.05, - warm_up_time: Duration::from_secs(3), - sampling_mode: SamplingMode::Auto, - quick_mode: false, - }, - filter: BenchmarkFilter::AcceptAll, - report: reports, - baseline_directory: "base".to_owned(), - baseline: Baseline::Save, - load_baseline: None, - output_directory: DEFAULT_OUTPUT_DIRECTORY.clone(), - all_directories: HashSet::new(), - all_titles: HashSet::new(), - measurement: WallTime, - profiler: Box::new(RefCell::new(ExternalProfiler)), - connection: CARGO_CRITERION_CONNECTION.as_ref().map(|mtx| mtx.lock().unwrap()), - mode: Mode::Benchmark, - }; - - if criterion.connection.is_some() { - // disable all reports when connected to cargo-criterion; it will do the reporting. - criterion.report.cli_enabled = false; - criterion.report.bencher_enabled = false; - criterion.report.csv_enabled = false; - criterion.report.html = None; - } - criterion - } -} - -impl Criterion { - /// Changes the measurement for the benchmarks run with this runner. See the - /// Measurement trait for more details - pub fn with_measurement(self, m: M2) -> Criterion { - // Can't use struct update syntax here because they're technically different types. - Criterion { - config: self.config, - filter: self.filter, - report: self.report, - baseline_directory: self.baseline_directory, - baseline: self.baseline, - load_baseline: self.load_baseline, - output_directory: self.output_directory, - all_directories: self.all_directories, - all_titles: self.all_titles, - measurement: m, - profiler: self.profiler, - connection: self.connection, - mode: self.mode, - } - } - - #[must_use] - /// Changes the internal profiler for benchmarks run with this runner. See - /// the Profiler trait for more details. - pub fn with_profiler(self, p: P) -> Criterion { - Criterion { profiler: Box::new(RefCell::new(p)), ..self } - } - - #[must_use] - /// Set the plotting backend. By default, Criterion will use gnuplot if available, or plotters - /// if not. 
- /// - /// Panics if `backend` is `PlottingBackend::Gnuplot` and gnuplot is not available. - pub fn plotting_backend(mut self, backend: PlottingBackend) -> Criterion { - if let PlottingBackend::Gnuplot = backend { - assert!( - !GNUPLOT_VERSION.is_err(), - "Gnuplot plotting backend was requested, but gnuplot is not available. \ - To continue, either install Gnuplot or allow Criterion.rs to fall back \ - to using plotters." - ); - } - - self.report.html = backend.create_plotter().map(Html::new); - self - } - - #[must_use] - /// Changes the default size of the sample for benchmarks run with this runner. - /// - /// A bigger sample should yield more accurate results if paired with a sufficiently large - /// measurement time. - /// - /// Sample size must be at least 10. - /// - /// # Panics - /// - /// Panics if n < 10 - pub fn sample_size(mut self, n: usize) -> Criterion { - assert!(n >= 10); - - self.config.sample_size = n; - self - } - - #[must_use] - /// Changes the default warm up time for benchmarks run with this runner. - /// - /// # Panics - /// - /// Panics if the input duration is zero - pub fn warm_up_time(mut self, dur: Duration) -> Criterion { - assert!(dur.as_nanos() > 0); - - self.config.warm_up_time = dur; - self - } - - #[must_use] - /// Changes the default measurement time for benchmarks run with this runner. - /// - /// With a longer time, the measurement will become more resilient to transitory peak loads - /// caused by external programs - /// - /// **Note**: If the measurement time is too "low", Criterion will automatically increase it - /// - /// # Panics - /// - /// Panics if the input duration in zero - pub fn measurement_time(mut self, dur: Duration) -> Criterion { - assert!(dur.as_nanos() > 0); - - self.config.measurement_time = dur; - self - } - - #[must_use] - /// Changes the default number of resamples for benchmarks run with this runner. - /// - /// Number of resamples to use for the - /// [bootstrap](http://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Case_resampling) - /// - /// A larger number of resamples reduces the random sampling errors, which are inherent to the - /// bootstrap method, but also increases the analysis time - /// - /// # Panics - /// - /// Panics if the number of resamples is set to zero - pub fn nresamples(mut self, n: usize) -> Criterion { - assert!(n > 0); - if n <= 1000 { - eprintln!("\nWarning: It is not recommended to reduce nresamples below 1000."); - } - - self.config.nresamples = n; - self - } - - #[must_use] - /// Changes the default noise threshold for benchmarks run with this runner. The noise threshold - /// is used to filter out small changes in performance, even if they are statistically - /// significant. Sometimes benchmarking the same code twice will result in small but - /// statistically significant differences solely because of noise. This provides a way to filter - /// out some of these false positives at the cost of making it harder to detect small changes - /// to the true performance of the benchmark. - /// - /// The default is 0.01, meaning that changes smaller than 1% will be ignored. - /// - /// # Panics - /// - /// Panics if the threshold is set to a negative value - pub fn noise_threshold(mut self, threshold: f64) -> Criterion { - assert!(threshold >= 0.0); - - self.config.noise_threshold = threshold; - self - } - - #[must_use] - /// Changes the default confidence level for benchmarks run with this runner. 
The confidence - /// level is the desired probability that the true runtime lies within the estimated - /// [confidence interval](https://en.wikipedia.org/wiki/Confidence_interval). The default is - /// 0.95, meaning that the confidence interval should capture the true value 95% of the time. - /// - /// # Panics - /// - /// Panics if the confidence level is set to a value outside the `(0, 1)` range - pub fn confidence_level(mut self, cl: f64) -> Criterion { - assert!(cl > 0.0 && cl < 1.0); - if cl < 0.5 { - eprintln!("\nWarning: It is not recommended to reduce confidence level below 0.5."); - } - - self.config.confidence_level = cl; - self - } - - #[must_use] - /// Changes the default [significance level](https://en.wikipedia.org/wiki/Statistical_significance) - /// for benchmarks run with this runner. This is used to perform a - /// [hypothesis test](https://en.wikipedia.org/wiki/Statistical_hypothesis_testing) to see if - /// the measurements from this run are different from the measured performance of the last run. - /// The significance level is the desired probability that two measurements of identical code - /// will be considered 'different' due to noise in the measurements. The default value is 0.05, - /// meaning that approximately 5% of identical benchmarks will register as different due to - /// noise. - /// - /// This presents a trade-off. By setting the significance level closer to 0.0, you can increase - /// the statistical robustness against noise, but it also weakens Criterion.rs' ability to - /// detect small but real changes in the performance. By setting the significance level - /// closer to 1.0, Criterion.rs will be more able to detect small true changes, but will also - /// report more spurious differences. - /// - /// See also the noise threshold setting. - /// - /// # Panics - /// - /// Panics if the significance level is set to a value outside the `(0, 1)` range - pub fn significance_level(mut self, sl: f64) -> Criterion { - assert!(sl > 0.0 && sl < 1.0); - - self.config.significance_level = sl; - self - } - - #[must_use] - /// Enables plotting - pub fn with_plots(mut self) -> Criterion { - // If running under cargo-criterion then don't re-enable the reports; let it do the reporting. - if self.connection.is_none() && self.report.html.is_none() { - let default_backend = DEFAULT_PLOTTING_BACKEND.create_plotter(); - if let Some(backend) = default_backend { - self.report.html = Some(Html::new(backend)); - } else { - panic!("Cannot find a default plotting backend!"); - } - } - self - } - - #[must_use] - /// Disables plotting - pub fn without_plots(mut self) -> Criterion { - self.report.html = None; - self - } - - #[must_use] - /// Names an explicit baseline and enables overwriting the previous results. - pub fn save_baseline(mut self, baseline: String) -> Criterion { - self.baseline_directory = baseline; - self.baseline = Baseline::Save; - self - } - - #[must_use] - /// Names an explicit baseline and disables overwriting the previous results. - pub fn retain_baseline(mut self, baseline: String, strict: bool) -> Criterion { - self.baseline_directory = baseline; - self.baseline = if strict { Baseline::CompareStrict } else { Baseline::CompareLenient }; - self - } - - #[must_use] - /// Filters the benchmarks. Only benchmarks with names that contain the - /// given string will be executed. - /// - /// This overwrites [`Self::with_benchmark_filter`]. 
- pub fn with_filter>(mut self, filter: S) -> Criterion { - let filter_text = filter.into(); - let filter = Regex::new(&filter_text).unwrap_or_else(|err| { - panic!("Unable to parse '{}' as a regular expression: {}", filter_text, err) - }); - self.filter = BenchmarkFilter::Regex(filter); - - self - } - - /// Only run benchmarks specified by the given filter. - /// - /// This overwrites [`Self::with_filter`]. - pub fn with_benchmark_filter(mut self, filter: BenchmarkFilter) -> Criterion { - self.filter = filter; - - self - } - - #[must_use] - /// Override whether the CLI output will be colored or not. Usually you would use the `--color` - /// CLI argument, but this is available for programmmatic use as well. - pub fn with_output_color(mut self, enabled: bool) -> Criterion { - self.report.cli.enable_text_coloring = enabled; - self - } - - /// Set the output directory (currently for testing only) - #[must_use] - #[doc(hidden)] - pub fn output_directory(mut self, path: &Path) -> Criterion { - self.output_directory = path.to_owned(); - - self - } - - /// Set the profile time (currently for testing only) - #[must_use] - #[doc(hidden)] - pub fn profile_time(mut self, profile_time: Option) -> Criterion { - match profile_time { - Some(time) => self.mode = Mode::Profile(time), - None => self.mode = Mode::Benchmark, - } - - self - } - - /// Generate the final summary at the end of a run. - #[doc(hidden)] - pub fn final_summary(&self) { - if !self.mode.is_benchmark() { - return; - } - - let report_context = ReportContext { - output_directory: self.output_directory.clone(), - plot_config: PlotConfiguration::default(), - }; - - self.report.final_summary(&report_context); - } - - /// Configure this criterion struct based on the command-line arguments to - /// this process. - #[must_use] - #[cfg_attr(feature = "cargo-clippy", allow(clippy::cognitive_complexity))] - pub fn configure_from_args(mut self) -> Criterion { - use clap::{value_parser, Arg, Command}; - let matches = Command::new("Criterion Benchmark") - .arg(Arg::new("FILTER") - .help("Skip benchmarks whose names do not contain FILTER.") - .index(1)) - .arg(Arg::new("color") - .short('c') - .long("color") - .alias("colour") - .value_parser(["auto", "always", "never"]) - .default_value("auto") - .help("Configure coloring of output. always = always colorize output, never = never colorize output, auto = colorize output if output is a tty and compiled for unix.")) - .arg(Arg::new("verbose") - .short('v') - .long("verbose") - .num_args(0) - .help("Print additional statistical information.")) - .arg(Arg::new("quiet") - .long("quiet") - .num_args(0) - .conflicts_with("verbose") - .help("Print only the benchmark results.")) - .arg(Arg::new("noplot") - .short('n') - .long("noplot") - .num_args(0) - .help("Disable plot and HTML generation.")) - .arg(Arg::new("save-baseline") - .short('s') - .long("save-baseline") - .default_value("base") - .help("Save results under a named baseline.")) - .arg(Arg::new("discard-baseline") - .long("discard-baseline") - .num_args(0) - .conflicts_with_all(["save-baseline", "baseline", "baseline-lenient"]) - .help("Discard benchmark results.")) - .arg(Arg::new("baseline") - .short('b') - .long("baseline") - .conflicts_with_all(["save-baseline", "baseline-lenient"]) - .help("Compare to a named baseline. If any benchmarks do not have the specified baseline this command fails.")) - .arg(Arg::new("baseline-lenient") - .long("baseline-lenient") - .conflicts_with_all(["save-baseline", "baseline"]) - .help("Compare to a named baseline. 
If any benchmarks do not have the specified baseline then just those benchmarks are not compared against the baseline while every other benchmark is compared against the baseline.")) - .arg(Arg::new("list") - .long("list") - .num_args(0) - .help("List all benchmarks") - .conflicts_with_all(["test", "profile-time"])) - .arg(Arg::new("format") - .long("format") - .value_parser(["pretty", "terse"]) - .default_value("pretty") - // Note that libtest's --format also works during test execution, but criterion - // doesn't support that at the moment. - .help("Output formatting")) - .arg(Arg::new("ignored") - .long("ignored") - .num_args(0) - .help("List or run ignored benchmarks (currently means skip all benchmarks)")) - .arg(Arg::new("exact") - .long("exact") - .num_args(0) - .help("Run benchmarks that exactly match the provided filter")) - .arg(Arg::new("profile-time") - .long("profile-time") - .value_parser(value_parser!(f64)) - .help("Iterate each benchmark for approximately the given number of seconds, doing no analysis and without storing the results. Useful for running the benchmarks in a profiler.") - .conflicts_with_all(["test", "list"])) - .arg(Arg::new("load-baseline") - .long("load-baseline") - .conflicts_with("profile-time") - .requires("baseline") - .help("Load a previous baseline instead of sampling new data.")) - .arg(Arg::new("sample-size") - .long("sample-size") - .value_parser(value_parser!(usize)) - .help(format!("Changes the default size of the sample for this run. [default: {}]", self.config.sample_size))) - .arg(Arg::new("warm-up-time") - .long("warm-up-time") - .value_parser(value_parser!(f64)) - .help(format!("Changes the default warm up time for this run. [default: {}]", self.config.warm_up_time.as_secs()))) - .arg(Arg::new("measurement-time") - .long("measurement-time") - .value_parser(value_parser!(f64)) - .help(format!("Changes the default measurement time for this run. [default: {}]", self.config.measurement_time.as_secs()))) - .arg(Arg::new("nresamples") - .long("nresamples") - .value_parser(value_parser!(usize)) - .help(format!("Changes the default number of resamples for this run. [default: {}]", self.config.nresamples))) - .arg(Arg::new("noise-threshold") - .long("noise-threshold") - .value_parser(value_parser!(f64)) - .help(format!("Changes the default noise threshold for this run. [default: {}]", self.config.noise_threshold))) - .arg(Arg::new("confidence-level") - .long("confidence-level") - .value_parser(value_parser!(f64)) - .help(format!("Changes the default confidence level for this run. [default: {}]", self.config.confidence_level))) - .arg(Arg::new("significance-level") - .long("significance-level") - .value_parser(value_parser!(f64)) - .help(format!("Changes the default significance level for this run. [default: {}]", self.config.significance_level))) - .arg(Arg::new("quick") - .long("quick") - .num_args(0) - .conflicts_with("sample-size") - .help(format!("Benchmark only until the significance level has been reached [default: {}]", self.config.quick_mode))) - .arg(Arg::new("test") - .hide(true) - .long("test") - .num_args(0) - .help("Run the benchmarks once, to verify that they execute successfully, but do not measure or report the results.") - .conflicts_with_all(["list", "profile-time"])) - .arg(Arg::new("bench") - .hide(true) - .long("bench") - .num_args(0)) - .arg(Arg::new("plotting-backend") - .long("plotting-backend") - .value_parser(["gnuplot", "plotters"]) - .help("Set the plotting backend. 
By default, Criterion.rs will use the gnuplot backend if gnuplot is available, or the plotters backend if it isn't.")) - .arg(Arg::new("output-format") - .long("output-format") - .value_parser(["criterion", "bencher"]) - .default_value("criterion") - .help("Change the CLI output format. By default, Criterion.rs will use its own format. If output format is set to 'bencher', Criterion.rs will print output in a format that resembles the 'bencher' crate.")) - .arg(Arg::new("nocapture") - .long("nocapture") - .num_args(0) - .hide(true) - .help("Ignored, but added for compatibility with libtest.")) - .arg(Arg::new("show-output") - .long("show-output") - .num_args(0) - .hide(true) - .help("Ignored, but added for compatibility with libtest.")) - .arg(Arg::new("include-ignored") - .long("include-ignored") - .num_args(0) - .hide(true) - .help("Ignored, but added for compatibility with libtest.")) - .arg(Arg::new("version") - .hide(true) - .short('V') - .long("version") - .num_args(0)) - .after_help(" -This executable is a Criterion.rs benchmark. -See https://github.com/bheisler/criterion.rs for more details. - -To enable debug output, define the environment variable CRITERION_DEBUG. -Criterion.rs will output more debug information and will save the gnuplot -scripts alongside the generated plots. - -To test that the benchmarks work, run `cargo test --benches` - -NOTE: If you see an 'unrecognized option' error using any of the options above, see: -https://bheisler.github.io/criterion.rs/book/faq.html -") - .get_matches(); - - if self.connection.is_some() { - if let Some(color) = matches.get_one::("color") { - if color != "auto" { - eprintln!("Warning: --color will be ignored when running with cargo-criterion. Use `cargo criterion --color {} -- ` instead.", color); - } - } - if matches.get_flag("verbose") { - eprintln!("Warning: --verbose will be ignored when running with cargo-criterion. Use `cargo criterion --output-format verbose -- ` instead."); - } - if matches.get_flag("noplot") { - eprintln!("Warning: --noplot will be ignored when running with cargo-criterion. Use `cargo criterion --plotting-backend disabled -- ` instead."); - } - if let Some(backend) = matches.get_one::("plotting-backend") { - eprintln!("Warning: --plotting-backend will be ignored when running with cargo-criterion. Use `cargo criterion --plotting-backend {} -- ` instead.", backend); - } - if let Some(format) = matches.get_one::("output-format") { - if format != "criterion" { - eprintln!("Warning: --output-format will be ignored when running with cargo-criterion. 
Use `cargo criterion --output-format {} -- ` instead.", format); - } - } - - if matches.contains_id("baseline") - || matches.get_one::("save-baseline").map_or(false, |base| base != "base") - || matches.contains_id("load-baseline") - { - eprintln!("Error: baselines are not supported when running with cargo-criterion."); - std::process::exit(1); - } - } - - let bench = matches.get_flag("bench"); - let test = matches.get_flag("test"); - let test_mode = match (bench, test) { - (true, true) => true, // cargo bench -- --test should run tests - (true, false) => false, // cargo bench should run benchmarks - (false, _) => true, // cargo test --benches should run tests - }; - - self.mode = if matches.get_flag("list") { - let list_format = match matches - .get_one::("format") - .expect("a default value was provided for this") - .as_str() - { - "pretty" => ListFormat::Pretty, - "terse" => ListFormat::Terse, - other => unreachable!( - "unrecognized value for --format that isn't part of possible-values: {}", - other - ), - }; - Mode::List(list_format) - } else if test_mode { - Mode::Test - } else if let Some(&num_seconds) = matches.get_one("profile-time") { - if num_seconds < 1.0 { - eprintln!("Profile time must be at least one second."); - std::process::exit(1); - } - - Mode::Profile(Duration::from_secs_f64(num_seconds)) - } else { - Mode::Benchmark - }; - - // This is kind of a hack, but disable the connection to the runner if we're not benchmarking. - if !self.mode.is_benchmark() { - self.connection = None; - } - - let filter = if matches.get_flag("ignored") { - // --ignored overwrites any name-based filters passed in. - BenchmarkFilter::RejectAll - } else if let Some(filter) = matches.get_one::("FILTER") { - if matches.get_flag("exact") { - BenchmarkFilter::Exact(filter.to_owned()) - } else { - let regex = Regex::new(filter).unwrap_or_else(|err| { - panic!("Unable to parse '{}' as a regular expression: {}", filter, err) - }); - BenchmarkFilter::Regex(regex) - } - } else { - BenchmarkFilter::AcceptAll - }; - self = self.with_benchmark_filter(filter); - - match matches.get_one("plotting-backend").map(String::as_str) { - // Use plotting_backend() here to re-use the panic behavior if Gnuplot is not available. - Some("gnuplot") => self = self.plotting_backend(PlottingBackend::Gnuplot), - Some("plotters") => self = self.plotting_backend(PlottingBackend::Plotters), - Some(val) => panic!("Unexpected plotting backend '{}'", val), - None => {} - } - - if matches.get_flag("noplot") { - self = self.without_plots(); - } - - if let Some(dir) = matches.get_one::("save-baseline") { - self.baseline = Baseline::Save; - self.baseline_directory = dir.to_owned() - } - if matches.get_flag("discard-baseline") { - self.baseline = Baseline::Discard; - } - if let Some(dir) = matches.get_one::("baseline") { - self.baseline = Baseline::CompareStrict; - self.baseline_directory = dir.to_owned(); - } - if let Some(dir) = matches.get_one::("baseline-lenient") { - self.baseline = Baseline::CompareLenient; - self.baseline_directory = dir.to_owned(); - } - - if self.connection.is_some() { - // disable all reports when connected to cargo-criterion; it will do the reporting. 
- self.report.cli_enabled = false; - self.report.bencher_enabled = false; - self.report.csv_enabled = false; - self.report.html = None; - } else { - match matches.get_one("output-format").map(String::as_str) { - Some("bencher") => { - self.report.bencher_enabled = true; - self.report.cli_enabled = false; - } - _ => { - let verbose = matches.get_flag("verbose"); - let verbosity = if verbose { - CliVerbosity::Verbose - } else if matches.get_flag("quiet") { - CliVerbosity::Quiet - } else { - CliVerbosity::Normal - }; - let stdout_isatty = stdout().is_terminal(); - let mut enable_text_overwrite = stdout_isatty && !verbose && !debug_enabled(); - let enable_text_coloring; - match matches.get_one("color").map(String::as_str) { - Some("always") => { - enable_text_coloring = true; - } - Some("never") => { - enable_text_coloring = false; - enable_text_overwrite = false; - } - _ => enable_text_coloring = stdout_isatty, - }; - self.report.bencher_enabled = false; - self.report.cli_enabled = true; - self.report.cli = - CliReport::new(enable_text_overwrite, enable_text_coloring, verbosity); - } - }; - } - - if let Some(dir) = matches.get_one::("load-baseline") { - self.load_baseline = Some(dir.to_owned()); - } - - if let Some(&num_size) = matches.get_one("sample-size") { - assert!(num_size >= 10); - self.config.sample_size = num_size; - } - if let Some(&num_seconds) = matches.get_one("warm-up-time") { - let dur = std::time::Duration::from_secs_f64(num_seconds); - assert!(dur.as_nanos() > 0); - - self.config.warm_up_time = dur; - } - if let Some(&num_seconds) = matches.get_one("measurement-time") { - let dur = std::time::Duration::from_secs_f64(num_seconds); - assert!(dur.as_nanos() > 0); - - self.config.measurement_time = dur; - } - if let Some(&num_resamples) = matches.get_one("nresamples") { - assert!(num_resamples > 0); - - self.config.nresamples = num_resamples; - } - if let Some(&num_noise_threshold) = matches.get_one("noise-threshold") { - assert!(num_noise_threshold > 0.0); - - self.config.noise_threshold = num_noise_threshold; - } - if let Some(&num_confidence_level) = matches.get_one("confidence-level") { - assert!(num_confidence_level > 0.0 && num_confidence_level < 1.0); - - self.config.confidence_level = num_confidence_level; - } - if let Some(&num_significance_level) = matches.get_one("significance-level") { - assert!(num_significance_level > 0.0 && num_significance_level < 1.0); - - self.config.significance_level = num_significance_level; - } - - if matches.get_flag("quick") { - self.config.quick_mode = true; - } - - self - } - - fn filter_matches(&self, id: &str) -> bool { - match &self.filter { - BenchmarkFilter::AcceptAll => true, - BenchmarkFilter::Regex(regex) => regex.is_match(id), - BenchmarkFilter::Exact(exact) => id == exact, - BenchmarkFilter::RejectAll => false, - } - } - - /// Returns true iff we should save the benchmark results in - /// json files on the local disk. - fn should_save_baseline(&self) -> bool { - self.connection.is_none() - && self.load_baseline.is_none() - && !matches!(self.baseline, Baseline::Discard) - } - - /// Return a benchmark group. All benchmarks performed using a benchmark group will be - /// grouped together in the final report. 
- /// - /// # Examples: - /// - /// ```rust - /// use self::criterion2::*; - /// - /// fn bench_simple(c: &mut Criterion) { - /// let mut group = c.benchmark_group("My Group"); - /// - /// // Now we can perform benchmarks with this group - /// group.bench_function("Bench 1", |b| b.iter(|| 1 )); - /// group.bench_function("Bench 2", |b| b.iter(|| 2 )); - /// - /// group.finish(); - /// } - /// criterion_group!(benches, bench_simple); - /// criterion_main!(benches); - /// ``` - /// # Panics: - /// Panics if the group name is empty - pub fn benchmark_group>(&mut self, group_name: S) -> BenchmarkGroup<'_, M> { - let group_name = group_name.into(); - assert!(!group_name.is_empty(), "Group name must not be empty."); - - if let Some(conn) = &self.connection { - conn.send(&OutgoingMessage::BeginningBenchmarkGroup { group: &group_name }).unwrap(); - } - - BenchmarkGroup::new(self, group_name) - } -} -impl Criterion -where - M: Measurement + 'static, -{ - /// Benchmarks a function. For comparing multiple functions, see `benchmark_group`. - /// - /// # Example - /// - /// ```rust - /// use self::criterion2::*; - /// - /// fn bench(c: &mut Criterion) { - /// // Setup (construct data, allocate memory, etc) - /// c.bench_function( - /// "function_name", - /// |b| b.iter(|| { - /// // Code to benchmark goes here - /// }), - /// ); - /// } - /// - /// criterion_group!(benches, bench); - /// criterion_main!(benches); - /// ``` - pub fn bench_function(&mut self, id: &str, f: F) -> &mut Criterion - where - F: FnMut(&mut Bencher<'_, M>), - { - self.benchmark_group(id).bench_function(BenchmarkId::no_function(), f); - self - } - - /// Benchmarks a function with an input. For comparing multiple functions or multiple inputs, - /// see `benchmark_group`. - /// - /// # Example - /// - /// ```rust - /// use self::criterion2::*; - /// - /// fn bench(c: &mut Criterion) { - /// // Setup (construct data, allocate memory, etc) - /// let input = 5u64; - /// c.bench_with_input( - /// BenchmarkId::new("function_name", input), &input, - /// |b, i| b.iter(|| { - /// // Code to benchmark using input `i` goes here - /// }), - /// ); - /// } - /// - /// criterion_group!(benches, bench); - /// criterion_main!(benches); - /// ``` - pub fn bench_with_input(&mut self, id: BenchmarkId, input: &I, f: F) -> &mut Criterion - where - F: FnMut(&mut Bencher<'_, M>, &I), - { - // It's possible to use BenchmarkId::from_parameter to create a benchmark ID with no function - // name. That's intended for use with BenchmarkGroups where the function name isn't necessary, - // but here it is. - let group_name = id.function_name.expect( - "Cannot use BenchmarkId::from_parameter with Criterion::bench_with_input. \ - Consider using a BenchmarkGroup or BenchmarkId::new instead.", - ); - // Guaranteed safe because external callers can't create benchmark IDs without a parameter - let parameter = id.parameter.unwrap(); - self.benchmark_group(group_name).bench_with_input( - BenchmarkId::no_function_with_input(parameter), - input, - f, - ); - self - } -} - /// Enum representing different ways of measuring the throughput of benchmarked code. /// If the throughput setting is configured for a benchmark then the estimated throughput will /// be reported as well as the time per iteration. 
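The effect of the conditional `pub use` lines earlier in this lib.rs diff is that `Bencher`, `BenchmarkGroup`, `BenchmarkId`, and `Criterion` keep their public paths while resolving either to the stock implementations or to the `codspeed` shims, so downstream benchmarks compile unchanged. A minimal sketch of what a consumer sees (the benchmark name and body below are made up for illustration and are not part of this patch):

```rust
// Hypothetical downstream bench; identical source with or without `--features codspeed`.
use criterion2::{black_box, criterion_group, criterion_main, Criterion};

fn bench_sum(c: &mut Criterion) {
    // `Criterion` is crate::criterion::Criterion by default, or
    // crate::codspeed::criterion::Criterion when the `codspeed` feature is enabled.
    c.bench_function("sum 0..100", |b| b.iter(|| (0..100u64).map(black_box).sum::<u64>()));
}

criterion_group!(benches, bench_sum);
criterion_main!(benches);
```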
@@ -1470,5 +614,5 @@ pub fn runner(benches: &[&dyn Fn()]) {
     for bench in benches {
         bench();
     }
-    Criterion::default().configure_from_args().final_summary();
+    crate::criterion::Criterion::default().configure_from_args().final_summary();
 }
diff --git a/src/macros_codspeed.rs b/src/macros_codspeed.rs
new file mode 100644
index 0000000..be31a2b
--- /dev/null
+++ b/src/macros_codspeed.rs
@@ -0,0 +1,44 @@
+#[macro_export]
+macro_rules! abs_file {
+    () => {
+        std::path::PathBuf::from(
+            std::env::var("CODSPEED_CARGO_WORKSPACE_ROOT")
+                .expect("Could not find CODSPEED_CARGO_WORKSPACE_ROOT env variable, make sure you are using the latest version of cargo-codspeed")
+        )
+        .join(file!())
+        .to_string_lossy()
+    };
+}
+
+#[macro_export]
+macro_rules! criterion_group {
+    (name = $name:ident; config = $config:expr; targets = $( $target:path ),+ $(,)*) => {
+        pub fn $name(criterion: &mut $crate::codspeed::criterion::Criterion) {
+            let mut criterion = &mut criterion.with_patched_measurement($config);
+            $(
+                criterion.set_current_file(criterion::abs_file!());
+                criterion.set_macro_group(format!("{}::{}", stringify!($name), stringify!($target)));
+                $target(criterion);
+            )+
+        }
+    };
+    ($name:ident, $( $target:path ),+ $(,)*) => {
+        $crate::criterion_group!{
+            name = $name;
+            config = $crate::Criterion::default();
+            targets = $( $target ),+
+        }
+    }
+}
+
+#[macro_export]
+macro_rules! criterion_main {
+    ( $( $group:path ),+ $(,)* ) => {
+        pub fn main() {
+            let mut criterion = $crate::codspeed::criterion::Criterion::new_instrumented();
+            $(
+                $group(&mut criterion);
+            )+
+        }
+    };
+}
diff --git a/src/routine.rs b/src/routine.rs
index 2c7be29..3ed992c 100644
--- a/src/routine.rs
+++ b/src/routine.rs
@@ -1,10 +1,13 @@
+use std::marker::PhantomData;
+use std::time::Duration;
+
+use crate::bencher::Bencher;
 use crate::benchmark::BenchmarkConfig;
 use crate::connection::OutgoingMessage;
+use crate::criterion::Criterion;
 use crate::measurement::Measurement;
 use crate::report::{BenchmarkId, Report, ReportContext};
-use crate::{black_box, ActualSamplingMode, Bencher, Criterion};
-use std::marker::PhantomData;
-use std::time::Duration;
+use crate::{black_box, ActualSamplingMode};
 
 /// PRIVATE
 pub(crate) trait Routine<M: Measurement, T: ?Sized> {
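Under the `codspeed` feature, the macros in src/macros_codspeed.rs above replace the ones from src/macros.rs: `criterion_group!` turns each group into a plain function that tags every target with its source file (via `abs_file!`) and a `group::target` name before running it, and `criterion_main!` drives those functions with an instrumented `Criterion`. Hand-expanding the hypothetical `criterion_group!(benches, bench_sum)` from the earlier sketch gives roughly the following; this is written as it would appear inside the crate, so it is illustrative only and not compilable on its own:

```rust
// Rough hand-expansion of criterion_group!/criterion_main! with `--features codspeed`.
pub fn benches(criterion: &mut crate::codspeed::criterion::Criterion) {
    // The short form of criterion_group! supplies `$crate::Criterion::default()` as the config.
    let mut criterion = &mut criterion.with_patched_measurement(crate::Criterion::default());
    // abs_file!() resolves file!() against CODSPEED_CARGO_WORKSPACE_ROOT.
    criterion.set_current_file(criterion::abs_file!());
    criterion.set_macro_group(format!("{}::{}", "benches", "bench_sum"));
    bench_sum(criterion);
}

pub fn main() {
    // new_instrumented() builds the instrumented Criterion used by the CodSpeed harness.
    let mut criterion = crate::codspeed::criterion::Criterion::new_instrumented();
    benches(&mut criterion);
}
```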