Skip to content

Commit

Permalink
Add benchmarks for SIMD merkle
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewmilson committed May 16, 2024
1 parent 42fbd6e commit aaa48c8
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 48 deletions.
77 changes: 30 additions & 47 deletions crates/prover/benches/merkle.rs
Original file line number Diff line number Diff line change
@@ -1,59 +1,42 @@
#![feature(iter_array_chunks)]

use criterion::Criterion;
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use itertools::Itertools;
use num_traits::Zero;
use stwo_prover::core::backend::simd::SimdBackend;
use stwo_prover::core::backend::{CPUBackend, Col};
use stwo_prover::core::fields::m31::BaseField;
use stwo_prover::core::vcs::blake2_merkle::Blake2sMerkleHasher;
use stwo_prover::core::vcs::ops::MerkleOps;

#[cfg(target_arch = "x86_64")]
pub fn cpu_merkle(c: &mut criterion::Criterion) {
use itertools::Itertools;
use num_traits::Zero;
use stwo_prover::core::backend::avx512::AVX512Backend;
use stwo_prover::core::backend::{CPUBackend, Col};
use stwo_prover::core::fields::m31::BaseField;
use stwo_prover::core::vcs::ops::MerkleOps;
use stwo_prover::platform;
const LOG_N_ROWS: u32 = 16;

const N_COLS: usize = 1 << 8;
const LOG_SIZE: u32 = 16;
let cols = (0..N_COLS)
.map(|_| {
(0..(1 << LOG_SIZE))
.map(|_| BaseField::zero())
.collect::<Vec<_>>()
})
.collect::<Vec<_>>();
const LOG_N_COLS: u32 = 8;

/// Benchmarks `B::commit_on_layer` with the Blake2s Merkle hasher for a single backend `B`.
///
/// Builds `1 << LOG_N_COLS` clones of one column holding `1 << LOG_N_ROWS` zero `BaseField`
/// values, then registers a benchmark named `"{id} merkle"` in the "merkle throughput" group.
// NOTE(review): this text comes from a rendered diff without +/- markers. The statements
// below that use `N_COLS`, `LOG_SIZE` and the "cpu merkle" benchmark appear to be lines
// removed by the commit, interleaved with the new body — confirm against the commit itself.
fn bench_blake2s_merkle<B: MerkleOps<Blake2sMerkleHasher>>(c: &mut Criterion, id: &str) {
let col: Col<B, BaseField> = (0..1 << LOG_N_ROWS).map(|_| BaseField::zero()).collect();
let cols = (0..1 << LOG_N_COLS).map(|_| col.clone()).collect_vec();
// The vector of column references is built once here, outside the timed closure.
let col_refs = cols.iter().collect_vec();
let mut group = c.benchmark_group("merkle throughput");
group.throughput(criterion::Throughput::Elements((N_COLS << LOG_SIZE) as u64));
group.throughput(criterion::Throughput::Bytes(
(N_COLS << (LOG_SIZE + 2)) as u64,
));
group.bench_function("cpu merkle", |b| {
b.iter(|| {
CPUBackend::commit_on_layer(LOG_SIZE, None, &cols.iter().collect_vec());
})
// NOTE(review): two consecutive `throughput` calls on the same group presumably overwrite
// each other, so only the `Bytes` setting would apply to the benchmark registered below —
// verify against criterion's `BenchmarkGroup::throughput` documentation.
group.throughput(Throughput::Elements(1 << (LOG_N_COLS + LOG_N_ROWS)));
// `4 <<` presumably accounts for 4 bytes per `BaseField` element — TODO confirm.
group.throughput(Throughput::Bytes(4 << (LOG_N_COLS + LOG_N_ROWS)));
group.bench_function(&format!("{id} merkle"), |b| {
// NOTE(review): `iter_with_large_drop` is expected to keep the drop of the returned
// value out of the measurement — confirm against criterion's `Bencher` documentation.
b.iter_with_large_drop(|| B::commit_on_layer(LOG_N_ROWS, None, &col_refs))
});
}

if !platform::avx512_detected() {
return;
/// Criterion target that runs `bench_blake2s_merkle` once per backend.
///
/// Order: "avx" (compiled only for `x86_64`, and run only when
/// `stwo_prover::platform::avx512_detected()` returns true), then "simd", then "cpu".
// NOTE(review): this text comes from a rendered diff without +/- markers. The second
// `let cols = ...` and the "avx merkle" benchmark below appear to be lines removed by the
// commit, interleaved with the new body — confirm against the commit itself.
fn blake2s_merkle_benches(c: &mut Criterion) {
#[cfg(target_arch = "x86_64")]
if stwo_prover::platform::avx512_detected() {
// Imported locally so the AVX512 backend is only named inside the x86_64-gated branch.
use stwo_prover::core::backend::avx512::AVX512Backend;
bench_blake2s_merkle::<AVX512Backend>(c, "avx");
}
let cols = (0..N_COLS)
.map(|_| {
(0..(1 << LOG_SIZE))
.map(|_| BaseField::zero())
.collect::<Col<AVX512Backend, BaseField>>()
})
.collect::<Vec<_>>();

group.bench_function("avx merkle", |b| {
b.iter(|| {
AVX512Backend::commit_on_layer(LOG_SIZE, None, &cols.iter().collect_vec());
})
});
bench_blake2s_merkle::<SimdBackend>(c, "simd");
bench_blake2s_merkle::<CPUBackend>(c, "cpu");
}

#[cfg(target_arch = "x86_64")]
criterion::criterion_group!(
name=merkle;
criterion_group!(
name = benches;
config = Criterion::default().sample_size(10);
targets=cpu_merkle);
criterion::criterion_main!(merkle);
targets = blake2s_merkle_benches);
criterion_main!(benches);
3 changes: 2 additions & 1 deletion crates/prover/src/core/backend/simd/blake2s.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ fn rotate<const N: u32>(x: u32x16) -> u32x16 {
(x >> N) | (x << (u32::BITS - N))
}

#[inline]
// `inline(always)` can cause code parsing errors for wasm: "locals exceed maximum".
#[cfg_attr(not(target_arch = "wasm32"), inline(always))]
fn round(v: &mut [u32x16; 16], m: [u32x16; 16], r: usize) {
v[0] += m[SIGMA[r][0] as usize];
v[1] += m[SIGMA[r][2] as usize];
Expand Down

0 comments on commit aaa48c8

Please sign in to comment.