Skip to content

Commit

Permalink
Try different approach for black_box in benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
ogxd committed Nov 9, 2024
1 parent 0ad6fb3 commit 39614ad
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 26 deletions.
30 changes: 14 additions & 16 deletions benches/throughput/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@ mod result_processor;

use result_processor::*;

use std::hint::black_box;
use std::hash::Hasher;
use std::time::{Instant, Duration};
use std::alloc::{alloc, dealloc, Layout};
use std::slice;

// black_box from std::hint is not as good at preventing bias
use criterion::black_box;

use rand::Rng;

use gxhash::*;
Expand Down Expand Up @@ -46,12 +44,12 @@ fn main() {
});

// XxHash (twox-hash)
benchmark(processor.as_mut(), slice, "XxHash (XXH3)", |data: &[u8], seed: u64| -> u64 {
twox_hash::xxh3::hash64_with_seed(data, seed)
});
// benchmark(processor.as_mut(), slice, "XxHash (XXH3)", |data: &[u8], seed: u64| -> u64 {
// twox_hash::xxh3::hash64_with_seed(data, seed)
// });

// AHash
let ahash_hasher = ahash::RandomState::with_seeds(0, 0, 0, 0);
let ahash_hasher = ahash::RandomState::with_seed(black_box(42));
benchmark(processor.as_mut(), slice, "AHash", |data: &[u8], _: i32| -> u64 {
ahash_hasher.hash_one(data)
});
Expand Down Expand Up @@ -103,7 +101,7 @@ fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str,
}

// Warmup
black_box(time(ITERATIONS, &|| delegate(black_box(&data[..len]), black_box(S::default()))));
//time(ITERATIONS, &|| delegate(black_box(&data[..len]), black_box(S::default())));

let mut durations_s = vec![];
let now = Instant::now();
Expand All @@ -119,7 +117,7 @@ fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str,
let slice = &data[start..end];
// Execute method for a new set of iterations
let seed_copy = seed.clone();
let duration = time(ITERATIONS, &|| black_box(delegate(black_box(slice), black_box(seed_copy))));
let duration = time(ITERATIONS, &delegate, slice, seed_copy);
durations_s.push(duration.as_secs_f64());
}
let average_duration_s = calculate_average_without_outliers(&mut durations_s);
Expand All @@ -131,18 +129,18 @@ fn benchmark<F, S>(processor: &mut dyn ResultProcessor, data: &[u8], name: &str,
}

#[inline(never)]
fn time<F>(iterations: u32, delegate: &F) -> Duration
where F: Fn() -> u64
fn time<F, S>(iterations: u32, delegate: F, slice: &[u8], seed: S) -> Duration
where F: Fn(&[u8], S) -> u64, S: Default + TryFrom<u128> + TryInto<usize> + Clone + Copy
{
let now = Instant::now();
// Bench the same way as is done in criterion.rs
// https://github.com/bheisler/criterion.rs/blob/e1a8c9ab2104fbf2d15f700d0038b2675054a2c8/src/bencher.rs#L87
for _ in 0..iterations {
if FORCE_NO_INLINING {
black_box(execute_noinlining(delegate));
} else {
black_box(delegate());
}
//if FORCE_NO_INLINING {
// black_box(execute_noinlining(delegate));
//} else {
black_box(delegate(black_box(slice), black_box(seed)));
//}
}
now.elapsed()
}
Expand Down
20 changes: 10 additions & 10 deletions src/gxhash/platform/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,18 @@ pub unsafe fn get_partial_safe(data: *const State, len: usize) -> State {
#[inline(always)]
pub unsafe fn get_partial_unsafe_no_ub(data: *const State, len: usize) -> State {
// Using inline assembly to load out-of-bounds
// use std::arch::asm;
// let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
// let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices);
// let mut result: State;
// asm!("movdqu [{}], {}", in(reg) data, out(xmm_reg) result, options(pure, nomem, nostack));
// let partial_vector = _mm_and_si128(result, mask);
// _mm_add_epi8(partial_vector, _mm_set1_epi8(len as i8))

// Using simd_masked_load
use std::arch::asm;
let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices);
State::from(std::intrinsics::simd::simd_masked_load(core::simd::i8x16::from(mask), data as *const i8, core::simd::i8x16::from(_mm_set1_epi8(len as i8))))
let mut result: State;
asm!("movdqu [{}], {}", in(reg) data, out(xmm_reg) result, options(pure, nomem, nostack));
let partial_vector = _mm_and_si128(result, mask);
_mm_add_epi8(partial_vector, _mm_set1_epi8(len as i8))

// Using simd_masked_load
// let indices = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
// let mask = _mm_cmpgt_epi8(_mm_set1_epi8(len as i8), indices);
// State::from(std::intrinsics::simd::simd_masked_load(core::simd::i8x16::from(mask), data as *const i8, core::simd::i8x16::from(_mm_set1_epi8(len as i8))))

// Using std::simd
// use std::simd::*;
Expand Down

0 comments on commit 39614ad

Please sign in to comment.