From 91d4ee1526ee0f167977c5daa620f487172f44f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Sun, 25 Aug 2024 16:38:31 +0200
Subject: [PATCH] Better markdown benchmarks (#849)

* Display the size of the databases after the benchmarks

* Show the best result in bold text in the markdown table

* Display the table in markdown

* Make clippy happy

* Update the README to highlight the best timings for each benchmark
---
 Cargo.toml                |   2 +
 README.md                 |  28 ++++-----
 benches/lmdb_benchmark.rs | 127 ++++++++++++++++++++++++++++++++------
 3 files changed, 125 insertions(+), 32 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index b37d946f..ebfa880a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -34,6 +34,8 @@ redb1 = { version = "=1.0.0", package = "redb" }
 redb2 = { version = "=2.0.0", package = "redb" }
 serde = { version = "1.0", features = ["derive"] }
 bincode = "1.3.3"
+walkdir = "2.5.0"
+byte-unit = "=5.0.4"
 
 # Just benchmarking dependencies
 [target.'cfg(not(target_os = "wasi"))'.dev-dependencies]
diff --git a/README.md b/README.md
index b22ff95b..2cd3b245 100644
--- a/README.md
+++ b/README.md
@@ -53,20 +53,20 @@ To run all the tests and benchmarks a few extra dependencies are required:
 ## Benchmarks
 
 redb has similar performance to other top embedded key-value stores such as lmdb and rocksdb
 
-|                           | redb   | lmdb   | rocksdb | sled   | sanakirja |
-|---------------------------|--------|--------|---------|--------|-----------|
-| bulk load                 | 2792ms | 1115ms | 5610ms  | 5005ms | 1161ms    |
-| individual writes         | 462ms  | 1119ms | 1097ms  | 957ms  | 662ms     |
-| batch writes              | 2568ms | 2247ms | 1344ms  | 1622ms | 2713ms    |
-| random reads              | 988ms  | 558ms  | 3469ms  | 1509ms | 678ms     |
-| random reads              | 962ms  | 556ms  | 3377ms  | 1425ms | 671ms     |
-| random range reads        | 2534ms | 985ms  | 6058ms  | 4670ms | 1089ms    |
-| random range reads        | 2493ms | 998ms  | 5801ms  | 4665ms | 1119ms    |
-| random reads (4 threads)  | 344ms  | 141ms  | 1247ms  | 424ms  | 266ms     |
-| random reads (8 threads)  | 192ms  | 72ms   | 673ms   | 230ms  | 620ms     |
-| random reads (16 threads) | 131ms  | 47ms   | 476ms   | 148ms  | 3500ms    |
-| random reads (32 threads) | 118ms  | 44ms   | 412ms   | 129ms  | 4313ms    |
-| removals                  | 2184ms | 784ms  | 2451ms  | 2047ms | 1344ms    |
+|                           | redb       | lmdb       | rocksdb     | sled   | sanakirja |
+|---------------------------|------------|------------|-------------|--------|-----------|
+| bulk load                 | 2792ms     | **1115ms** | 5610ms      | 5005ms | 1161ms    |
+| individual writes         | **462ms**  | 1119ms     | 1097ms      | 957ms  | 662ms     |
+| batch writes              | 2568ms     | 2247ms     | **1344ms**  | 1622ms | 2713ms    |
+| random reads              | 988ms      | **558ms**  | 3469ms      | 1509ms | 678ms     |
+| random reads              | 962ms      | **556ms**  | 3377ms      | 1425ms | 671ms     |
+| random range reads        | 2534ms     | **985ms**  | 6058ms      | 4670ms | 1089ms    |
+| random range reads        | 2493ms     | **998ms**  | 5801ms      | 4665ms | 1119ms    |
+| random reads (4 threads)  | 344ms      | **141ms**  | 1247ms      | 424ms  | 266ms     |
+| random reads (8 threads)  | 192ms      | **72ms**   | 673ms       | 230ms  | 620ms     |
+| random reads (16 threads) | 131ms      | **47ms**   | 476ms       | 148ms  | 3500ms    |
+| random reads (32 threads) | 118ms      | **44ms**   | 412ms       | 129ms  | 4313ms    |
+| removals                  | 2184ms     | **784ms**  | 2451ms      | 2047ms | 1344ms    |
 
 Source code for benchmark [here](./benches/lmdb_benchmark.rs). Results collected on a Ryzen 5900X with Samsung 980 PRO NVMe.
diff --git a/benches/lmdb_benchmark.rs b/benches/lmdb_benchmark.rs
index 7ed58cc1..f2197f78 100644
--- a/benches/lmdb_benchmark.rs
+++ b/benches/lmdb_benchmark.rs
@@ -1,5 +1,6 @@
 use std::env::current_dir;
 use std::mem::size_of;
+use std::path::Path;
 use std::sync::Arc;
 use std::{fs, process, thread};
 use tempfile::{NamedTempFile, TempDir};
@@ -70,7 +71,7 @@ fn make_rng_shards(shards: usize, elements: usize) -> Vec {
     rngs
 }
 
-fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
+fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, ResultType)> {
     let mut rng = make_rng();
     let mut results = Vec::new();
     let db = Arc::new(db);
@@ -95,7 +96,7 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
         ELEMENTS,
         duration.as_millis()
     );
-    results.push(("bulk load".to_string(), duration));
+    results.push(("bulk load".to_string(), ResultType::Duration(duration)));
 
     let start = Instant::now();
     let writes = 100;
@@ -118,7 +119,10 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
         writes,
         duration.as_millis()
     );
-    results.push(("individual writes".to_string(), duration));
+    results.push((
+        "individual writes".to_string(),
+        ResultType::Duration(duration),
+    ));
 
     let start = Instant::now();
     let batch_size = 1000;
@@ -144,7 +148,7 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
         batch_size,
         duration.as_millis()
     );
-    results.push(("batch writes".to_string(), duration));
+    results.push(("batch writes".to_string(), ResultType::Duration(duration)));
 
     let txn = db.read_transaction();
     {
@@ -155,7 +159,7 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
             let end = Instant::now();
             let duration = end - start;
             println!("{}: len() in {}ms", T::db_type_name(), duration.as_millis());
-            results.push(("len()".to_string(), duration));
+            results.push(("len()".to_string(), ResultType::Duration(duration)));
         }
 
         for _ in 0..ITERATIONS {
@@ -179,7 +183,7 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
                 ELEMENTS,
                 duration.as_millis()
             );
-            results.push(("random reads".to_string(), duration));
+            results.push(("random reads".to_string(), ResultType::Duration(duration)));
         }
 
         for _ in 0..ITERATIONS {
@@ -208,7 +212,10 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
                 ELEMENTS * num_scan,
                 duration.as_millis()
            );
-            results.push(("random range reads".to_string(), duration));
+            results.push((
+                "random range reads".to_string(),
+                ResultType::Duration(duration),
+            ));
         }
     }
     drop(txn);
@@ -246,7 +253,10 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
             ELEMENTS,
             duration.as_millis()
         );
-        results.push((format!("random reads ({num_threads} threads)"), duration));
+        results.push((
+            format!("random reads ({num_threads} threads)"),
+            ResultType::Duration(duration),
+        ));
     }
 
     let start = Instant::now();
@@ -271,11 +281,40 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
         deletes,
         duration.as_millis()
     );
-    results.push(("removals".to_string(), duration));
+    results.push(("removals".to_string(), ResultType::Duration(duration)));
 
     results
 }
 
+fn database_size(path: &Path) -> u64 {
+    let mut size = 0u64;
+    for result in walkdir::WalkDir::new(path) {
+        let entry = result.unwrap();
+        size += entry.metadata().unwrap().len();
+    }
+    size
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
+enum ResultType {
+    Duration(Duration),
+    SizeInBytes(u64),
+}
+
+impl std::fmt::Display for ResultType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        use byte_unit::{Byte, UnitType};
+
+        match self {
+            ResultType::Duration(d) => write!(f, "{d:.2?}"),
+            ResultType::SizeInBytes(s) => {
+                let b = Byte::from_u64(*s).get_appropriate_unit(UnitType::Binary);
+                write!(f, "{b:.2}")
+            }
+        }
+    }
+}
+
 fn main() {
     let tmpdir = current_dir().unwrap().join(".benchmark");
     fs::create_dir(&tmpdir).unwrap();
@@ -294,7 +333,13 @@ fn main() {
             .create(tmpfile.path())
             .unwrap();
         let table = RedbBenchDatabase::new(&db);
-        benchmark(table)
+        let mut results = benchmark(table);
+        let size = database_size(tmpfile.path());
+        results.push((
+            "size after bench".to_string(),
+            ResultType::SizeInBytes(size),
+        ));
+        results
     };
 
     let lmdb_results = {
@@ -306,7 +351,13 @@ fn main() {
                 .unwrap()
         };
         let table = HeedBenchDatabase::new(&env);
-        benchmark(table)
+        let mut results = benchmark(table);
+        let size = database_size(tmpfile.path());
+        results.push((
+            "size after bench".to_string(),
+            ResultType::SizeInBytes(size),
+        ));
+        results
     };
 
     let rocksdb_results = {
@@ -321,14 +372,26 @@ fn main() {
 
         let db = rocksdb::TransactionDB::open(&opts, &Default::default(), tmpfile.path()).unwrap();
         let table = RocksdbBenchDatabase::new(&db);
-        benchmark(table)
+        let mut results = benchmark(table);
+        let size = database_size(tmpfile.path());
+        results.push((
+            "size after bench".to_string(),
+            ResultType::SizeInBytes(size),
+        ));
+        results
     };
 
     let sled_results = {
         let tmpfile: TempDir = tempfile::tempdir_in(&tmpdir).unwrap();
         let db = sled::Config::new().path(tmpfile.path()).open().unwrap();
         let table = SledBenchDatabase::new(&db, tmpfile.path());
-        benchmark(table)
+        let mut results = benchmark(table);
+        let size = database_size(tmpfile.path());
+        results.push((
+            "size after bench".to_string(),
+            ResultType::SizeInBytes(size),
+        ));
+        results
     };
 
     let sanakirja_results = {
@@ -336,7 +399,13 @@ fn main() {
         fs::remove_file(tmpfile.path()).unwrap();
         let db = sanakirja::Env::new(tmpfile.path(), 4096 * 1024 * 1024, 2).unwrap();
         let table = SanakirjaBenchDatabase::new(&db);
-        benchmark(table)
+        let mut results = benchmark(table);
+        let size = database_size(tmpfile.path());
+        results.push((
+            "size after bench".to_string(),
+            ResultType::SizeInBytes(size),
+        ));
+        results
     };
 
     fs::remove_dir_all(&tmpdir).unwrap();
@@ -347,19 +416,41 @@ fn main() {
         rows.push(vec![benchmark.to_string()]);
     }
 
-    for results in [
+    let results = [
         redb_latency_results,
         lmdb_results,
         rocksdb_results,
         sled_results,
         sanakirja_results,
-    ] {
-        for (i, (_benchmark, duration)) in results.iter().enumerate() {
-            rows[i].push(format!("{}ms", duration.as_millis()));
+    ];
+
+    let mut identified_smallests = vec![vec![false; results.len()]; rows.len()];
+    for (i, identified_smallests_row) in identified_smallests.iter_mut().enumerate() {
+        let mut smallest = None;
+        for (j, _) in identified_smallests_row.iter().enumerate() {
+            let (_, rt) = &results[j][i];
+            smallest = match smallest {
+                Some((_, prev)) if rt < prev => Some((j, rt)),
+                Some((pi, prev)) => Some((pi, prev)),
+                None => Some((j, rt)),
+            };
+        }
+        let (j, _rt) = smallest.unwrap();
+        identified_smallests_row[j] = true;
+    }
+
+    for (j, results) in results.iter().enumerate() {
+        for (i, (_benchmark, result_type)) in results.iter().enumerate() {
+            rows[i].push(if identified_smallests[i][j] {
+                format!("**{result_type}**")
+            } else {
+                result_type.to_string()
+            });
         }
     }
 
     let mut table = comfy_table::Table::new();
+    table.load_preset(comfy_table::presets::ASCII_MARKDOWN);
     table.set_width(100);
     table.set_header(["", "redb", "lmdb", "rocksdb", "sled", "sanakirja"]);
     for row in rows {
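The bold-best markdown output above relies only on comfy_table's ASCII_MARKDOWN preset plus literal ** markers in the winning cells. Below is a minimal standalone sketch of that idea, not part of the patch: the comfy_table calls mirror the ones used in lmdb_benchmark.rs, while the benchmark names and timings are made-up placeholder values rather than anything measured here, and it assumes comfy_table is available as a dependency (this repo already pulls it in for the benchmark).

// Sketch: render a GitHub-flavored markdown table and bold the smallest
// value in each row, the same presentation trick this patch applies.
fn main() {
    // Made-up sample data: (benchmark name, [redb ms, lmdb ms]).
    let rows = [
        ("bulk load", [2792u128, 1115]),
        ("individual writes", [462, 1119]),
    ];

    let mut table = comfy_table::Table::new();
    // ASCII_MARKDOWN swaps the default borders for |/- delimiters, so the
    // printed table can be pasted into a README as-is.
    table.load_preset(comfy_table::presets::ASCII_MARKDOWN);
    table.set_header(["", "redb", "lmdb"]);

    for (name, timings) in rows {
        let best = *timings.iter().min().unwrap();
        let mut cells = vec![name.to_string()];
        for t in timings {
            // Bold is just literal ** in the cell text; the markdown renderer does the rest.
            if t == best {
                cells.push(format!("**{t}ms**"));
            } else {
                cells.push(format!("{t}ms"));
            }
        }
        table.add_row(cells);
    }

    println!("{table}");
}

The benchmark itself does the same thing at a larger scale: it builds the rows from ResultType values and marks the smallest entry per row using the identified_smallests pass shown in the last hunk.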