From 91d4ee1526ee0f167977c5daa620f487172f44f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Sun, 25 Aug 2024 16:38:31 +0200
Subject: [PATCH] Better markdown benchmarks (#849)

* Display the size of the databases after the benchmarks

* Show the best result in bold text in the markdown table

* Display the table in markdown

* Make clippy happy

* Update the README to highlight the best timings for each benchmark
---
 Cargo.toml                |   2 +
 README.md                 |  28 ++++-----
 benches/lmdb_benchmark.rs | 127 ++++++++++++++++++++++++++++++++------
 3 files changed, 125 insertions(+), 32 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index b37d946f..ebfa880a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -34,6 +34,8 @@ redb1 = { version = "=1.0.0", package = "redb" }
 redb2 = { version = "=2.0.0", package = "redb" }
 serde = { version = "1.0", features = ["derive"] }
 bincode = "1.3.3"
+walkdir = "2.5.0"
+byte-unit = "=5.0.4"
 
 # Just benchmarking dependencies
 [target.'cfg(not(target_os = "wasi"))'.dev-dependencies]
diff --git a/README.md b/README.md
index b22ff95b..2cd3b245 100644
--- a/README.md
+++ b/README.md
@@ -53,20 +53,20 @@ To run all the tests and benchmarks a few extra dependencies are required:
 ## Benchmarks
 
 redb has similar performance to other top embedded key-value stores such as lmdb and rocksdb
 
-|                           | redb   | lmdb   | rocksdb | sled   | sanakirja |
-|---------------------------|--------|--------|---------|--------|-----------|
-| bulk load                 | 2792ms | 1115ms | 5610ms  | 5005ms | 1161ms    |
-| individual writes         | 462ms  | 1119ms | 1097ms  | 957ms  | 662ms     |
-| batch writes              | 2568ms | 2247ms | 1344ms  | 1622ms | 2713ms    |
-| random reads              | 988ms  | 558ms  | 3469ms  | 1509ms | 678ms     |
-| random reads              | 962ms  | 556ms  | 3377ms  | 1425ms | 671ms     |
-| random range reads        | 2534ms | 985ms  | 6058ms  | 4670ms | 1089ms    |
-| random range reads        | 2493ms | 998ms  | 5801ms  | 4665ms | 1119ms    |
-| random reads (4 threads)  | 344ms  | 141ms  | 1247ms  | 424ms  | 266ms     |
-| random reads (8 threads)  | 192ms  | 72ms   | 673ms   | 230ms  | 620ms     |
-| random reads (16 threads) | 131ms  | 47ms   | 476ms   | 148ms  | 3500ms    |
-| random reads (32 threads) | 118ms  | 44ms   | 412ms   | 129ms  | 4313ms    |
-| removals                  | 2184ms | 784ms  | 2451ms  | 2047ms | 1344ms    |
+|                           | redb       | lmdb       | rocksdb     | sled   | sanakirja |
+|---------------------------|------------|------------|-------------|--------|-----------|
+| bulk load                 | 2792ms     | **1115ms** | 5610ms      | 5005ms | 1161ms    |
+| individual writes         | **462ms**  | 1119ms     | 1097ms      | 957ms  | 662ms     |
+| batch writes              | 2568ms     | 2247ms     | **1344ms**  | 1622ms | 2713ms    |
+| random reads              | 988ms      | **558ms**  | 3469ms      | 1509ms | 678ms     |
+| random reads              | 962ms      | **556ms**  | 3377ms      | 1425ms | 671ms     |
+| random range reads        | 2534ms     | **985ms**  | 6058ms      | 4670ms | 1089ms    |
+| random range reads        | 2493ms     | **998ms**  | 5801ms      | 4665ms | 1119ms    |
+| random reads (4 threads)  | 344ms      | **141ms**  | 1247ms      | 424ms  | 266ms     |
+| random reads (8 threads)  | 192ms      | **72ms**   | 673ms       | 230ms  | 620ms     |
+| random reads (16 threads) | 131ms      | **47ms**   | 476ms       | 148ms  | 3500ms    |
+| random reads (32 threads) | 118ms      | **44ms**   | 412ms       | 129ms  | 4313ms    |
+| removals                  | 2184ms     | **784ms**  | 2451ms      | 2047ms | 1344ms    |
 
 Source code for benchmark [here](./benches/lmdb_benchmark.rs). Results collected on a Ryzen 5900X with Samsung 980 PRO NVMe.
diff --git a/benches/lmdb_benchmark.rs b/benches/lmdb_benchmark.rs
index 7ed58cc1..f2197f78 100644
--- a/benches/lmdb_benchmark.rs
+++ b/benches/lmdb_benchmark.rs
@@ -1,5 +1,6 @@
 use std::env::current_dir;
 use std::mem::size_of;
+use std::path::Path;
 use std::sync::Arc;
 use std::{fs, process, thread};
 use tempfile::{NamedTempFile, TempDir};
@@ -70,7 +71,7 @@ fn make_rng_shards(shards: usize, elements: usize) -> Vec {
     rngs
 }
 
-fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
+fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, ResultType)> {
     let mut rng = make_rng();
     let mut results = Vec::new();
     let db = Arc::new(db);
@@ -95,7 +96,7 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
         ELEMENTS,
         duration.as_millis()
     );
-    results.push(("bulk load".to_string(), duration));
+    results.push(("bulk load".to_string(), ResultType::Duration(duration)));
 
     let start = Instant::now();
     let writes = 100;
@@ -118,7 +119,10 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
         writes,
         duration.as_millis()
     );
-    results.push(("individual writes".to_string(), duration));
+    results.push((
+        "individual writes".to_string(),
+        ResultType::Duration(duration),
+    ));
 
     let start = Instant::now();
     let batch_size = 1000;
@@ -144,7 +148,7 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
         batch_size,
         duration.as_millis()
     );
-    results.push(("batch writes".to_string(), duration));
+    results.push(("batch writes".to_string(), ResultType::Duration(duration)));
 
     let txn = db.read_transaction();
     {
@@ -155,7 +159,7 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
             let end = Instant::now();
             let duration = end - start;
             println!("{}: len() in {}ms", T::db_type_name(), duration.as_millis());
-            results.push(("len()".to_string(), duration));
+            results.push(("len()".to_string(), ResultType::Duration(duration)));
         }
 
         for _ in 0..ITERATIONS {
@@ -179,7 +183,7 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
                 ELEMENTS,
                 duration.as_millis()
             );
-            results.push(("random reads".to_string(), duration));
+            results.push(("random reads".to_string(), ResultType::Duration(duration)));
         }
 
         for _ in 0..ITERATIONS {
@@ -208,7 +212,10 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
                 ELEMENTS * num_scan,
                 duration.as_millis()
            );
-            results.push(("random range reads".to_string(), duration));
+            results.push((
+                "random range reads".to_string(),
+                ResultType::Duration(duration),
+            ));
         }
     }
     drop(txn);
@@ -246,7 +253,10 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
             ELEMENTS,
             duration.as_millis()
         );
-        results.push((format!("random reads ({num_threads} threads)"), duration));
+        results.push((
+            format!("random reads ({num_threads} threads)"),
+            ResultType::Duration(duration),
+        ));
     }
 
     let start = Instant::now();
@@ -271,11 +281,40 @@ fn benchmark<T: BenchDatabase + Send + Sync>(db: T) -> Vec<(String, Duration)> {
         deletes,
         duration.as_millis()
     );
-    results.push(("removals".to_string(), duration));
+    results.push(("removals".to_string(), ResultType::Duration(duration)));
 
     results
 }
 
+fn database_size(path: &Path) -> u64 {
+    let mut size = 0u64;
+    for result in walkdir::WalkDir::new(path) {
+        let entry = result.unwrap();
+        size += entry.metadata().unwrap().len();
+    }
+    size
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
+enum ResultType {
+    Duration(Duration),
+    SizeInBytes(u64),
+}
+
+impl std::fmt::Display for ResultType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        use byte_unit::{Byte, UnitType};
+
+        match self {
+            ResultType::Duration(d) => write!(f, "{d:.2?}"),
+            ResultType::SizeInBytes(s) => {
+                let b = Byte::from_u64(*s).get_appropriate_unit(UnitType::Binary);
+                write!(f, "{b:.2}")
+            }
+        }
+    }
+}
+
 fn main() {
     let tmpdir = current_dir().unwrap().join(".benchmark");
     fs::create_dir(&tmpdir).unwrap();
@@ -294,7 +333,13 @@ fn main() {
             .create(tmpfile.path())
             .unwrap();
         let table = RedbBenchDatabase::new(&db);
-        benchmark(table)
+        let mut results = benchmark(table);
+        let size = database_size(tmpfile.path());
+        results.push((
+            "size after bench".to_string(),
+            ResultType::SizeInBytes(size),
+        ));
+        results
     };
 
     let lmdb_results = {
@@ -306,7 +351,13 @@ fn main() {
                 .unwrap()
         };
         let table = HeedBenchDatabase::new(&env);
-        benchmark(table)
+        let mut results = benchmark(table);
+        let size = database_size(tmpfile.path());
+        results.push((
+            "size after bench".to_string(),
+            ResultType::SizeInBytes(size),
+        ));
+        results
     };
 
     let rocksdb_results = {
@@ -321,14 +372,26 @@ fn main() {
 
         let db = rocksdb::TransactionDB::open(&opts, &Default::default(), tmpfile.path()).unwrap();
         let table = RocksdbBenchDatabase::new(&db);
-        benchmark(table)
+        let mut results = benchmark(table);
+        let size = database_size(tmpfile.path());
+        results.push((
+            "size after bench".to_string(),
+            ResultType::SizeInBytes(size),
+        ));
+        results
     };
 
     let sled_results = {
         let tmpfile: TempDir = tempfile::tempdir_in(&tmpdir).unwrap();
         let db = sled::Config::new().path(tmpfile.path()).open().unwrap();
         let table = SledBenchDatabase::new(&db, tmpfile.path());
-        benchmark(table)
+        let mut results = benchmark(table);
+        let size = database_size(tmpfile.path());
+        results.push((
+            "size after bench".to_string(),
+            ResultType::SizeInBytes(size),
+        ));
+        results
     };
 
     let sanakirja_results = {
@@ -336,7 +399,13 @@ fn main() {
         fs::remove_file(tmpfile.path()).unwrap();
         let db = sanakirja::Env::new(tmpfile.path(), 4096 * 1024 * 1024, 2).unwrap();
         let table = SanakirjaBenchDatabase::new(&db);
-        benchmark(table)
+        let mut results = benchmark(table);
+        let size = database_size(tmpfile.path());
+        results.push((
+            "size after bench".to_string(),
+            ResultType::SizeInBytes(size),
+        ));
+        results
     };
 
     fs::remove_dir_all(&tmpdir).unwrap();
@@ -347,19 +416,41 @@ fn main() {
         rows.push(vec![benchmark.to_string()]);
     }
 
-    for results in [
+    let results = [
         redb_latency_results,
         lmdb_results,
         rocksdb_results,
         sled_results,
         sanakirja_results,
-    ] {
-        for (i, (_benchmark, duration)) in results.iter().enumerate() {
-            rows[i].push(format!("{}ms", duration.as_millis()));
+    ];
+
+    let mut identified_smallests = vec![vec![false; results.len()]; rows.len()];
+    for (i, identified_smallests_row) in identified_smallests.iter_mut().enumerate() {
+        let mut smallest = None;
+        for (j, _) in identified_smallests_row.iter().enumerate() {
+            let (_, rt) = &results[j][i];
+            smallest = match smallest {
+                Some((_, prev)) if rt < prev => Some((j, rt)),
+                Some((pi, prev)) => Some((pi, prev)),
+                None => Some((j, rt)),
+            };
+        }
+        let (j, _rt) = smallest.unwrap();
+        identified_smallests_row[j] = true;
+    }
+
+    for (j, results) in results.iter().enumerate() {
+        for (i, (_benchmark, result_type)) in results.iter().enumerate() {
+            rows[i].push(if identified_smallests[i][j] {
+                format!("**{result_type}**")
+            } else {
+                result_type.to_string()
+            });
         }
     }
 
     let mut table = comfy_table::Table::new();
+    table.load_preset(comfy_table::presets::ASCII_MARKDOWN);
     table.set_width(100);
     table.set_header(["", "redb", "lmdb", "rocksdb", "sled", "sanakirja"]);
     for row in rows {
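The bold-best markdown output above relies only on comfy_table's ASCII_MARKDOWN preset plus literal ** markers in the winning cells. Below is a minimal standalone sketch of that idea, not part of the patch: the comfy_table calls mirror the ones used in lmdb_benchmark.rs, while the benchmark names and timings are made-up placeholder values rather than anything measured here, and it assumes comfy_table is available as a dependency (this repo already pulls it in for the benchmark).

// Sketch: render a GitHub-flavored markdown table and bold the smallest
// value in each row, the same presentation trick this patch applies.
fn main() {
    // Made-up sample data: (benchmark name, [redb ms, lmdb ms]).
    let rows = [
        ("bulk load", [2792u128, 1115]),
        ("individual writes", [462, 1119]),
    ];

    let mut table = comfy_table::Table::new();
    // ASCII_MARKDOWN swaps the default borders for |/- delimiters, so the
    // printed table can be pasted into a README as-is.
    table.load_preset(comfy_table::presets::ASCII_MARKDOWN);
    table.set_header(["", "redb", "lmdb"]);

    for (name, timings) in rows {
        let best = *timings.iter().min().unwrap();
        let mut cells = vec![name.to_string()];
        for t in timings {
            // Bold is just literal ** in the cell text; the markdown renderer does the rest.
            if t == best {
                cells.push(format!("**{t}ms**"));
            } else {
                cells.push(format!("{t}ms"));
            }
        }
        table.add_row(cells);
    }

    println!("{table}");
}

The benchmark itself does the same thing at a larger scale: it builds the rows from ResultType values and marks the smallest entry per row using the identified_smallests pass shown in the last hunk.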