address comments

MystenLabs · Jan 15, 2025 · 489ed78 · 489ed78
1 parent 0582f1f
commit 489ed78
Show file tree

Hide file tree

Showing 5 changed files with 59 additions and 28 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/sui-rpc-benchmark/Cargo.toml b/crates/sui-rpc-benchmark/Cargo.toml
@@ -10,13 +10,15 @@ edition = "2021"
 anyhow.workspace = true
 clap = { workspace = true, features = ["derive"] }
 dashmap.workspace = true
+futures.workspace = true
+rand.workspace = true
+sui-indexer-alt-framework.workspace = true
 telemetry-subscribers.workspace = true
 tracing.workspace = true
 tokio = { workspace = true, features = ["full"] }
 tokio-postgres = "0.7.12"
 bb8 = "0.9.0"
 bb8-postgres = "0.9.0"
-rand = "0.8.5"
 
 [[bin]]
 name = "sui-rpc-benchmark"

diff --git a/crates/sui-rpc-benchmark/src/direct/metrics.rs b/crates/sui-rpc-benchmark/src/direct/metrics.rs
@@ -1,6 +1,9 @@
 // Copyright (c) Mysten Labs, Inc.
 // SPDX-License-Identifier: Apache-2.0
 
+//! This module defines data structures and functions for collecting
+//! and summarizing performance metrics from benchmark queries. It
+//! supports tracking overall and per-table query latencies, error
+//! counts, and total queries.
 use dashmap::DashMap;
 use std::sync::Arc;
 use std::time::Duration;

diff --git a/crates/sui-rpc-benchmark/src/direct/query_executor.rs b/crates/sui-rpc-benchmark/src/direct/query_executor.rs
@@ -7,13 +7,31 @@ use anyhow::Result;
 use bb8::Pool;
 use bb8_postgres::PostgresConnectionManager;
 use rand::seq::SliceRandom;
+use sui_indexer_alt_framework::task::TrySpawnStreamExt;
 use tokio_postgres::{types::ToSql, types::Type, NoTls, Row};
 use tracing::info;
 
 use crate::direct::benchmark_config::BenchmarkConfig;
 use crate::direct::metrics::{BenchmarkResult, MetricsCollector};
 use crate::direct::query_generator::BenchmarkQuery;
 
+/// Coordinates benchmark queries against the database. The executor can
+/// "enrich" each `BenchmarkQuery` by sampling real data from the relevant
+/// table. Each query's execution is timed and recorded via
+/// `MetricsCollector`, which is defined in the metrics module.
+pub struct QueryExecutor {
+    pool: Pool<PostgresConnectionManager<NoTls>>,
+    queries: Vec<BenchmarkQuery>,
+    enriched_queries: Vec<EnrichedBenchmarkQuery>,
+    config: BenchmarkConfig,
+    metrics: MetricsCollector,
+}
+
+/// Represents strongly typed SQL values used in parametric queries.
+/// Storing them as an enum allows us to handle different column types
+/// transparently when performing random queries from the database.
+/// This approach lets us build parameter lists matching each column's
+/// actual type at runtime, ensuring correct and safe query execution.
 #[derive(Clone, Debug)]
 pub enum SqlValue {
     Text(Option<String>),
@@ -32,14 +50,6 @@ pub struct EnrichedBenchmarkQuery {
     pub types: Vec<Type>,
 }
 
-pub struct QueryExecutor {
-    pool: Pool<PostgresConnectionManager<NoTls>>,
-    queries: Vec<BenchmarkQuery>,
-    enriched_queries: Vec<EnrichedBenchmarkQuery>,
-    config: BenchmarkConfig,
-    metrics: MetricsCollector,
-}
-
 impl QueryExecutor {
     pub async fn new(
         db_url: &str,
@@ -73,6 +83,10 @@ impl QueryExecutor {
             .collect()
     }
 
+    /// "Enriching" a query involves discovering valid column values for
+    /// placeholders. By sampling data from the table, we can produce
+    /// realistic sets of parameters, rather than random or empty
+    /// placeholders, leading to more accurate benchmark results.
     async fn enrich_query(&self, query: &BenchmarkQuery) -> Result<EnrichedBenchmarkQuery> {
         let client = self.pool.get().await?;
         let sql = format!(
@@ -176,26 +190,24 @@ impl QueryExecutor {
 
         let start = Instant::now();
         let deadline = start + self.config.duration;
-
-        let queries_per_worker = self.enriched_queries.chunks(
-            (self.enriched_queries.len() + self.config.concurrency - 1) / self.config.concurrency,
+        let (concurrency, metrics, pool, queries) = (
+            self.config.concurrency,
+            self.metrics.clone(),
+            self.pool.clone(),
+            self.enriched_queries.clone(),
         );
-
-        let mut handles = Vec::new();
-        for worker_queries in queries_per_worker {
-            let pool = self.pool.clone();
-            let worker_queries = worker_queries.to_vec();
-            let metrics = self.metrics.clone();
-
-            let handle = tokio::spawn(async move {
-                QueryExecutor::worker_task(pool, worker_queries, metrics, deadline).await
-            });
-            handles.push(handle);
-        }
-
-        for handle in handles {
-            handle.await??;
-        }
+        futures::stream::iter(
+            queries
+                .into_iter()
+                .map(move |query| (pool.clone(), vec![query], metrics.clone(), deadline)),
+        )
+        .try_for_each_spawned(
+            concurrency,
+            |(pool, queries, metrics, deadline)| async move {
+                QueryExecutor::worker_task(pool, queries, metrics, deadline).await
+            },
+        )
+        .await?;
 
         Ok(self.metrics.generate_report())
     }

diff --git a/crates/sui-rpc-benchmark/src/direct/query_generator.rs b/crates/sui-rpc-benchmark/src/direct/query_generator.rs
@@ -1,6 +1,15 @@
 // Copyright (c) Mysten Labs, Inc.
 // SPDX-License-Identifier: Apache-2.0
 
+//! This module generates SQL queries for benchmarking, including
+//! queries based on primary key columns and indexed columns.
+//!
+//! The primary key queries ("pk queries") select a row by its primary
+//! key, while the "index queries" filter by indexed columns. Instead
+//! of returning just a list of tables and indexes, this module
+//! returns a vector of `BenchmarkQuery` objects, each of which is
+//! ready to be executed. This approach streamlines the pipeline
+//! so we can directly run these queries as part of the benchmark.
 use tokio_postgres::NoTls;
 use tracing::info;