From 96aeb52372543bf1cc5ab814e50210cb3fb1d898 Mon Sep 17 00:00:00 2001 From: Noel Kwan <47273164+kwannoel@users.noreply.github.com> Date: Fri, 18 Oct 2024 20:48:31 +0800 Subject: [PATCH] test: use tempfile for rw metadata storage in deterministic tests (#18970) --- ci/scripts/deterministic-e2e-test.sh | 16 ++--- ci/scripts/deterministic-it-test.sh | 2 +- ci/scripts/deterministic-recovery-test.sh | 16 ++--- ci/scripts/run-deterministic-fuzz-test.sh | 2 +- src/tests/simulation/src/cluster.rs | 71 +++-------------------- src/tests/simulation/src/main.rs | 7 --- 6 files changed, 23 insertions(+), 91 deletions(-) diff --git a/ci/scripts/deterministic-e2e-test.sh b/ci/scripts/deterministic-e2e-test.sh index b0965688003c..202e2dd1e2af 100755 --- a/ci/scripts/deterministic-e2e-test.sh +++ b/ci/scripts/deterministic-e2e-test.sh @@ -30,25 +30,25 @@ export LOGDIR=.risingwave/log mkdir -p $LOGDIR echo "--- deterministic simulation e2e, ci-3cn-2fe, ddl" -seq "$TEST_NUM" | parallel './risingwave_simulation ./e2e_test/ddl/\*\*/\*.slt 2> $LOGDIR/ddl-{}.log && rm $LOGDIR/ddl-{}.log' +seq "$TEST_NUM" | parallel 'MADSIM_TEST_SEED={} ./risingwave_simulation ./e2e_test/ddl/\*\*/\*.slt 2> $LOGDIR/ddl-{}.log && rm $LOGDIR/ddl-{}.log' echo "--- deterministic simulation e2e, ci-3cn-2fe, streaming" -seq "$TEST_NUM" | parallel './risingwave_simulation ./e2e_test/streaming/\*\*/\*.slt 2> $LOGDIR/streaming-{}.log && rm $LOGDIR/streaming-{}.log' +seq "$TEST_NUM" | parallel 'MADSIM_TEST_SEED={} ./risingwave_simulation ./e2e_test/streaming/\*\*/\*.slt 2> $LOGDIR/streaming-{}.log && rm $LOGDIR/streaming-{}.log' echo "--- deterministic simulation e2e, ci-3cn-2fe, batch" -seq "$TEST_NUM" | parallel './risingwave_simulation ./e2e_test/batch/\*\*/\*.slt 2> $LOGDIR/batch-{}.log && rm $LOGDIR/batch-{}.log' +seq "$TEST_NUM" | parallel 'MADSIM_TEST_SEED={} ./risingwave_simulation ./e2e_test/batch/\*\*/\*.slt 2> $LOGDIR/batch-{}.log && rm $LOGDIR/batch-{}.log' echo "--- deterministic simulation e2e, ci-3cn-2fe, kafka source" -seq "$TEST_NUM" | parallel './risingwave_simulation --kafka-datadir=./e2e_test/source_legacy/basic/scripts/test_data ./e2e_test/source_legacy/basic/kafka\*.slt 2> $LOGDIR/source-{}.log && rm $LOGDIR/source-{}.log' +seq "$TEST_NUM" | parallel 'MADSIM_TEST_SEED={} ./risingwave_simulation --kafka-datadir=./e2e_test/source_legacy/basic/scripts/test_data ./e2e_test/source_legacy/basic/kafka\*.slt 2> $LOGDIR/source-{}.log && rm $LOGDIR/source-{}.log' echo "--- deterministic simulation e2e, ci-3cn-2fe, parallel, streaming" -seq "$TEST_NUM" | parallel './risingwave_simulation -j 16 ./e2e_test/streaming/\*\*/\*.slt 2> $LOGDIR/parallel-streaming-{}.log && rm $LOGDIR/parallel-streaming-{}.log' +seq "$TEST_NUM" | parallel 'MADSIM_TEST_SEED={} ./risingwave_simulation -j 16 ./e2e_test/streaming/\*\*/\*.slt 2> $LOGDIR/parallel-streaming-{}.log && rm $LOGDIR/parallel-streaming-{}.log' echo "--- deterministic simulation e2e, ci-3cn-2fe, parallel, batch" -seq "$TEST_NUM" | parallel './risingwave_simulation -j 16 ./e2e_test/batch/\*\*/\*.slt 2> $LOGDIR/parallel-batch-{}.log && rm $LOGDIR/parallel-batch-{}.log' +seq "$TEST_NUM" | parallel 'MADSIM_TEST_SEED={} ./risingwave_simulation -j 16 ./e2e_test/batch/\*\*/\*.slt 2> $LOGDIR/parallel-batch-{}.log && rm $LOGDIR/parallel-batch-{}.log' echo "--- deterministic simulation e2e, ci-3cn-2fe, fuzzing (pre-generated-queries)" -timeout 10m seq 64 | parallel RUST_MIN_STACK=4194304 './risingwave_simulation --run-sqlsmith-queries ./src/tests/sqlsmith/tests/sqlsmith-query-snapshots/{} 2> $LOGDIR/fuzzing-{}.log && rm $LOGDIR/fuzzing-{}.log' +timeout 10m seq 64 | parallel RUST_MIN_STACK=4194304 'MADSIM_TEST_SEED={} ./risingwave_simulation --run-sqlsmith-queries ./src/tests/sqlsmith/tests/sqlsmith-query-snapshots/{} 2> $LOGDIR/fuzzing-{}.log && rm $LOGDIR/fuzzing-{}.log' echo "--- deterministic simulation e2e, ci-3cn-2fe, e2e extended mode test" -seq "$TEST_NUM" | parallel 'RUST_LOG=info ./risingwave_simulation -e 2> $LOGDIR/extended-{}.log && rm $LOGDIR/extended-{}.log' +seq "$TEST_NUM" | parallel 'MADSIM_TEST_SEED={} RUST_LOG=info ./risingwave_simulation -e 2> $LOGDIR/extended-{}.log && rm $LOGDIR/extended-{}.log' diff --git a/ci/scripts/deterministic-it-test.sh b/ci/scripts/deterministic-it-test.sh index 1190f8b0178f..40288f5848b1 100755 --- a/ci/scripts/deterministic-it-test.sh +++ b/ci/scripts/deterministic-it-test.sh @@ -19,7 +19,7 @@ mv target/ci-sim target/sim TEST_PATTERN="$@" echo "--- Run integration tests in deterministic simulation mode" -seq "$TEST_NUM" | parallel "NEXTEST_PROFILE=ci-sim \ +seq "$TEST_NUM" | parallel "MADSIM_TEST_SEED={} NEXTEST_PROFILE=ci-sim \ cargo nextest run \ --no-fail-fast \ --cargo-metadata target/nextest/cargo-metadata.json \ diff --git a/ci/scripts/deterministic-recovery-test.sh b/ci/scripts/deterministic-recovery-test.sh index bf1f28861c24..82641b66afc5 100755 --- a/ci/scripts/deterministic-recovery-test.sh +++ b/ci/scripts/deterministic-recovery-test.sh @@ -46,11 +46,7 @@ filter_stack_trace_for_all_logs() { # trap filter_stack_trace_for_all_logs ERR -# NOTE(kwannoel): We must use `export` here, because the variables are not substituted -# directly via bash subtitution. Instead, the `parallel` command substitutes the variables -# from the environment. If they are declared without `export`, `parallel` can't read them from the env. -export EXTRA_ARGS="--sqlite-data-dir=." - +export EXTRA_ARGS="${EXTRA_ARGS:-}" if [[ -n "${USE_ARRANGEMENT_BACKFILL:-}" ]]; then export EXTRA_ARGS="$EXTRA_ARGS --use-arrangement-backfill" fi @@ -58,7 +54,7 @@ fi echo "--- EXTRA_ARGS: ${EXTRA_ARGS}" echo "--- deterministic simulation e2e, ci-3cn-2fe-1meta, recovery, background_ddl" -seq "$TEST_NUM" | parallel './risingwave_simulation \ +seq "$TEST_NUM" | parallel 'MADSIM_TEST_SEED={} ./risingwave_simulation \ --kill \ --kill-rate=${KILL_RATE} \ ${EXTRA_ARGS:-} \ @@ -66,7 +62,7 @@ ${EXTRA_ARGS:-} \ 2> $LOGDIR/recovery-background-ddl-{}.log && rm $LOGDIR/recovery-background-ddl-{}.log' echo "--- deterministic simulation e2e, ci-3cn-2fe-1meta, recovery, ddl" -seq "$TEST_NUM" | parallel './risingwave_simulation \ +seq "$TEST_NUM" | parallel 'MADSIM_TEST_SEED={} ./risingwave_simulation \ --kill \ --kill-rate=${KILL_RATE} \ --background-ddl-rate=${BACKGROUND_DDL_RATE} \ @@ -74,7 +70,7 @@ ${EXTRA_ARGS:-} \ ./e2e_test/ddl/\*\*/\*.slt 2> $LOGDIR/recovery-ddl-{}.log && rm $LOGDIR/recovery-ddl-{}.log' echo "--- deterministic simulation e2e, ci-3cn-2fe-1meta, recovery, streaming" -seq "$TEST_NUM" | parallel './risingwave_simulation \ +seq "$TEST_NUM" | parallel 'MADSIM_TEST_SEED={} ./risingwave_simulation \ --kill \ --kill-rate=${KILL_RATE} \ --background-ddl-rate=${BACKGROUND_DDL_RATE} \ @@ -82,7 +78,7 @@ ${EXTRA_ARGS:-} \ ./e2e_test/streaming/\*\*/\*.slt 2> $LOGDIR/recovery-streaming-{}.log && rm $LOGDIR/recovery-streaming-{}.log' echo "--- deterministic simulation e2e, ci-3cn-2fe-1meta, recovery, batch" -seq "$TEST_NUM" | parallel './risingwave_simulation \ +seq "$TEST_NUM" | parallel 'MADSIM_TEST_SEED={} ./risingwave_simulation \ --kill \ --kill-rate=${KILL_RATE} \ --background-ddl-rate=${BACKGROUND_DDL_RATE} \ @@ -90,7 +86,7 @@ ${EXTRA_ARGS:-} \ ./e2e_test/batch/\*\*/\*.slt 2> $LOGDIR/recovery-batch-{}.log && rm $LOGDIR/recovery-batch-{}.log' echo "--- deterministic simulation e2e, ci-3cn-2fe-1meta, recovery, kafka source,sink" -seq "$TEST_NUM" | parallel './risingwave_simulation \ +seq "$TEST_NUM" | parallel 'MADSIM_TEST_SEED={} ./risingwave_simulation \ --kill \ --kill-rate=${KILL_RATE} \ --kafka-datadir=./e2e_test/source_legacy/basic/scripts/test_data \ diff --git a/ci/scripts/run-deterministic-fuzz-test.sh b/ci/scripts/run-deterministic-fuzz-test.sh index 85965446b5fc..720fef67b945 100755 --- a/ci/scripts/run-deterministic-fuzz-test.sh +++ b/ci/scripts/run-deterministic-fuzz-test.sh @@ -30,4 +30,4 @@ download-and-decompress-artifact risingwave_simulation . chmod +x ./risingwave_simulation echo "--- deterministic simulation e2e, ci-3cn-2fe, fuzzing (seed)" -seq 32 | parallel './risingwave_simulation --sqlsmith 100 ./src/tests/sqlsmith/tests/testdata 2> $LOGDIR/fuzzing-{}.log && rm $LOGDIR/fuzzing-{}.log' +seq 32 | parallel 'MADSIM_TEST_SEED={} ./risingwave_simulation --sqlsmith 100 ./src/tests/sqlsmith/tests/testdata 2> $LOGDIR/fuzzing-{}.log && rm $LOGDIR/fuzzing-{}.log' diff --git a/src/tests/simulation/src/cluster.rs b/src/tests/simulation/src/cluster.rs index 3b41c82afab0..a6369b6f8420 100644 --- a/src/tests/simulation/src/cluster.rs +++ b/src/tests/simulation/src/cluster.rs @@ -37,6 +37,7 @@ use risingwave_common::util::tokio_util::sync::CancellationToken; use risingwave_object_store::object::sim::SimServer as ObjectStoreSimServer; use risingwave_pb::common::WorkerNode; use sqllogictest::AsyncDB; +use tempfile::NamedTempFile; #[cfg(not(madsim))] use tokio::runtime::Handle; use uuid::Uuid; @@ -88,9 +89,6 @@ pub struct Configuration { /// Queries to run per session. pub per_session_queries: Arc>, - - /// dir to store SQL backend sqlite db - pub sqlite_data_dir: Option, } impl Default for Configuration { @@ -118,7 +116,6 @@ metrics_level = "Disabled" compactor_nodes: 1, compute_node_cores: 1, per_session_queries: vec![].into(), - sqlite_data_dir: None, } } } @@ -206,7 +203,6 @@ metrics_level = "Disabled" "create view if not exists mview_parallelism as select m.name, tf.parallelism from rw_materialized_views m, rw_table_fragments tf where m.id = tf.table_id;".into(), ] .into(), - ..Default::default() } } @@ -253,7 +249,6 @@ metrics_level = "Disabled" compute_node_cores: 1, per_session_queries: vec!["SET STREAMING_USE_ARRANGEMENT_BACKFILL = true;".into()] .into(), - ..Default::default() } } @@ -300,7 +295,6 @@ metrics_level = "Disabled" compactor_nodes: 1, compute_node_cores: 1, per_session_queries: vec![].into(), - ..Default::default() } } } @@ -327,6 +321,8 @@ pub struct Cluster { pub(crate) client: NodeHandle, #[cfg(madsim)] pub(crate) ctl: NodeHandle, + #[cfg(madsim)] + pub(crate) sqlite_file_handle: NamedTempFile, } impl Cluster { @@ -399,39 +395,12 @@ impl Cluster { } std::env::set_var("RW_META_ADDR", meta_addrs.join(",")); - // FIXME: some tests like integration tests will run concurrently, - // resulting in connecting to the same sqlite file if they're using the same seed. - let file_path = if let Some(sqlite_data_dir) = conf.sqlite_data_dir.as_ref() { - format!( - "{}/stest-{}-{}.sqlite", - sqlite_data_dir.display(), - handle.seed(), - Uuid::new_v4() - ) - } else { - format!("./stest-{}-{}.sqlite", handle.seed(), Uuid::new_v4()) - }; - if std::fs::exists(&file_path).unwrap() { - panic!( - "sqlite file already exists and used by other cluster: {}", - file_path - ) - } + let sqlite_file_handle: NamedTempFile = NamedTempFile::new().unwrap(); + let file_path = sqlite_file_handle.path().display().to_string(); + tracing::info!(?file_path, "sqlite_file_path"); let sql_endpoint = format!("sqlite://{}?mode=rwc", file_path); let backend_args = vec!["--backend", "sql", "--sql-endpoint", &sql_endpoint]; - // FIXME(kwannoel): - // Currently we just use the on-disk version, - // but it can lead to randomness due to disk io. - // We can use shared in-memory db instead. - // However sqlite cannot be started inside meta. - // Because if cluster stops, then this db will be dropped. - // We must instantiate it outside, not just pass the path in. - // let sqlite_path = format!( - // "sqlite::file:memdb{}?mode=memory&cache=shared", - // Uuid::new_v4() - // ); - // meta node for i in 1..=conf.meta_nodes { let args = [ @@ -570,6 +539,7 @@ impl Cluster { handle, client, ctl, + sqlite_file_handle, }) } @@ -888,33 +858,6 @@ impl Cluster { } } -#[cfg_or_panic(madsim)] -impl Drop for Cluster { - fn drop(&mut self) { - // FIXME: remove it when deprecate the on-disk version. - let default_path = PathBuf::from("."); - let sqlite_data_dir = self - .config - .sqlite_data_dir - .as_ref() - .unwrap_or_else(|| &default_path); - for entry in std::fs::read_dir(sqlite_data_dir).unwrap() { - let entry = entry.unwrap(); - let path = entry.path(); - if path - .file_name() - .unwrap() - .to_str() - .unwrap() - .starts_with(&format!("stest-{}-", self.handle.seed())) - { - std::fs::remove_file(path).unwrap(); - break; - } - } - } -} - type SessionRequest = ( String, // query sql oneshot::Sender>, // channel to send result back diff --git a/src/tests/simulation/src/main.rs b/src/tests/simulation/src/main.rs index 6ef89afc5eb9..4d3122c486d9 100644 --- a/src/tests/simulation/src/main.rs +++ b/src/tests/simulation/src/main.rs @@ -14,8 +14,6 @@ #![cfg_attr(not(madsim), allow(dead_code))] -use std::path::PathBuf; - use cfg_or_panic::cfg_or_panic; use clap::Parser; @@ -124,10 +122,6 @@ pub struct Args { #[clap(long)] run_differential_tests: bool, - /// dir to store sqlite backend data of meta node - #[clap(long)] - sqlite_data_dir: Option, - #[arg(short, long)] e2e_extended_test: bool, @@ -166,7 +160,6 @@ async fn main() { compactor_nodes: args.compactor_nodes, compute_node_cores: args.compute_node_cores, meta_nodes: args.meta_nodes, - sqlite_data_dir: args.sqlite_data_dir, per_session_queries: if args.use_arrangement_backfill { vec!["SET STREAMING_USE_ARRANGEMENT_BACKFILL = true;".to_string()].into() } else {