diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index 1e4b655cb9d..c48288fc082 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -83,6 +83,23 @@ ptime -m cargo build -Z unstable-options --timings=json --workspace --tests --lo banner test ptime -m timeout 2h cargo nextest run --profile ci --locked --verbose +# +# If upgraded CockroachDB clusters are running on a different cluster version +# than newly-initialized ones, run database-related tests from the older cluster +# version. +# +if cmp tools/cockroachdb_version tools/prev_cockroachdb_version; then + # files are the same + true +else + # if return code is > 1, cmp failed + [[ $? -gt 1 ]] && exit 2 + # otherwise, files differ + cargo xtask download cockroach-prev + CRDB_SEED_USE_PREV=yes ptime -m timeout 1h cargo nextest run --profile ci --locked --verbose \ + --filter-expr 'rdeps(nexus-test-utils) - package(omicron-dev)' +fi + # # https://github.com/nextest-rs/nextest/issues/16 # diff --git a/Cargo.lock b/Cargo.lock index d79933cd87f..8a923486a9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5618,6 +5618,7 @@ dependencies = [ "gateway-client", "gateway-messages", "gateway-test-utils", + "http 0.2.12", "humantime", "indicatif", "internal-dns", diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index 9cdf03093c8..656ad74530c 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -58,13 +58,14 @@ multimap.workspace = true indicatif.workspace = true [dev-dependencies] +camino-tempfile.workspace = true expectorate.workspace = true +http.workspace = true nexus-test-utils.workspace = true nexus-test-utils-macros.workspace = true omicron-nexus.workspace = true omicron-test-utils.workspace = true subprocess.workspace = true -camino-tempfile.workspace = true # Disable doc builds by default for our binaries to work around issue # rust-lang/cargo#8373. These docs would not be very useful anyway. diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 47d091443db..b330a1ad908 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -602,8 +602,8 @@ WARNING: Zones exist without physical disks! COCKROACHDB SETTINGS: - state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de - cluster.preserve_downgrade_option: "22.1" + state fingerprint::::::::::::::::: + cluster.preserve_downgrade_option: METADATA: created by::::::::::: nexus-test-utils @@ -640,8 +640,8 @@ WARNING: Zones exist without physical disks! COCKROACHDB SETTINGS: - state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de - cluster.preserve_downgrade_option: "22.1" + state fingerprint::::::::::::::::: + cluster.preserve_downgrade_option: METADATA: created by::::::::::: nexus-test-utils @@ -681,8 +681,8 @@ to: blueprint ............. COCKROACHDB SETTINGS: - state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de (unchanged) - cluster.preserve_downgrade_option: "22.1" (unchanged) + state fingerprint::::::::::::::::: (unchanged) + cluster.preserve_downgrade_option: (unchanged) METADATA: internal DNS version: 1 (unchanged) diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 19be33631db..f516de4cd74 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -7,8 +7,11 @@ //! Feel free to change the tool's output. This test just makes it easy to make //! sure you're only breaking what you intend. +use dropshot::Method; use expectorate::assert_contents; +use http::StatusCode; use nexus_test_utils_macros::nexus_test; +use nexus_types::deployment::Blueprint; use nexus_types::deployment::SledFilter; use nexus_types::deployment::UnstableReconfiguratorState; use omicron_test_utils::dev::test_cmds::path_to_executable; @@ -83,6 +86,20 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { let tmppath = tmpdir.path().join("reconfigurator-save.out"); let initial_blueprint_id = cptestctx.initial_blueprint_id.to_string(); + // Get the CockroachDB metadata from the blueprint so we can redact it + let initial_blueprint: Blueprint = dropshot::test_util::read_json( + &mut cptestctx + .internal_client + .make_request_no_body( + Method::GET, + &format!("/deployment/blueprints/all/{initial_blueprint_id}"), + StatusCode::OK, + ) + .await + .unwrap(), + ) + .await; + let mut output = String::new(); let invocations: &[&[&str]] = &[ @@ -119,6 +136,19 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { // ControlPlaneTestContext. ]; + let mut redactions = ExtraRedactions::new(); + redactions + .variable_length("tmp_path", tmppath.as_str()) + .fixed_length("blueprint_id", &initial_blueprint_id) + .variable_length( + "cockroachdb_fingerprint", + &initial_blueprint.cockroachdb_fingerprint, + ); + let crdb_version = + initial_blueprint.cockroachdb_setting_preserve_downgrade.to_string(); + if initial_blueprint.cockroachdb_setting_preserve_downgrade.is_set() { + redactions.variable_length("cockroachdb_version", &crdb_version); + } for args in invocations { println!("running commands with args: {:?}", args); let p = postgres_url.to_string(); @@ -133,9 +163,7 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { }, &cmd_path, args, - ExtraRedactions::new() - .variable_length("tmp_path", tmppath.as_str()) - .fixed_length("blueprint_id", &initial_blueprint_id), + &redactions, ) .await; } diff --git a/dev-tools/xtask/src/download.rs b/dev-tools/xtask/src/download.rs index 2790a638a71..5b1e899a757 100644 --- a/dev-tools/xtask/src/download.rs +++ b/dev-tools/xtask/src/download.rs @@ -22,6 +22,7 @@ use strum::IntoEnumIterator; use tar::Archive; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::process::Command; +use tokio::sync::Mutex; const BUILDOMAT_URL: &'static str = "https://buildomat.eng.oxide.computer/public/file"; @@ -50,6 +51,9 @@ enum Target { /// CockroachDB binary Cockroach, + /// CockroachDB binary (previous major version) + CockroachPrev, + /// Web console assets Console, @@ -125,7 +129,8 @@ pub async fn run_cmd(args: DownloadArgs) -> Result<()> { bail!("We should have already filtered this 'All' target out?"); } Target::Clickhouse => downloader.download_clickhouse().await, - Target::Cockroach => downloader.download_cockroach().await, + Target::Cockroach => downloader.download_cockroach("").await, + Target::CockroachPrev => downloader.download_cockroach("prev_").await, Target::Console => downloader.download_console().await, Target::DendriteOpenapi => { downloader.download_dendrite_openapi().await @@ -197,43 +202,52 @@ impl<'a> Downloader<'a> { } } -/// Parses a file of the format: -/// -/// ```ignore -/// KEY1="value1" -/// KEY2="value2" -/// ``` -/// -/// And returns an array of the values in the same order as keys. -async fn get_values_from_file( - keys: [&str; N], - path: &Utf8Path, -) -> Result<[String; N]> { - // Map of "key" => "Position in output". - let mut keys: HashMap<&str, usize> = - keys.into_iter().enumerate().map(|(i, s)| (s, i)).collect(); - - const EMPTY_STRING: String = String::new(); - let mut values = [EMPTY_STRING; N]; - - let content = tokio::fs::read_to_string(&path) - .await - .context("Failed to read {path}")?; - for line in content.lines() { - let line = line.trim(); - let Some((key, value)) = line.split_once('=') else { - continue; - }; - let value = value.trim_matches('"'); - if let Some(i) = keys.remove(key) { - values[i] = value.to_string(); +macro_rules! get_values_fn { + ($name:ident, $ty:ty, $def:expr, $check:expr) => { + /// Parses a file of the format: + /// + /// ```ignore + /// KEY1="value1" + /// KEY2="value2" + /// ``` + /// + /// And returns an array of the values in the same order as keys. + async fn $name( + keys: [&str; N], + path: &Utf8Path, + ) -> Result<[$ty; N]> { + // Map of "key" => "Position in output". + let mut keys: HashMap<&str, usize> = + keys.into_iter().enumerate().map(|(i, s)| (s, i)).collect(); + + const DEFAULT: $ty = $def; + let mut values = [DEFAULT; N]; + + let content = tokio::fs::read_to_string(&path) + .await + .context("Failed to read {path}")?; + for line in content.lines() { + let line = line.trim(); + let Some((key, value)) = line.split_once('=') else { + continue; + }; + let value = value.trim_matches('"'); + if let Some(i) = keys.remove(key) { + values[i] = value.to_string().into(); + } + } + if $check && !keys.is_empty() { + bail!( + "Could not find keys: {:?}", + keys.keys().collect::>(), + ); + } + Ok(values) } - } - if !keys.is_empty() { - bail!("Could not find keys: {:?}", keys.keys().collect::>(),); - } - Ok(values) + }; } +get_values_fn!(get_values_from_file, String, String::new(), true); +get_values_fn!(try_get_values_from_file, Option, None, false); /// Send a GET request to `url`, downloading the contents to `path`. /// @@ -486,34 +500,52 @@ impl<'a> Downloader<'a> { Ok(()) } - async fn download_cockroach(&self) -> Result<()> { + async fn download_cockroach(&self, prefix: &str) -> Result<()> { let os = os_name()?; let download_dir = self.output_dir.join("downloads"); - let destination_dir = self.output_dir.join("cockroachdb"); + let destination_dir = + self.output_dir.join(format!("{prefix}cockroachdb")); - let checksums_path = self.versions_dir.join("cockroachdb_checksums"); - let [checksum] = get_values_from_file( + let checksums_path = + self.versions_dir.join(format!("{prefix}cockroachdb_checksums")); + let [mut checksum] = get_values_from_file( [&format!("CIDL_SHA256_{}", os.env_name())], &checksums_path, ) .await?; + let mut build = match os { + Os::Illumos => "illumos", + Os::Linux => "linux-amd64", + Os::Mac => "darwin-10.9-amd64", + }; + + if matches!(os, Os::Mac) && std::env::consts::ARCH == "aarch64" { + if let [Some(s)] = try_get_values_from_file( + ["CIDL_SHA256_DARWIN_AMD64"], + &checksums_path, + ) + .await? + { + checksum = s; + build = "darwin-11.0-amd64"; + } + } - let versions_path = self.versions_dir.join("cockroachdb_version"); + let versions_path = + self.versions_dir.join(format!("{prefix}cockroachdb_version")); let version = tokio::fs::read_to_string(&versions_path) .await .context("Failed to read version from {versions_path}")?; let version = version.trim(); let (url_base, suffix) = match os { - Os::Illumos => ("https://illumos.org/downloads", "tar.gz"), + Os::Illumos => ( + "https://oxide-cockroachdb-build.s3.us-west-2.amazonaws.com", + "tar.gz", + ), Os::Linux | Os::Mac => ("https://binaries.cockroachdb.com", "tgz"), }; - let build = match os { - Os::Illumos => "illumos", - Os::Linux => "linux-amd64", - Os::Mac => "darwin-10.9-amd64", - }; let version_directory = format!("cockroach-{version}"); let tarball_name = format!("{version_directory}.{build}"); @@ -522,6 +554,11 @@ impl<'a> Downloader<'a> { let tarball_path = download_dir.join(tarball_filename); + // Ensure that the download and unpack steps, which might write to the + // same paths, only run one at a time. + static MUTEX: Mutex<()> = Mutex::const_new(()); + let mutex_lock = MUTEX.lock().await; + tokio::fs::create_dir_all(&download_dir).await?; tokio::fs::create_dir_all(&destination_dir).await?; @@ -539,6 +576,9 @@ impl<'a> Downloader<'a> { info!(self.log, "tarball path: {tarball_path}"); unpack_tarball(&self.log, &tarball_path, &download_dir).await?; + // We are done writing to potentially shared files + drop(mutex_lock); + // This is where the binary will end up eventually let cockroach_binary = destination_dir.join("bin/cockroach"); diff --git a/docs/crdb-upgrades.adoc b/docs/crdb-upgrades.adoc index eecfa9194e9..52231ee1995 100644 --- a/docs/crdb-upgrades.adoc +++ b/docs/crdb-upgrades.adoc @@ -60,13 +60,15 @@ a tick, but they must occur in that order.) . Add an enum variant for the new version to `CockroachDbClusterVersion` in `nexus/types/src/deployment/planning_input.rs`, and change the associated constant `NEWLY_INITIALIZED` to that value. -. Run the test suite, which should catch any unexpected SQL +. Regenerate the Nexus internal OpenAPI document, which contains an enum + of CockroachDB versions: ++ +.... +EXPECTORATE=overwrite cargo nextest run -p omicron-nexus -- integration_tests::commands::test_nexus_openapi_internal +.... +. Run the full test suite, which should catch any unexpected SQL compatibility issues between releases and help validate that your build works. - * You will need to run the `test_omdb_success_cases` test from - omicron-omdb with `EXPECTORATE=overwrite`; this file contains the - expected output of various omdb commands, including a fingerprint of - CockroachDB's cluster state. . Submit a PR for your changes to garbage-compactor; when merged, publish the final build to the `oxide-cockroachdb-build` S3 bucket. . Update `tools/cockroachdb_checksums`. For non-illumos checksums, use diff --git a/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs index e7a975fa691..2816abd62df 100644 --- a/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs +++ b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs @@ -153,10 +153,21 @@ mod test { ); let settings = datastore.cockroachdb_settings(&opctx).await.unwrap(); - // With a fresh cluster, this is the expected state - let version = CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string(); - assert_eq!(settings.version, version); - assert_eq!(settings.preserve_downgrade, ""); + let version: CockroachDbClusterVersion = + settings.version.parse().expect("unexpected cluster version"); + if settings.preserve_downgrade == "" { + // This is the expected value while running tests normally. + assert_eq!(version, CockroachDbClusterVersion::NEWLY_INITIALIZED); + } else if settings.preserve_downgrade == version.to_string() { + // This is the expected value if we are running under + // `CRDB_SEED_USE_PREV=yes`. + assert_eq!(version, CockroachDbClusterVersion::POLICY); + } else { + panic!( + "`cluster.preserve_downgrade_option` should not be {:?}", + settings.preserve_downgrade + ); + } // Verify that if a fingerprint is wrong, we get the expected SQL error // back. @@ -165,7 +176,7 @@ mod test { &opctx, String::new(), "cluster.preserve_downgrade_option", - version.clone(), + version.to_string(), ) .await else { @@ -190,7 +201,7 @@ mod test { &opctx, settings.state_fingerprint.clone(), "cluster.preserve_downgrade_option", - version.clone(), + version.to_string(), ) .await .unwrap(); @@ -198,8 +209,8 @@ mod test { datastore.cockroachdb_settings(&opctx).await.unwrap(), CockroachDbSettings { state_fingerprint: settings.state_fingerprint.clone(), - version: version.clone(), - preserve_downgrade: version.clone(), + version: version.to_string(), + preserve_downgrade: version.to_string(), } ); } @@ -215,14 +226,24 @@ mod test { ) .await .unwrap(); - assert_eq!( - datastore.cockroachdb_settings(&opctx).await.unwrap(), - CockroachDbSettings { - state_fingerprint: settings.state_fingerprint.clone(), - version: version.clone(), - preserve_downgrade: String::new(), - } - ); + let settings = + datastore.cockroachdb_settings(&opctx).await.unwrap(); + if version == CockroachDbClusterVersion::NEWLY_INITIALIZED { + assert_eq!( + settings, + CockroachDbSettings { + state_fingerprint: settings.state_fingerprint.clone(), + version: version.to_string(), + preserve_downgrade: String::new(), + } + ); + } else { + // Resetting it permits auto-finalization, so the state + // fingerprint and version are not predictable until that + // completes, but we can still verify that the variable was + // reset. + assert!(settings.preserve_downgrade.is_empty()); + } } db.cleanup().await.unwrap(); diff --git a/nexus/reconfigurator/execution/src/cockroachdb.rs b/nexus/reconfigurator/execution/src/cockroachdb.rs index 6bd72955c7d..c46c74b2d12 100644 --- a/nexus/reconfigurator/execution/src/cockroachdb.rs +++ b/nexus/reconfigurator/execution/src/cockroachdb.rs @@ -37,7 +37,7 @@ mod test { use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_test_utils_macros::nexus_test; - use nexus_types::deployment::CockroachDbClusterVersion; + use nexus_types::deployment::CockroachDbPreserveDowngrade; use std::sync::Arc; type ControlPlaneTestContext = @@ -69,24 +69,26 @@ mod test { .await .expect("failed to get blueprint from datastore"); eprintln!("blueprint: {}", blueprint.display()); - // The initial blueprint should already have these filled in. + // The initial blueprint should already have the state fingerprint + // filled in. assert_eq!( blueprint.cockroachdb_fingerprint, settings.state_fingerprint ); - assert_eq!( - blueprint.cockroachdb_setting_preserve_downgrade, - CockroachDbClusterVersion::NEWLY_INITIALIZED.into() - ); - // The cluster version, preserve downgrade setting, and - // `NEWLY_INITIALIZED` should all match. - assert_eq!( - settings.version, - CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string() - ); + // The initial blueprint should already have the preserve downgrade + // setting filled in. (It might be the current or previous version, but + // it should be `Set` regardless.) + let CockroachDbPreserveDowngrade::Set(bp_preserve_downgrade) = + blueprint.cockroachdb_setting_preserve_downgrade + else { + panic!("blueprint does not set preserve downgrade option"); + }; + // The cluster version, preserve downgrade setting, and the value in the + // blueprint should all match. + assert_eq!(settings.version, bp_preserve_downgrade.to_string()); assert_eq!( settings.preserve_downgrade, - CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string() + bp_preserve_downgrade.to_string() ); // Execute the initial blueprint. let overrides = Overridables::for_test(cptestctx); diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index ee3818f40c8..eab646d6311 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -241,22 +241,41 @@ impl super::Nexus { .internal_context( "fetching cockroachdb settings for rack initialization", )?; - self.datastore() - .cockroachdb_setting_set_string( - opctx, - cockroachdb_settings.state_fingerprint.clone(), - "cluster.preserve_downgrade_option", - CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string(), - ) - .await - .internal_context( - "setting `cluster.preserve_downgrade_option` \ - for rack initialization", - )?; + blueprint.cockroachdb_setting_preserve_downgrade = + if cockroachdb_settings.preserve_downgrade.is_empty() { + // Set the option to the current policy in both the database and + // the blueprint. + self.datastore() + .cockroachdb_setting_set_string( + opctx, + cockroachdb_settings.state_fingerprint.clone(), + "cluster.preserve_downgrade_option", + CockroachDbClusterVersion::NEWLY_INITIALIZED + .to_string(), + ) + .await + .internal_context( + "setting `cluster.preserve_downgrade_option` \ + for rack initialization", + )?; + CockroachDbClusterVersion::NEWLY_INITIALIZED + } else { + // Fill in the blueprint with the current value of the option in + // the database. + CockroachDbClusterVersion::from_str( + &cockroachdb_settings.preserve_downgrade, + ) + .map_err(|_| { + Error::internal_error(&format!( + "database has `cluster.preserve_downgrade_option` \ + set to invalid version {}", + cockroachdb_settings.preserve_downgrade + )) + })? + } + .into(); blueprint.cockroachdb_fingerprint = cockroachdb_settings.state_fingerprint; - blueprint.cockroachdb_setting_preserve_downgrade = - CockroachDbClusterVersion::NEWLY_INITIALIZED.into(); // Administrators of the Recovery Silo are automatically made // administrators of the Fleet. diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index 3e8bc9aa2c9..88ce8438ed1 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -276,6 +276,13 @@ pub enum CockroachDbPreserveDowngrade { } impl CockroachDbPreserveDowngrade { + pub fn is_set(self) -> bool { + match self { + CockroachDbPreserveDowngrade::Set(_) => true, + _ => false, + } + } + pub fn from_optional_string( value: &Option, ) -> Result { @@ -829,11 +836,15 @@ mod tests { cockroachdb_version ); - // In the next "tick" release, this version will be stored in a - // different file. + let prev_cockroachdb_version = + include_str!("../../../../tools/prev_cockroachdb_version") + .trim_start_matches('v') + .rsplit_once('.') + .unwrap() + .0; assert_eq!( CockroachDbClusterVersion::POLICY.to_string(), - cockroachdb_version + prev_cockroachdb_version ); } } diff --git a/test-utils/src/dev/db.rs b/test-utils/src/dev/db.rs index fcb14a4f151..7338b883d6f 100644 --- a/test-utils/src/dev/db.rs +++ b/test-utils/src/dev/db.rs @@ -47,10 +47,18 @@ const COCKROACHDB_USER: &'static str = "root"; /// Path to the CockroachDB binary const COCKROACHDB_BIN: &str = "cockroach"; +/// Path to the CockroachDB binary for the previous major version +const PREV_COCKROACHDB_BIN: &str = concat!( + env!("CARGO_MANIFEST_DIR"), + "/../out/prev_cockroachdb/bin/cockroach" +); /// The expected CockroachDB version -const COCKROACHDB_VERSION: &str = +pub(super) const COCKROACHDB_VERSION: &str = include_str!("../../../tools/cockroachdb_version"); +/// The expected previous CockroachDB version +pub(super) const PREV_COCKROACHDB_VERSION: &str = + include_str!("../../../tools/prev_cockroachdb_version"); /// Builder for [`CockroachStarter`] that supports setting some command-line /// arguments for the `cockroach start-single-node` command @@ -76,6 +84,8 @@ pub struct CockroachStarterBuilder { args: Vec, /// describes the command line that we're going to execute cmd_builder: tokio::process::Command, + /// expected version of CockroachDB + expected_version: &'static str, /// how long to wait for CockroachDB to report itself listening start_timeout: Duration, /// redirect stdout and stderr to files @@ -83,8 +93,41 @@ pub struct CockroachStarterBuilder { } impl CockroachStarterBuilder { + /// Create a starter for `cockroach` on `$PATH`. pub fn new() -> CockroachStarterBuilder { - let mut builder = CockroachStarterBuilder::new_raw(COCKROACHDB_BIN); + CockroachStarterBuilder::new_raw(COCKROACHDB_BIN) + .new_setup(COCKROACHDB_VERSION.trim()) + } + + /// Create a starter for the previous major version of `cockroach`, found in + /// `out/prev_cockroachdb/bin`. + pub fn new_prev() -> CockroachStarterBuilder { + CockroachStarterBuilder::new_raw(PREV_COCKROACHDB_BIN) + .new_setup(PREV_COCKROACHDB_VERSION.trim()) + } + + /// Helper for constructing a `CockroachStarterBuilder` that runs a specific + /// command instead of the usual `cockroach` binary + /// + /// This is used by `new()` as a starting point. It's also used by the + /// tests to trigger failure modes that would be hard to reproduce with + /// `cockroach` itself. + fn new_raw(cmd: &str) -> CockroachStarterBuilder { + CockroachStarterBuilder { + store_dir: None, + listen_port: COCKROACHDB_DEFAULT_LISTEN_PORT, + env: BTreeMap::new(), + args: vec![String::from(cmd)], + cmd_builder: tokio::process::Command::new(cmd), + expected_version: "", + start_timeout: COCKROACHDB_START_TIMEOUT_DEFAULT, + redirect_stdio: false, + } + } + + /// Common setup to run after `new_raw`. + fn new_setup(mut self, expected_version: &'static str) -> Self { + self.expected_version = expected_version; // Copy the current set of environment variables. We could instead // allow the default behavior of inheriting the current process @@ -92,14 +135,14 @@ impl CockroachStarterBuilder { // behavior, it's possible that what we print out wouldn't match what // was used if some other thread modified the process environment in // between. - builder.cmd_builder.env_clear(); + self.cmd_builder.env_clear(); // Configure Go to generate a core file on fatal error. This behavior // may be overridden by the user if they've set this variable in their // environment. - builder.env("GOTRACEBACK", "crash"); + self.env("GOTRACEBACK", "crash"); for (key, value) in std::env::vars_os() { - builder.env(key, value); + self.env(key, value); } // We use single-node insecure mode listening only on localhost. We @@ -112,29 +155,9 @@ impl CockroachStarterBuilder { // If we decide to let callers customize various listening addresses, we // should be careful about making it too easy to generate a more // insecure configuration. - builder - .arg("start-single-node") - .arg("--insecure") - .arg("--http-addr=:0"); - builder - } + self.arg("start-single-node").arg("--insecure").arg("--http-addr=:0"); - /// Helper for constructing a `CockroachStarterBuilder` that runs a specific - /// command instead of the usual `cockroach` binary - /// - /// This is used by `new()` as a starting point. It's also used by the - /// tests to trigger failure modes that would be hard to reproduce with - /// `cockroach` itself. - fn new_raw(cmd: &str) -> CockroachStarterBuilder { - CockroachStarterBuilder { - store_dir: None, - listen_port: COCKROACHDB_DEFAULT_LISTEN_PORT, - env: BTreeMap::new(), - args: vec![String::from(cmd)], - cmd_builder: tokio::process::Command::new(cmd), - start_timeout: COCKROACHDB_START_TIMEOUT_DEFAULT, - redirect_stdio: false, - } + self } /// Redirect stdout and stderr for the "cockroach" process to files within @@ -246,6 +269,7 @@ impl CockroachStarterBuilder { args: self.args, env: self.env, cmd_builder: self.cmd_builder, + expected_version: self.expected_version, start_timeout: self.start_timeout, }) } @@ -301,6 +325,8 @@ pub struct CockroachStarter { args: Vec, /// the command line that we're going to execute cmd_builder: tokio::process::Command, + /// expected version of CockroachDB + expected_version: &'static str, /// how long to wait for the listen URL to be written start_timeout: Duration, } @@ -341,7 +367,11 @@ impl CockroachStarter { pub async fn start( mut self, ) -> Result { - check_db_version().await?; + check_db_version( + self.cmd_builder.as_std().get_program(), + self.expected_version, + ) + .await?; let mut child_process = self.cmd_builder.spawn().map_err(|source| { CockroachStartError::BadCmd { cmd: self.args[0].clone(), source } @@ -703,18 +733,30 @@ pub async fn format_sql(input: &str) -> Result { } /// Verify that CockroachDB has the correct version -pub async fn check_db_version() -> Result<(), CockroachStartError> { - let mut cmd = tokio::process::Command::new(COCKROACHDB_BIN); +async fn check_db_version( + bin: &OsStr, + expected_version: &str, +) -> Result<(), CockroachStartError> { + if expected_version.is_empty() { + // This can happen when `new_raw` is called without running `new_setup`, + // which we do in some tests. In that situation, `bin` might not be + // CockroachDB! + return Ok(()); + } + + let mut cmd = tokio::process::Command::new(bin); cmd.args(&["version", "--build-tag"]); cmd.env("GOTRACEBACK", "crash"); - let output = cmd.output().await.map_err(|source| { - CockroachStartError::BadCmd { cmd: COCKROACHDB_BIN.to_string(), source } - })?; + let output = + cmd.output().await.map_err(|source| CockroachStartError::BadCmd { + cmd: bin.to_string_lossy().into_owned(), + source, + })?; let stdout = String::from_utf8_lossy(&output.stdout).into_owned(); let stderr = String::from_utf8_lossy(&output.stderr).into_owned(); if !output.status.success() { return Err(CockroachStartError::BadVersion { - expected: COCKROACHDB_VERSION.trim().to_string(), + expected: expected_version.to_string(), found: Err(anyhow!( "error {:?} when checking CockroachDB version", output.status.code() @@ -741,10 +783,10 @@ pub async fn check_db_version() -> Result<(), CockroachStartError> { version_str }; - if version_str != COCKROACHDB_VERSION.trim() { + if version_str != expected_version { return Err(CockroachStartError::BadVersion { found: Ok(version_str.to_string()), - expected: COCKROACHDB_VERSION.trim().to_string(), + expected: expected_version.to_string(), stdout, stderr, }); diff --git a/test-utils/src/dev/mod.rs b/test-utils/src/dev/mod.rs index 31a448c9880..eaa8f29651f 100644 --- a/test-utils/src/dev/mod.rs +++ b/test-utils/src/dev/mod.rs @@ -60,7 +60,9 @@ pub async fn test_setup_database( source: StorageSource, ) -> db::CockroachInstance { usdt::register_probes().expect("Failed to register USDT DTrace probes"); - setup_database(log, source).await.unwrap() + setup_database(log, source, db::CockroachStarterBuilder::new()) + .await + .unwrap() } // TODO: switch to anyhow entirely -- this function is currently a mishmash of @@ -68,8 +70,8 @@ pub async fn test_setup_database( async fn setup_database( log: &Logger, storage_source: StorageSource, + builder: db::CockroachStarterBuilder, ) -> Result { - let builder = db::CockroachStarterBuilder::new(); let mut builder = match &storage_source { StorageSource::DoNotPopulate | StorageSource::CopyFromSeed { .. } => { builder diff --git a/test-utils/src/dev/seed.rs b/test-utils/src/dev/seed.rs index 7a75d0bbd3d..28908aa753f 100644 --- a/test-utils/src/dev/seed.rs +++ b/test-utils/src/dev/seed.rs @@ -9,18 +9,31 @@ use camino::{Utf8Path, Utf8PathBuf}; use filetime::FileTime; use slog::Logger; +use super::db::CockroachStarterBuilder; +use super::db::COCKROACHDB_VERSION; +use super::db::PREV_COCKROACHDB_VERSION; use super::CRDB_SEED_TAR_ENV; +fn use_prev() -> bool { + std::env::var("CRDB_SEED_USE_PREV").as_deref() == Ok("yes") +} + /// Creates a string identifier for the current DB schema and version. // /// The goal here is to allow to create different "seed" tarballs /// for each revision of the DB. pub fn digest_unique_to_schema() -> String { - let schema = include_str!("../../../schema/crdb/dbinit.sql"); - let crdb_version = include_str!("../../../tools/cockroachdb_version"); let mut ctx = ring::digest::Context::new(&ring::digest::SHA256); - ctx.update(&schema.as_bytes()); - ctx.update(&crdb_version.as_bytes()); + ctx.update(include_bytes!("../../../schema/crdb/dbinit.sql")); + if use_prev() { + ctx.update(PREV_COCKROACHDB_VERSION.as_bytes()); + // This seed tarball needs to be different from one generated for the + // same version without `CRDB_SEED_USE_PREV=yes`, because we set the + // preserve downgrade option during initialization. + ctx.update(b"prev"); + } else { + ctx.update(COCKROACHDB_VERSION.as_bytes()); + } let digest = ctx.finish(); hex::encode(digest.as_ref()) } @@ -173,9 +186,31 @@ pub async fn test_setup_database_seed( super::StorageSource::PopulateLatest { output_dir: tmp_seed_dir.path().to_owned(), }, + if use_prev() { + CockroachStarterBuilder::new_prev() + } else { + CockroachStarterBuilder::new() + }, ) .await .context("failed to setup database")?; + if use_prev() { + // ensure the cluster doesn't get upgraded once the newer binary starts + let (major, _) = PREV_COCKROACHDB_VERSION + .trim() + .trim_start_matches('v') + .rsplit_once('.') + .context("failed to get cockroachdb major version")?; + let sql = format!( + "SET CLUSTER SETTING cluster.preserve_downgrade_option = '{major}'" + ); + db.connect() + .await + .context("failed to connect to database")? + .execute(&sql, &[]) + .await + .context("failed to set cluster.preserve_downgrade_option")?; + } db.cleanup().await.context("failed to cleanup database")?; // See https://github.com/cockroachdb/cockroach/issues/74231 for context on diff --git a/tools/prev_cockroachdb_checksums b/tools/prev_cockroachdb_checksums new file mode 100644 index 00000000000..20b6e237f82 --- /dev/null +++ b/tools/prev_cockroachdb_checksums @@ -0,0 +1,3 @@ +CIDL_SHA256_DARWIN="1ca69e0911af11a73305c3c6f4650b912d70754900b5bf7b80a1d361efe36561" +CIDL_SHA256_LINUX="24c321820e7ee45fa07fe91ac138befe13ad860e41c6ed595ce58823205ff4a9" +CIDL_SHA256_ILLUMOS="f151714ba3a6e02caaaa59727482c36085e60d6bd2fa963938e9a3d8c8a77088" diff --git a/tools/prev_cockroachdb_version b/tools/prev_cockroachdb_version new file mode 100644 index 00000000000..4e11c6a7da1 --- /dev/null +++ b/tools/prev_cockroachdb_version @@ -0,0 +1 @@ +v22.1.9