Skip to content

Commit

Permalink
add infrastructure for prev_cockroachdb_version
Browse files Browse the repository at this point in the history
  • Loading branch information
iliana committed Aug 19, 2024
1 parent 6dd9802 commit 4f199c0
Show file tree
Hide file tree
Showing 16 changed files with 313 additions and 111 deletions.
17 changes: 17 additions & 0 deletions .github/buildomat/build-and-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,23 @@ ptime -m cargo build -Z unstable-options --timings=json --workspace --tests --lo
banner test
ptime -m timeout 2h cargo nextest run --profile ci --locked --verbose

#
# If upgraded CockroachDB clusters are running on a different cluster version
# than newly-initialized ones, run database-related tests from the older cluster
# version.
#
if cmp tools/cockroachdb_version tools/prev_cockroachdb_version; then
# files are the same
true
else
# if return code is > 1, cmp failed
[[ $? -gt 1 ]] && exit 2
# otherwise, files differ
cargo xtask download cockroach-prev
CRDB_SEED_USE_PREV=yes ptime -m timeout 1h cargo nextest run --profile ci --locked --verbose \
--filter-expr 'rdeps(nexus-test-utils) - package(omicron-dev)'
fi

#
# https://github.com/nextest-rs/nextest/issues/16
#
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 25 additions & 6 deletions dev-tools/downloader/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ use strum::IntoEnumIterator;
use tar::Archive;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::process::Command;
use tokio::sync::Mutex;

const BUILDOMAT_URL: &'static str =
"https://buildomat.eng.oxide.computer/public/file";
Expand Down Expand Up @@ -60,6 +61,9 @@ enum Target {
/// CockroachDB binary
Cockroach,

/// CockroachDB binary (previous major version)
CockroachPrev,

/// Web console assets
Console,

Expand Down Expand Up @@ -136,7 +140,8 @@ pub async fn run_cmd(args: DownloadArgs) -> Result<()> {
}
Target::CargoHack => downloader.download_cargo_hack().await,
Target::Clickhouse => downloader.download_clickhouse().await,
Target::Cockroach => downloader.download_cockroach().await,
Target::Cockroach => downloader.download_cockroach("").await,
Target::CockroachPrev => downloader.download_cockroach("prev_").await,
Target::Console => downloader.download_console().await,
Target::DendriteOpenapi => {
downloader.download_dendrite_openapi().await
Expand Down Expand Up @@ -566,27 +571,33 @@ impl<'a> Downloader<'a> {
Ok(())
}

async fn download_cockroach(&self) -> Result<()> {
async fn download_cockroach(&self, prefix: &str) -> Result<()> {
let os = os_name()?;

let download_dir = self.output_dir.join("downloads");
let destination_dir = self.output_dir.join("cockroachdb");
let destination_dir =
self.output_dir.join(format!("{prefix}cockroachdb"));

let checksums_path = self.versions_dir.join("cockroachdb_checksums");
let checksums_path =
self.versions_dir.join(format!("{prefix}cockroachdb_checksums"));
let [checksum] = get_values_from_file(
[&format!("CIDL_SHA256_{}", os.env_name())],
&checksums_path,
)
.await?;

let versions_path = self.versions_dir.join("cockroachdb_version");
let versions_path =
self.versions_dir.join(format!("{prefix}cockroachdb_version"));
let version = tokio::fs::read_to_string(&versions_path)
.await
.context("Failed to read version from {versions_path}")?;
let version = version.trim();

let (url_base, suffix) = match os {
Os::Illumos => ("https://illumos.org/downloads", "tar.gz"),
Os::Illumos => (
"https://oxide-cockroachdb-build.s3.us-west-2.amazonaws.com",
"tar.gz",
),
Os::Linux | Os::Mac => ("https://binaries.cockroachdb.com", "tgz"),
};
let build = match os {
Expand All @@ -602,6 +613,11 @@ impl<'a> Downloader<'a> {

let tarball_path = download_dir.join(tarball_filename);

// Ensure that the download and unpack steps, which might write to the
// same paths, only run one at a time.
static MUTEX: Mutex<()> = Mutex::const_new(());
let mutex_lock = MUTEX.lock().await;

tokio::fs::create_dir_all(&download_dir).await?;
tokio::fs::create_dir_all(&destination_dir).await?;

Expand All @@ -619,6 +635,9 @@ impl<'a> Downloader<'a> {
info!(self.log, "tarball path: {tarball_path}");
unpack_tarball(&self.log, &tarball_path, &download_dir).await?;

// We are done writing to potentially shared files
drop(mutex_lock);

// This is where the binary will end up eventually
let cockroach_binary = destination_dir.join("bin/cockroach");

Expand Down
3 changes: 2 additions & 1 deletion dev-tools/omdb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,14 @@ multimap.workspace = true
indicatif.workspace = true

[dev-dependencies]
camino-tempfile.workspace = true
expectorate.workspace = true
http.workspace = true
nexus-test-utils.workspace = true
nexus-test-utils-macros.workspace = true
omicron-nexus.workspace = true
omicron-test-utils.workspace = true
subprocess.workspace = true
camino-tempfile.workspace = true

# Disable doc builds by default for our binaries to work around issue
# rust-lang/cargo#8373. These docs would not be very useful anyway.
Expand Down
12 changes: 6 additions & 6 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -1135,8 +1135,8 @@ WARNING: Zones exist without physical disks!


COCKROACHDB SETTINGS:
state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de
cluster.preserve_downgrade_option: "22.1"
state fingerprint::::::::::::::::: <COCKROACHDB_FINGERPRINT_REDACTED>
cluster.preserve_downgrade_option: <COCKROACHDB_VERSION_REDACTED>

METADATA:
created by::::::::::: nexus-test-utils
Expand Down Expand Up @@ -1173,8 +1173,8 @@ WARNING: Zones exist without physical disks!


COCKROACHDB SETTINGS:
state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de
cluster.preserve_downgrade_option: "22.1"
state fingerprint::::::::::::::::: <COCKROACHDB_FINGERPRINT_REDACTED>
cluster.preserve_downgrade_option: <COCKROACHDB_VERSION_REDACTED>

METADATA:
created by::::::::::: nexus-test-utils
Expand Down Expand Up @@ -1214,8 +1214,8 @@ to: blueprint ......<REDACTED_BLUEPRINT_ID>.......


COCKROACHDB SETTINGS:
state fingerprint::::::::::::::::: d4d87aa2ad877a4cc2fddd0573952362739110de (unchanged)
cluster.preserve_downgrade_option: "22.1" (unchanged)
state fingerprint::::::::::::::::: <COCKROACHDB_FINGERPRINT_REDACTED> (unchanged)
cluster.preserve_downgrade_option: <COCKROACHDB_VERSION_REDACTED> (unchanged)

METADATA:
internal DNS version: 1 (unchanged)
Expand Down
36 changes: 31 additions & 5 deletions dev-tools/omdb/tests/test_all_output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@
//! Feel free to change the tool's output. This test just makes it easy to make
//! sure you're only breaking what you intend.
use dropshot::Method;
use expectorate::assert_contents;
use http::StatusCode;
use nexus_test_utils::{OXIMETER_UUID, PRODUCER_UUID};
use nexus_test_utils_macros::nexus_test;
use nexus_types::deployment::Blueprint;
use nexus_types::deployment::SledFilter;
use nexus_types::deployment::UnstableReconfiguratorState;
use omicron_test_utils::dev::test_cmds::path_to_executable;
Expand Down Expand Up @@ -134,6 +137,20 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) {
let tmppath = tmpdir.path().join("reconfigurator-save.out");
let initial_blueprint_id = cptestctx.initial_blueprint_id.to_string();

// Get the CockroachDB metadata from the blueprint so we can redact it
let initial_blueprint: Blueprint = dropshot::test_util::read_json(
&mut cptestctx
.internal_client
.make_request_no_body(
Method::GET,
&format!("/deployment/blueprints/all/{initial_blueprint_id}"),
StatusCode::OK,
)
.await
.unwrap(),
)
.await;

let mut output = String::new();

let invocations: &[&[&str]] = &[
Expand Down Expand Up @@ -186,6 +203,19 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) {
// ControlPlaneTestContext.
];

let mut redactions = ExtraRedactions::new();
redactions
.variable_length("tmp_path", tmppath.as_str())
.fixed_length("blueprint_id", &initial_blueprint_id)
.variable_length(
"cockroachdb_fingerprint",
&initial_blueprint.cockroachdb_fingerprint,
);
let crdb_version =
initial_blueprint.cockroachdb_setting_preserve_downgrade.to_string();
if initial_blueprint.cockroachdb_setting_preserve_downgrade.is_set() {
redactions.variable_length("cockroachdb_version", &crdb_version);
}
for args in invocations {
println!("running commands with args: {:?}", args);
let p = postgres_url.to_string();
Expand All @@ -204,11 +234,7 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) {
},
&cmd_path,
args,
Some(
ExtraRedactions::new()
.variable_length("tmp_path", tmppath.as_str())
.fixed_length("blueprint_id", &initial_blueprint_id),
),
Some(&redactions),
)
.await;
}
Expand Down
12 changes: 7 additions & 5 deletions docs/crdb-upgrades.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,15 @@ a tick, but they must occur in that order.)
. Add an enum variant for the new version to `CockroachDbClusterVersion`
in `nexus/types/src/deployment/planning_input.rs`, and change the
associated constant `NEWLY_INITIALIZED` to that value.
. Run the test suite, which should catch any unexpected SQL
. Regenerate the Nexus internal OpenAPI document, which contains an enum
of CockroachDB versions:
+
....
EXPECTORATE=overwrite cargo nextest run -p omicron-nexus -- integration_tests::commands::test_nexus_openapi_internal
....
. Run the full test suite, which should catch any unexpected SQL
compatibility issues between releases and help validate that your
build works.
* You will need to run the `test_omdb_success_cases` test from
omicron-omdb with `EXPECTORATE=overwrite`; this file contains the
expected output of various omdb commands, including a fingerprint of
CockroachDB's cluster state.
. Submit a PR for your changes to garbage-compactor; when merged,
publish the final build to the `oxide-cockroachdb-build` S3 bucket.
. Update `tools/cockroachdb_checksums`. For non-illumos checksums, use
Expand Down
53 changes: 37 additions & 16 deletions nexus/db-queries/src/db/datastore/cockroachdb_settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,21 @@ mod test {
);

let settings = datastore.cockroachdb_settings(&opctx).await.unwrap();
// With a fresh cluster, this is the expected state
let version = CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string();
assert_eq!(settings.version, version);
assert_eq!(settings.preserve_downgrade, "");
let version: CockroachDbClusterVersion =
settings.version.parse().expect("unexpected cluster version");
if settings.preserve_downgrade == "" {
// This is the expected value while running tests normally.
assert_eq!(version, CockroachDbClusterVersion::NEWLY_INITIALIZED);
} else if settings.preserve_downgrade == version.to_string() {
// This is the expected value if we are running under
// `CRDB_SEED_USE_PREV=yes`.
assert_eq!(version, CockroachDbClusterVersion::POLICY);
} else {
panic!(
"`cluster.preserve_downgrade_option` should not be {:?}",
settings.preserve_downgrade
);
}

// Verify that if a fingerprint is wrong, we get the expected SQL error
// back.
Expand All @@ -165,7 +176,7 @@ mod test {
&opctx,
String::new(),
"cluster.preserve_downgrade_option",
version.clone(),
version.to_string(),
)
.await
else {
Expand All @@ -190,16 +201,16 @@ mod test {
&opctx,
settings.state_fingerprint.clone(),
"cluster.preserve_downgrade_option",
version.clone(),
version.to_string(),
)
.await
.unwrap();
assert_eq!(
datastore.cockroachdb_settings(&opctx).await.unwrap(),
CockroachDbSettings {
state_fingerprint: settings.state_fingerprint.clone(),
version: version.clone(),
preserve_downgrade: version.clone(),
version: version.to_string(),
preserve_downgrade: version.to_string(),
}
);
}
Expand All @@ -215,14 +226,24 @@ mod test {
)
.await
.unwrap();
assert_eq!(
datastore.cockroachdb_settings(&opctx).await.unwrap(),
CockroachDbSettings {
state_fingerprint: settings.state_fingerprint.clone(),
version: version.clone(),
preserve_downgrade: String::new(),
}
);
let settings =
datastore.cockroachdb_settings(&opctx).await.unwrap();
if version == CockroachDbClusterVersion::NEWLY_INITIALIZED {
assert_eq!(
settings,
CockroachDbSettings {
state_fingerprint: settings.state_fingerprint.clone(),
version: version.to_string(),
preserve_downgrade: String::new(),
}
);
} else {
// Resetting it permits auto-finalization, so the state
// fingerprint and version are not predictable until that
// completes, but we can still verify that the variable was
// reset.
assert!(settings.preserve_downgrade.is_empty());
}
}

db.cleanup().await.unwrap();
Expand Down
28 changes: 15 additions & 13 deletions nexus/reconfigurator/execution/src/cockroachdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ mod test {
use nexus_db_queries::authn;
use nexus_db_queries::authz;
use nexus_test_utils_macros::nexus_test;
use nexus_types::deployment::CockroachDbClusterVersion;
use nexus_types::deployment::CockroachDbPreserveDowngrade;
use std::sync::Arc;
use uuid::Uuid;

Expand Down Expand Up @@ -70,24 +70,26 @@ mod test {
.await
.expect("failed to get blueprint from datastore");
eprintln!("blueprint: {}", blueprint.display());
// The initial blueprint should already have these filled in.
// The initial blueprint should already have the state fingerprint
// filled in.
assert_eq!(
blueprint.cockroachdb_fingerprint,
settings.state_fingerprint
);
assert_eq!(
blueprint.cockroachdb_setting_preserve_downgrade,
CockroachDbClusterVersion::NEWLY_INITIALIZED.into()
);
// The cluster version, preserve downgrade setting, and
// `NEWLY_INITIALIZED` should all match.
assert_eq!(
settings.version,
CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string()
);
// The initial blueprint should already have the preserve downgrade
// setting filled in. (It might be the current or previous version, but
// it should be `Set` regardless.)
let CockroachDbPreserveDowngrade::Set(bp_preserve_downgrade) =
blueprint.cockroachdb_setting_preserve_downgrade
else {
panic!("blueprint does not set preserve downgrade option");
};
// The cluster version, preserve downgrade setting, and the value in the
// blueprint should all match.
assert_eq!(settings.version, bp_preserve_downgrade.to_string());
assert_eq!(
settings.preserve_downgrade,
CockroachDbClusterVersion::NEWLY_INITIALIZED.to_string()
bp_preserve_downgrade.to_string()
);
// Record the zpools so we don't fail to ensure datasets (unrelated to
// crdb settings) during blueprint execution.
Expand Down
Loading

0 comments on commit 4f199c0

Please sign in to comment.