From 9aabe2aee4bbfe9af1cb9424a93cf6a59f13b9a6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 5 Oct 2023 09:46:26 -0700 Subject: [PATCH 01/23] Add transaction retry to schema upgrade integration tests (#4209) Fixes https://github.com/oxidecomputer/omicron/issues/4207 --- nexus/tests/integration_tests/schema.rs | 51 ++++++++++++++++++++----- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 2c62f156e1..1d4556e8ed 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -62,6 +62,47 @@ async fn test_setup<'a>( builder } +// Attempts to apply an update as a transaction. +// +// Only returns an error if the transaction failed to commit. +async fn apply_update_as_transaction_inner( + client: &omicron_test_utils::dev::db::Client, + sql: &str, +) -> Result<(), tokio_postgres::Error> { + client.batch_execute("BEGIN;").await.expect("Failed to BEGIN transaction"); + client.batch_execute(&sql).await.expect("Failed to execute update"); + client.batch_execute("COMMIT;").await?; + Ok(()) +} + +// Applies an update as a transaction. +// +// Automatically retries transactions that can be retried client-side. 
+async fn apply_update_as_transaction( + log: &Logger, + client: &omicron_test_utils::dev::db::Client, + sql: &str, +) { + loop { + match apply_update_as_transaction_inner(client, sql).await { + Ok(()) => break, + Err(err) => { + client + .batch_execute("ROLLBACK;") + .await + .expect("Failed to ROLLBACK failed transaction"); + if let Some(code) = err.code() { + if code == &tokio_postgres::error::SqlState::T_R_SERIALIZATION_FAILURE { + warn!(log, "Transaction retrying"); + continue; + } + } + panic!("Failed to apply update: {err}"); + } + } + } +} + async fn apply_update( log: &Logger, crdb: &CockroachInstance, @@ -87,15 +128,7 @@ async fn apply_update( for _ in 0..times_to_apply { for sql in sqls.iter() { - client - .batch_execute("BEGIN;") - .await - .expect("Failed to BEGIN update"); - client.batch_execute(&sql).await.expect("Failed to execute update"); - client - .batch_execute("COMMIT;") - .await - .expect("Failed to COMMIT update"); + apply_update_as_transaction(log, &client, sql).await; } } From 6cf8181ba678855e3f131ad2914e90d06de02ac3 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 5 Oct 2023 10:28:21 -0700 Subject: [PATCH 02/23] top-level cleanup: move `thing-flinger` into `dev-tools` (#4213) --- Cargo.toml | 4 ++-- {deploy => dev-tools/thing-flinger}/.gitignore | 0 {deploy => dev-tools/thing-flinger}/Cargo.toml | 0 {deploy => dev-tools/thing-flinger}/README.adoc | 0 .../thing-flinger}/src/bin/deployment-example.toml | 0 {deploy => dev-tools/thing-flinger}/src/bin/thing-flinger.rs | 0 6 files changed, 2 insertions(+), 2 deletions(-) rename {deploy => dev-tools/thing-flinger}/.gitignore (100%) rename {deploy => dev-tools/thing-flinger}/Cargo.toml (100%) rename {deploy => dev-tools/thing-flinger}/README.adoc (100%) rename {deploy => dev-tools/thing-flinger}/src/bin/deployment-example.toml (100%) rename {deploy => dev-tools/thing-flinger}/src/bin/thing-flinger.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index 2af44b5559..29291e8a19 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -8,9 +8,9 @@ members = [ "common", "crdb-seed", "ddm-admin-client", - "deploy", "dev-tools/omdb", "dev-tools/omicron-dev", + "dev-tools/thing-flinger", "dev-tools/xtask", "dns-server", "dns-service-client", @@ -75,9 +75,9 @@ default-members = [ "common", "ddm-admin-client", "dpd-client", - "deploy", "dev-tools/omdb", "dev-tools/omicron-dev", + "dev-tools/thing-flinger", "dev-tools/xtask", "dns-server", "dns-service-client", diff --git a/deploy/.gitignore b/dev-tools/thing-flinger/.gitignore similarity index 100% rename from deploy/.gitignore rename to dev-tools/thing-flinger/.gitignore diff --git a/deploy/Cargo.toml b/dev-tools/thing-flinger/Cargo.toml similarity index 100% rename from deploy/Cargo.toml rename to dev-tools/thing-flinger/Cargo.toml diff --git a/deploy/README.adoc b/dev-tools/thing-flinger/README.adoc similarity index 100% rename from deploy/README.adoc rename to dev-tools/thing-flinger/README.adoc diff --git a/deploy/src/bin/deployment-example.toml b/dev-tools/thing-flinger/src/bin/deployment-example.toml similarity index 100% rename from deploy/src/bin/deployment-example.toml rename to dev-tools/thing-flinger/src/bin/deployment-example.toml diff --git a/deploy/src/bin/thing-flinger.rs b/dev-tools/thing-flinger/src/bin/thing-flinger.rs similarity index 100% rename from deploy/src/bin/thing-flinger.rs rename to dev-tools/thing-flinger/src/bin/thing-flinger.rs From ba291b8ab2293eb3e4cdf85a1bae072d75343b5e Mon Sep 17 00:00:00 2001 From: Jordan Hendricks Date: Thu, 5 Oct 2023 14:06:04 -0700 Subject: [PATCH 03/23] Add example of using an SSH tunnel to access the console in development deployments (#4200) --- docs/how-to-run.adoc | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index aa1ee3c73d..04d274da8b 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -143,7 +143,10 @@ $ svcadm enable ipfilter Other network 
configurations are possible but beyond the scope of this doc. -When making this choice, note that **in order to use the system once it's set up, you will need to be able to access it from a web browser.** If you go with option 2 here, you may need to use an ssh tunnel or the like to do this. +When making this choice, note that **in order to use the system once it's set +up, you will need to be able to access it from a web browser.** If you go with +option 2 here, you may need to use an SSH tunnel (see: +<>) or the like to do this. === Picking a "machine" type @@ -433,7 +436,32 @@ Where did 192.168.1.20 come from? That's the external address of the external DNS server. We knew that because it's listed in the `external_dns_ips` entry of the `config-rss.toml` file we're using. -Having looked this up, the easiest thing will be to use `http://192.168.1.21` for your URL (replacing with `https` if you used a certificate, and replacing that IP if needed). If you've set up networking right, you should be able to reach this from your web browser. You may have to instruct the browser to accept a self-signed TLS certificate. See also <<_connecting_securely_with_tls_using_the_cli>>. +Having looked this up, the easiest thing will be to use `http://192.168.1.21` for your URL (replacing with `https` if you used a certificate, and replacing that IP if needed). If you've set up networking right, you should be able to reach this from your web browser. You may have to instruct the browser to accept a self-signed TLS certificate. See also <>. + +=== Setting up an SSH tunnel for console access + +If you set up a fake external network (method 2 in <>), one +way to be able to access the console of your deployment is by setting up an SSH +tunnel. Console access is required to use the CLI for device authentication. +The following is an example of how to access the console with an SSH tunnel. + +Nexus serves the console, so first get a nexus IP from the instructions above. 
+ +In this example, Omicron is running on the lab machine `dunkin`. Usually, you'll +want to set up the tunnel from the machine where you run a browser, to the +machine running Omicron. In this example, one would run this on the machine +running the browser: + +``` +$ ssh -L 1234:192.168.1.22:80 dunkin.eng.oxide.computer +``` + +The above command configures `ssh` to bind to the TCP port `1234` on the machine +running the browser, forward packets through the ssh connection, and redirect +them to 192.168.1.22 port 80 *as seen from the other side of the connection*. + +Now you should be able to access the console from the browser on this machine, +via something like: `127.0.0.1:1234`, using the port from the `ssh` command. === Using the CLI From a2bb889cd21aeef7c287ee2da469a771bc684c01 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 5 Oct 2023 14:29:43 -0700 Subject: [PATCH 04/23] top-level cleanup: consolidate clients (#4214) --- Cargo.toml | 78 +++++++++---------- .../bootstrap-agent-client}/Cargo.toml | 0 .../bootstrap-agent-client}/src/lib.rs | 2 +- .../ddm-admin-client}/Cargo.toml | 0 .../ddm-admin-client}/build.rs | 14 ++-- .../ddm-admin-client}/src/lib.rs | 0 .../dns-service-client}/Cargo.toml | 0 .../dns-service-client}/src/lib.rs | 2 +- {dpd-client => clients/dpd-client}/Cargo.toml | 0 {dpd-client => clients/dpd-client}/build.rs | 18 ++--- {dpd-client => clients/dpd-client}/src/lib.rs | 0 .../gateway-client}/Cargo.toml | 0 .../gateway-client}/src/lib.rs | 2 +- .../installinator-artifact-client}/Cargo.toml | 0 .../installinator-artifact-client}/src/lib.rs | 2 +- .../nexus-client}/Cargo.toml | 0 .../nexus-client}/src/lib.rs | 2 +- .../oxide-client}/Cargo.toml | 0 .../oxide-client}/src/lib.rs | 2 +- .../oximeter-client}/Cargo.toml | 0 .../oximeter-client}/src/lib.rs | 2 +- .../sled-agent-client}/Cargo.toml | 0 .../sled-agent-client}/src/lib.rs | 2 +- .../wicketd-client}/Cargo.toml | 0 .../wicketd-client}/src/lib.rs | 2 +- 25 files changed, 64 
insertions(+), 64 deletions(-) rename {bootstrap-agent-client => clients/bootstrap-agent-client}/Cargo.toml (100%) rename {bootstrap-agent-client => clients/bootstrap-agent-client}/src/lib.rs (97%) rename {ddm-admin-client => clients/ddm-admin-client}/Cargo.toml (100%) rename {ddm-admin-client => clients/ddm-admin-client}/build.rs (87%) rename {ddm-admin-client => clients/ddm-admin-client}/src/lib.rs (100%) rename {dns-service-client => clients/dns-service-client}/Cargo.toml (100%) rename {dns-service-client => clients/dns-service-client}/src/lib.rs (98%) rename {dpd-client => clients/dpd-client}/Cargo.toml (100%) rename {dpd-client => clients/dpd-client}/build.rs (87%) rename {dpd-client => clients/dpd-client}/src/lib.rs (100%) rename {gateway-client => clients/gateway-client}/Cargo.toml (100%) rename {gateway-client => clients/gateway-client}/src/lib.rs (98%) rename {installinator-artifact-client => clients/installinator-artifact-client}/Cargo.toml (100%) rename {installinator-artifact-client => clients/installinator-artifact-client}/src/lib.rs (96%) rename {nexus-client => clients/nexus-client}/Cargo.toml (100%) rename {nexus-client => clients/nexus-client}/src/lib.rs (99%) rename {oxide-client => clients/oxide-client}/Cargo.toml (100%) rename {oxide-client => clients/oxide-client}/src/lib.rs (99%) rename {oximeter-client => clients/oximeter-client}/Cargo.toml (100%) rename {oximeter-client => clients/oximeter-client}/src/lib.rs (93%) rename {sled-agent-client => clients/sled-agent-client}/Cargo.toml (100%) rename {sled-agent-client => clients/sled-agent-client}/src/lib.rs (99%) rename {wicketd-client => clients/wicketd-client}/Cargo.toml (100%) rename {wicketd-client => clients/wicketd-client}/src/lib.rs (99%) diff --git a/Cargo.toml b/Cargo.toml index 29291e8a19..1ca8a02886 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,26 +2,31 @@ members = [ "api_identity", "bootstore", - "bootstrap-agent-client", "caboose-util", "certificates", + 
"clients/bootstrap-agent-client", + "clients/ddm-admin-client", + "clients/dns-service-client", + "clients/dpd-client", + "clients/gateway-client", + "clients/installinator-artifact-client", + "clients/nexus-client", + "clients/oxide-client", + "clients/oximeter-client", + "clients/sled-agent-client", + "clients/wicketd-client", "common", "crdb-seed", - "ddm-admin-client", "dev-tools/omdb", "dev-tools/omicron-dev", "dev-tools/thing-flinger", "dev-tools/xtask", "dns-server", - "dns-service-client", - "dpd-client", "end-to-end-tests", "gateway-cli", - "gateway-client", "gateway-test-utils", "gateway", "illumos-utils", - "installinator-artifact-client", "installinator-artifactd", "installinator-common", "installinator", @@ -29,7 +34,6 @@ members = [ "internal-dns", "ipcc-key-value", "key-manager", - "nexus-client", "nexus", "nexus/authz-macros", "nexus/db-macros", @@ -40,8 +44,6 @@ members = [ "nexus/test-utils-macros", "nexus/test-utils", "nexus/types", - "oxide-client", - "oximeter-client", "oximeter/collector", "oximeter/db", "oximeter/instruments", @@ -51,7 +53,6 @@ members = [ "package", "passwords", "rpaths", - "sled-agent-client", "sled-agent", "sled-hardware", "sp-sim", @@ -62,70 +63,69 @@ members = [ "wicket-common", "wicket-dbg", "wicket", - "wicketd-client", "wicketd", "workspace-hack", ] default-members = [ - "bootstrap-agent-client", "bootstore", "caboose-util", "certificates", + "clients/bootstrap-agent-client", + "clients/ddm-admin-client", + "clients/dns-service-client", + "clients/dpd-client", + "clients/gateway-client", + "clients/installinator-artifact-client", + "clients/nexus-client", + "clients/oxide-client", + "clients/oximeter-client", + "clients/sled-agent-client", + "clients/wicketd-client", "common", - "ddm-admin-client", - "dpd-client", "dev-tools/omdb", "dev-tools/omicron-dev", "dev-tools/thing-flinger", "dev-tools/xtask", "dns-server", - "dns-service-client", - "gateway", "gateway-cli", - "gateway-client", "gateway-test-utils", + 
"gateway", "illumos-utils", - "installinator", - "installinator-artifact-client", "installinator-artifactd", "installinator-common", - "internal-dns", + "installinator", "internal-dns-cli", + "internal-dns", "ipcc-key-value", "key-manager", "nexus", - "nexus-client", "nexus/authz-macros", "nexus/db-macros", "nexus/db-model", "nexus/db-queries", "nexus/defaults", "nexus/types", - "oxide-client", - "oximeter-client", "oximeter/collector", "oximeter/db", "oximeter/instruments", - "oximeter/oximeter", "oximeter/oximeter-macro-impl", + "oximeter/oximeter", "oximeter/producer", "package", "passwords", "rpaths", "sled-agent", - "sled-agent-client", "sled-hardware", "sp-sim", "test-utils", - "tufaceous", "tufaceous-lib", + "tufaceous", "update-engine", - "wicket", "wicket-common", "wicket-dbg", + "wicket", "wicketd", - "wicketd-client", ] resolver = "2" @@ -144,7 +144,7 @@ bb8 = "0.8.1" bcs = "0.1.5" bincode = "1.3.3" bootstore = { path = "bootstore" } -bootstrap-agent-client = { path = "bootstrap-agent-client" } +bootstrap-agent-client = { path = "clients/bootstrap-agent-client" } buf-list = { version = "1.0.3", features = ["tokio1"] } byteorder = "1.4.3" bytes = "1.5.0" @@ -168,7 +168,7 @@ crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "20273 curve25519-dalek = "4" datatest-stable = "0.1.3" display-error-chain = "0.1.1" -ddm-admin-client = { path = "ddm-admin-client" } +ddm-admin-client = { path = "clients/ddm-admin-client" } db-macros = { path = "nexus/db-macros" } debug-ignore = "1.0.5" derive_more = "0.99.17" @@ -176,8 +176,8 @@ derive-where = "1.2.5" diesel = { version = "2.1.1", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } diesel-dtrace = { git = "https://github.com/oxidecomputer/diesel-dtrace", branch = "main" } dns-server = { path = "dns-server" } -dns-service-client = { path = "dns-service-client" } -dpd-client = { path = "dpd-client" } +dns-service-client = { path = "clients/dns-service-client" } 
+dpd-client = { path = "clients/dpd-client" } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } either = "1.9.0" expectorate = "1.1.0" @@ -187,7 +187,7 @@ flume = "0.11.0" foreign-types = "0.3.2" fs-err = "2.9.0" futures = "0.3.28" -gateway-client = { path = "gateway-client" } +gateway-client = { path = "clients/gateway-client" } gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "1e180ae55e56bd17af35cb868ffbd18ce487351d", default-features = false, features = ["std"] } gateway-sp-comms = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "1e180ae55e56bd17af35cb868ffbd18ce487351d" } gateway-test-utils = { path = "gateway-test-utils" } @@ -209,7 +209,7 @@ indexmap = "2.0.0" indicatif = { version = "0.17.6", features = ["rayon"] } installinator = { path = "installinator" } installinator-artifactd = { path = "installinator-artifactd" } -installinator-artifact-client = { path = "installinator-artifact-client" } +installinator-artifact-client = { path = "clients/installinator-artifact-client" } installinator-common = { path = "installinator-common" } internal-dns = { path = "internal-dns" } ipcc-key-value = { path = "ipcc-key-value" } @@ -223,7 +223,7 @@ macaddr = { version = "1.0.1", features = ["serde_std"] } mime_guess = "2.0.4" mockall = "0.11" newtype_derive = "0.1.6" -nexus-client = { path = "nexus-client" } +nexus-client = { path = "clients/nexus-client" } nexus-db-model = { path = "nexus/db-model" } nexus-db-queries = { path = "nexus/db-queries" } nexus-defaults = { path = "nexus/defaults" } @@ -244,7 +244,7 @@ omicron-rpaths = { path = "rpaths" } omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.8.3" -oxide-client = { path = "oxide-client" } +oxide-client = { path = "clients/oxide-client" } oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = 
"98d33125413f01722947e322f82caf9d22209434", features = [ "api", "std" ] } once_cell = "1.18.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } @@ -257,7 +257,7 @@ opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "98d33125413 oso = "0.26" owo-colors = "3.5.0" oximeter = { path = "oximeter/oximeter" } -oximeter-client = { path = "oximeter-client" } +oximeter-client = { path = "clients/oximeter-client" } oximeter-db = { path = "oximeter/db/" } oximeter-collector = { path = "oximeter/collector" } oximeter-instruments = { path = "oximeter/instruments" } @@ -315,7 +315,7 @@ signal-hook = "0.3" signal-hook-tokio = { version = "0.3", features = [ "futures-v0_3" ] } similar-asserts = "1.5.0" sled = "0.34" -sled-agent-client = { path = "sled-agent-client" } +sled-agent-client = { path = "clients/sled-agent-client" } sled-hardware = { path = "sled-hardware" } slog = { version = "2.7", features = [ "dynamic-keys", "max_level_trace", "release_max_level_debug" ] } slog-async = "2.8" @@ -370,7 +370,7 @@ usdt = "0.3" walkdir = "2.4" wicket = { path = "wicket" } wicket-common = { path = "wicket-common" } -wicketd-client = { path = "wicketd-client" } +wicketd-client = { path = "clients/wicketd-client" } zeroize = { version = "1.6.0", features = ["zeroize_derive", "std"] } zip = { version = "0.6.6", default-features = false, features = ["deflate","bzip2"] } zone = { version = "0.2", default-features = false, features = ["async"] } diff --git a/bootstrap-agent-client/Cargo.toml b/clients/bootstrap-agent-client/Cargo.toml similarity index 100% rename from bootstrap-agent-client/Cargo.toml rename to clients/bootstrap-agent-client/Cargo.toml diff --git a/bootstrap-agent-client/src/lib.rs b/clients/bootstrap-agent-client/src/lib.rs similarity index 97% rename from bootstrap-agent-client/src/lib.rs rename to clients/bootstrap-agent-client/src/lib.rs index 5a159e299a..3f8b20e1f5 100644 --- a/bootstrap-agent-client/src/lib.rs +++ 
b/clients/bootstrap-agent-client/src/lib.rs @@ -5,7 +5,7 @@ //! Interface for making API requests to a Bootstrap Agent progenitor::generate_api!( - spec = "../openapi/bootstrap-agent.json", + spec = "../../openapi/bootstrap-agent.json", inner_type = slog::Logger, pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { slog::debug!(log, "client request"; diff --git a/ddm-admin-client/Cargo.toml b/clients/ddm-admin-client/Cargo.toml similarity index 100% rename from ddm-admin-client/Cargo.toml rename to clients/ddm-admin-client/Cargo.toml diff --git a/ddm-admin-client/build.rs b/clients/ddm-admin-client/build.rs similarity index 87% rename from ddm-admin-client/build.rs rename to clients/ddm-admin-client/build.rs index ef4183fee3..e3c1345eda 100644 --- a/ddm-admin-client/build.rs +++ b/clients/ddm-admin-client/build.rs @@ -16,23 +16,23 @@ use std::path::Path; fn main() -> Result<()> { // Find the current maghemite repo commit from our package manifest. - let manifest = fs::read_to_string("../package-manifest.toml") - .context("failed to read ../package-manifest.toml")?; - println!("cargo:rerun-if-changed=../package-manifest.toml"); + let manifest = fs::read_to_string("../../package-manifest.toml") + .context("failed to read ../../package-manifest.toml")?; + println!("cargo:rerun-if-changed=../../package-manifest.toml"); let config: Config = toml::from_str(&manifest) - .context("failed to parse ../package-manifest.toml")?; + .context("failed to parse ../../package-manifest.toml")?; let maghemite = config .packages .get("maghemite") - .context("missing maghemite package in ../package-manifest.toml")?; + .context("missing maghemite package in ../../package-manifest.toml")?; let local_path = match &maghemite.source { PackageSource::Prebuilt { commit, .. } => { // Report a relatively verbose error if we haven't downloaded the requisite // openapi spec. 
let local_path = - format!("../out/downloads/ddm-admin-{commit}.json"); + format!("../../out/downloads/ddm-admin-{commit}.json"); if !Path::new(&local_path).exists() { bail!("{local_path} doesn't exist; rerun `tools/ci_download_maghemite_openapi` (after updating `tools/maghemite_openapi_version` if the maghemite commit in package-manifest.toml has changed)"); } @@ -42,7 +42,7 @@ fn main() -> Result<()> { PackageSource::Manual => { let local_path = - "../out/downloads/ddm-admin-manual.json".to_string(); + "../../out/downloads/ddm-admin-manual.json".to_string(); if !Path::new(&local_path).exists() { bail!("{local_path} doesn't exist, please copy manually built ddm-admin.json there!"); } diff --git a/ddm-admin-client/src/lib.rs b/clients/ddm-admin-client/src/lib.rs similarity index 100% rename from ddm-admin-client/src/lib.rs rename to clients/ddm-admin-client/src/lib.rs diff --git a/dns-service-client/Cargo.toml b/clients/dns-service-client/Cargo.toml similarity index 100% rename from dns-service-client/Cargo.toml rename to clients/dns-service-client/Cargo.toml diff --git a/dns-service-client/src/lib.rs b/clients/dns-service-client/src/lib.rs similarity index 98% rename from dns-service-client/src/lib.rs rename to clients/dns-service-client/src/lib.rs index 9b729b1c5c..931e68322f 100644 --- a/dns-service-client/src/lib.rs +++ b/clients/dns-service-client/src/lib.rs @@ -3,7 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
progenitor::generate_api!( - spec = "../openapi/dns-server.json", + spec = "../../openapi/dns-server.json", inner_type = slog::Logger, derives = [schemars::JsonSchema, Eq, PartialEq], pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { diff --git a/dpd-client/Cargo.toml b/clients/dpd-client/Cargo.toml similarity index 100% rename from dpd-client/Cargo.toml rename to clients/dpd-client/Cargo.toml diff --git a/dpd-client/build.rs b/clients/dpd-client/build.rs similarity index 87% rename from dpd-client/build.rs rename to clients/dpd-client/build.rs index 2aaa8437e7..6a65ab9495 100644 --- a/dpd-client/build.rs +++ b/clients/dpd-client/build.rs @@ -22,23 +22,23 @@ use std::path::Path; fn main() -> Result<()> { // Find the current dendrite repo commit from our package manifest. - let manifest = fs::read_to_string("../package-manifest.toml") - .context("failed to read ../package-manifest.toml")?; - println!("cargo:rerun-if-changed=../package-manifest.toml"); + let manifest = fs::read_to_string("../../package-manifest.toml") + .context("failed to read ../../package-manifest.toml")?; + println!("cargo:rerun-if-changed=../../package-manifest.toml"); let config: Config = toml::from_str(&manifest) - .context("failed to parse ../package-manifest.toml")?; + .context("failed to parse ../../package-manifest.toml")?; let dendrite = config .packages .get("dendrite-asic") - .context("missing dendrite package in ../package-manifest.toml")?; + .context("missing dendrite package in ../../package-manifest.toml")?; let local_path = match &dendrite.source { PackageSource::Prebuilt { commit, .. } => { - // Report a relatively verbose error if we haven't downloaded the requisite - // openapi spec. - let local_path = format!("../out/downloads/dpd-{commit}.json"); + // Report a relatively verbose error if we haven't downloaded the + // requisite openapi spec. 
+ let local_path = format!("../../out/downloads/dpd-{commit}.json"); if !Path::new(&local_path).exists() { bail!("{local_path} doesn't exist; rerun `tools/ci_download_dendrite_openapi` (after updating `tools/dendrite_openapi_version` if the dendrite commit in package-manifest.toml has changed)"); } @@ -47,7 +47,7 @@ fn main() -> Result<()> { } PackageSource::Manual => { - let local_path = "../out/downloads/dpd-manual.json".to_string(); + let local_path = "../../out/downloads/dpd-manual.json".to_string(); if !Path::new(&local_path).exists() { bail!("{local_path} doesn't exist, please copy manually built dpd.json there!"); } diff --git a/dpd-client/src/lib.rs b/clients/dpd-client/src/lib.rs similarity index 100% rename from dpd-client/src/lib.rs rename to clients/dpd-client/src/lib.rs diff --git a/gateway-client/Cargo.toml b/clients/gateway-client/Cargo.toml similarity index 100% rename from gateway-client/Cargo.toml rename to clients/gateway-client/Cargo.toml diff --git a/gateway-client/src/lib.rs b/clients/gateway-client/src/lib.rs similarity index 98% rename from gateway-client/src/lib.rs rename to clients/gateway-client/src/lib.rs index 7992eff9e4..800254b197 100644 --- a/gateway-client/src/lib.rs +++ b/clients/gateway-client/src/lib.rs @@ -34,7 +34,7 @@ // it is no longer useful to directly expose the JsonSchema types, we can go // back to reusing `omicron_common`. 
progenitor::generate_api!( - spec = "../openapi/gateway.json", + spec = "../../openapi/gateway.json", inner_type = slog::Logger, pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { slog::debug!(log, "client request"; diff --git a/installinator-artifact-client/Cargo.toml b/clients/installinator-artifact-client/Cargo.toml similarity index 100% rename from installinator-artifact-client/Cargo.toml rename to clients/installinator-artifact-client/Cargo.toml diff --git a/installinator-artifact-client/src/lib.rs b/clients/installinator-artifact-client/src/lib.rs similarity index 96% rename from installinator-artifact-client/src/lib.rs rename to clients/installinator-artifact-client/src/lib.rs index aa5ceb863a..de3072a34a 100644 --- a/installinator-artifact-client/src/lib.rs +++ b/clients/installinator-artifact-client/src/lib.rs @@ -5,7 +5,7 @@ //! Interface for making API requests to installinator-artifactd. progenitor::generate_api!( - spec = "../openapi/installinator-artifactd.json", + spec = "../../openapi/installinator-artifactd.json", inner_type = slog::Logger, pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { slog::debug!(log, "client request"; diff --git a/nexus-client/Cargo.toml b/clients/nexus-client/Cargo.toml similarity index 100% rename from nexus-client/Cargo.toml rename to clients/nexus-client/Cargo.toml diff --git a/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs similarity index 99% rename from nexus-client/src/lib.rs rename to clients/nexus-client/src/lib.rs index e5cec83f39..412ca70497 100644 --- a/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -8,7 +8,7 @@ use std::collections::HashMap; progenitor::generate_api!( - spec = "../openapi/nexus-internal.json", + spec = "../../openapi/nexus-internal.json", derives = [schemars::JsonSchema, PartialEq], inner_type = slog::Logger, pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { diff --git a/oxide-client/Cargo.toml b/clients/oxide-client/Cargo.toml 
similarity index 100% rename from oxide-client/Cargo.toml rename to clients/oxide-client/Cargo.toml diff --git a/oxide-client/src/lib.rs b/clients/oxide-client/src/lib.rs similarity index 99% rename from oxide-client/src/lib.rs rename to clients/oxide-client/src/lib.rs index 7d34697002..07a190c38e 100644 --- a/oxide-client/src/lib.rs +++ b/clients/oxide-client/src/lib.rs @@ -16,7 +16,7 @@ use trust_dns_resolver::config::{ use trust_dns_resolver::TokioAsyncResolver; progenitor::generate_api!( - spec = "../openapi/nexus.json", + spec = "../../openapi/nexus.json", interface = Builder, tags = Separate, ); diff --git a/oximeter-client/Cargo.toml b/clients/oximeter-client/Cargo.toml similarity index 100% rename from oximeter-client/Cargo.toml rename to clients/oximeter-client/Cargo.toml diff --git a/oximeter-client/src/lib.rs b/clients/oximeter-client/src/lib.rs similarity index 93% rename from oximeter-client/src/lib.rs rename to clients/oximeter-client/src/lib.rs index 9f326fdee8..7bd17d7e76 100644 --- a/oximeter-client/src/lib.rs +++ b/clients/oximeter-client/src/lib.rs @@ -6,7 +6,7 @@ //! 
Interface for API requests to an Oximeter metric collection server -omicron_common::generate_logging_api!("../openapi/oximeter.json"); +omicron_common::generate_logging_api!("../../openapi/oximeter.json"); impl omicron_common::api::external::ClientError for types::Error { fn message(&self) -> String { diff --git a/sled-agent-client/Cargo.toml b/clients/sled-agent-client/Cargo.toml similarity index 100% rename from sled-agent-client/Cargo.toml rename to clients/sled-agent-client/Cargo.toml diff --git a/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs similarity index 99% rename from sled-agent-client/src/lib.rs rename to clients/sled-agent-client/src/lib.rs index 98e7f207e3..68e60e8d95 100644 --- a/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -9,7 +9,7 @@ use omicron_common::generate_logging_api; use std::convert::TryFrom; use uuid::Uuid; -generate_logging_api!("../openapi/sled-agent.json"); +generate_logging_api!("../../openapi/sled-agent.json"); impl omicron_common::api::external::ClientError for types::Error { fn message(&self) -> String { diff --git a/wicketd-client/Cargo.toml b/clients/wicketd-client/Cargo.toml similarity index 100% rename from wicketd-client/Cargo.toml rename to clients/wicketd-client/Cargo.toml diff --git a/wicketd-client/src/lib.rs b/clients/wicketd-client/src/lib.rs similarity index 99% rename from wicketd-client/src/lib.rs rename to clients/wicketd-client/src/lib.rs index 3f113ea271..ff45232520 100644 --- a/wicketd-client/src/lib.rs +++ b/clients/wicketd-client/src/lib.rs @@ -5,7 +5,7 @@ //! 
Interface for making API requests to wicketd progenitor::generate_api!( - spec = "../openapi/wicketd.json", + spec = "../../openapi/wicketd.json", inner_type = slog::Logger, pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { slog::debug!(log, "client request"; From fb2b4a1d3286bb0c1bb6207fd1aa541037709295 Mon Sep 17 00:00:00 2001 From: Luqman Aden Date: Thu, 5 Oct 2023 15:16:22 -0700 Subject: [PATCH 05/23] tools/install_opte: Freeze the package to the pinned version. (#4215) --- tools/install_opte.sh | 27 +++++++++++++++++++++++++++ tools/uninstall_opte.sh | 14 ++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/tools/install_opte.sh b/tools/install_opte.sh index f670adf163..20a33b05a5 100755 --- a/tools/install_opte.sh +++ b/tools/install_opte.sh @@ -51,6 +51,26 @@ fi # Grab the version of the opte package to install OPTE_VERSION="$(cat "$OMICRON_TOP/tools/opte_version")" +OMICRON_FROZEN_PKG_COMMENT="OMICRON-PINNED-PACKAGE" + +# Once we install, we mark the package as frozen at that particular version. +# This makes sure that a `pkg update` won't automatically move us forward +# (and hence defeat the whole point of pinning). +# But this also prevents us from installig the next version so we must +# unfreeze first. +if PKG_FROZEN=$(pkg freeze | grep driver/network/opte); then + FROZEN_COMMENT=$(echo "$PKG_FROZEN" | awk '{ print $(NF) }') + + # Compare the comment to make sure this is indeed our previous doing + if [ "$FROZEN_COMMENT" != "$OMICRON_FROZEN_PKG_COMMENT" ]; then + echo "Found driver/network/opte previously frozen but not by us:" + echo $PKG_FROZEN + exit 1 + fi + + pfexec pkg unfreeze driver/network/opte +fi + # Actually install the xde kernel module and opteadm tool RC=0 pfexec pkg install -v pkg://helios-dev/driver/network/opte@"$OPTE_VERSION" || RC=$? @@ -63,6 +83,13 @@ else exit "$RC" fi +RC=0 +pfexec pkg freeze -c "$OMICRON_FROZEN_PKG_COMMENT" driver/network/opte@"$OPTE_VERSION" || RC=$? 
+if [[ "$RC" -ne 0 ]]; then + echo "Failed to pin opte package to $OPTE_VERSION" + exit $RC +fi + # Check the user's path RC=0 which opteadm > /dev/null || RC=$? diff --git a/tools/uninstall_opte.sh b/tools/uninstall_opte.sh index a833d029aa..c8ee0f5b28 100755 --- a/tools/uninstall_opte.sh +++ b/tools/uninstall_opte.sh @@ -165,6 +165,19 @@ function restore_xde_and_opte { fi } +function unfreeze_opte_pkg { + OMICRON_FROZEN_PKG_COMMENT="OMICRON-PINNED-PACKAGE" + + # If we've frozen a particular version, let's be good citizens + # and clear that as well. + if PKG_FROZEN=$(pkg freeze | grep driver/network/opte); then + FROZEN_COMMENT=$(echo "$PKG_FROZEN" | awk '{ print $(NF) }') + if [ "$FROZEN_COMMENT" == "$OMICRON_FROZEN_PKG_COMMENT" ]; then + pkg unfreeze driver/network/opte + fi + fi +} + function ensure_not_already_on_helios { local RC=0 pkg list "$STOCK_CONSOLIDATION"* || RC=$? @@ -179,5 +192,6 @@ uninstall_xde_and_opte for PUBLISHER in "${PUBLISHERS[@]}"; do remove_publisher "$PUBLISHER" done +unfreeze_opte_pkg ensure_not_already_on_helios to_stock_helios "$CONSOLIDATION" From c76ca69d34729d430f2779bfabb556c33eaf9bd6 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Thu, 5 Oct 2023 18:22:25 -0500 Subject: [PATCH 06/23] [trivial] add doc comments to snapshot ID and image ID on disk response type (#4217) Noticed these fields on the disk response, wondered what they are, and was surprised to see no description in the docs. 
https://docs-2meozyi0z-oxidecomputer.vercel.app/api/disk_view image --- common/src/api/external/mod.rs | 2 ++ openapi/nexus.json | 2 ++ 2 files changed, 4 insertions(+) diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 1d7e6884d1..91ed7e4240 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -952,7 +952,9 @@ pub struct Disk { #[serde(flatten)] pub identity: IdentityMetadata, pub project_id: Uuid, + /// ID of snapshot from which disk was created, if any pub snapshot_id: Option, + /// ID of image from which disk was created, if any pub image_id: Option, pub size: ByteCount, pub block_size: ByteCount, diff --git a/openapi/nexus.json b/openapi/nexus.json index 779b1f556c..9330b0ef47 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -9615,6 +9615,7 @@ }, "image_id": { "nullable": true, + "description": "ID of image from which disk was created, if any", "type": "string", "format": "uuid" }, @@ -9635,6 +9636,7 @@ }, "snapshot_id": { "nullable": true, + "description": "ID of snapshot from which disk was created, if any", "type": "string", "format": "uuid" }, From 230637ab5c22e4e0d6c82d5198887e2240289958 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 5 Oct 2023 19:25:47 -0700 Subject: [PATCH 07/23] `omdb` support for showing devices visible to MGS (#4162) --- Cargo.lock | 5 + clients/gateway-client/src/lib.rs | 16 +- dev-tools/omdb/Cargo.toml | 5 +- dev-tools/omdb/src/bin/omdb/main.rs | 4 + dev-tools/omdb/src/bin/omdb/mgs.rs | 488 ++++++++++++++++++ dev-tools/omdb/tests/successes.out | 105 ++++ dev-tools/omdb/tests/test_all_output.rs | 16 +- dev-tools/omdb/tests/usage_errors.out | 20 + dev-tools/omicron-dev/Cargo.toml | 2 + dev-tools/omicron-dev/src/bin/omicron-dev.rs | 38 ++ .../output/cmd-omicron-dev-noargs-stderr | 1 + 11 files changed, 697 insertions(+), 3 deletions(-) create mode 100644 dev-tools/omdb/src/bin/omdb/mgs.rs diff --git a/Cargo.lock b/Cargo.lock index 27a165c307..aad3a8782a 
100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4951,6 +4951,8 @@ dependencies = [ "dropshot", "expectorate", "futures", + "gateway-messages", + "gateway-test-utils", "libc", "nexus-test-interface", "nexus-test-utils", @@ -5140,6 +5142,9 @@ dependencies = [ "dropshot", "expectorate", "futures", + "gateway-client", + "gateway-messages", + "gateway-test-utils", "humantime", "internal-dns 0.1.0", "ipnetwork", diff --git a/clients/gateway-client/src/lib.rs b/clients/gateway-client/src/lib.rs index 800254b197..b071d34975 100644 --- a/clients/gateway-client/src/lib.rs +++ b/clients/gateway-client/src/lib.rs @@ -48,7 +48,7 @@ progenitor::generate_api!( }), derives = [schemars::JsonSchema], patch = { - SpIdentifier = { derives = [Copy, PartialEq, Hash, Eq, PartialOrd, Ord, Serialize, Deserialize] }, + SpIdentifier = { derives = [Copy, PartialEq, Hash, Eq, Serialize, Deserialize] }, SpIgnition = { derives = [PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] }, SpIgnitionSystemType = { derives = [Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] }, SpState = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] }, @@ -59,3 +59,17 @@ progenitor::generate_api!( HostPhase2RecoveryImageId = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] }, }, ); + +// Override the impl of Ord for SpIdentifier because the default one orders the +// fields in a different order than people are likely to want. 
+impl Ord for crate::types::SpIdentifier { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.type_.cmp(&other.type_).then(self.slot.cmp(&other.slot)) + } +} + +impl PartialOrd for crate::types::SpIdentifier { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index cd4af6e947..ff3c650d6d 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -14,9 +14,12 @@ chrono.workspace = true clap.workspace = true diesel.workspace = true dropshot.workspace = true +futures.workspace = true +gateway-client.workspace = true +gateway-messages.workspace = true +gateway-test-utils.workspace = true humantime.workspace = true internal-dns.workspace = true -futures.workspace = true nexus-client.workspace = true nexus-db-model.workspace = true nexus-db-queries.workspace = true diff --git a/dev-tools/omdb/src/bin/omdb/main.rs b/dev-tools/omdb/src/bin/omdb/main.rs index d1a56e1d80..32141d2809 100644 --- a/dev-tools/omdb/src/bin/omdb/main.rs +++ b/dev-tools/omdb/src/bin/omdb/main.rs @@ -41,6 +41,7 @@ use std::net::SocketAddr; use std::net::SocketAddrV6; mod db; +mod mgs; mod nexus; mod oximeter; mod sled_agent; @@ -57,6 +58,7 @@ async fn main() -> Result<(), anyhow::Error> { match &args.command { OmdbCommands::Db(db) => db.run_cmd(&args, &log).await, + OmdbCommands::Mgs(mgs) => mgs.run_cmd(&args, &log).await, OmdbCommands::Nexus(nexus) => nexus.run_cmd(&args, &log).await, OmdbCommands::Oximeter(oximeter) => oximeter.run_cmd(&log).await, OmdbCommands::SledAgent(sled) => sled.run_cmd(&args, &log).await, @@ -155,6 +157,8 @@ impl Omdb { enum OmdbCommands { /// Query the control plane database (CockroachDB) Db(db::DbArgs), + /// Debug a specific Management Gateway Service instance + Mgs(mgs::MgsArgs), /// Debug a specific Nexus instance Nexus(nexus::NexusArgs), /// Query oximeter collector state diff --git a/dev-tools/omdb/src/bin/omdb/mgs.rs 
b/dev-tools/omdb/src/bin/omdb/mgs.rs new file mode 100644 index 0000000000..d2938418e1 --- /dev/null +++ b/dev-tools/omdb/src/bin/omdb/mgs.rs @@ -0,0 +1,488 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Prototype code for collecting information from systems in the rack + +use crate::Omdb; +use anyhow::Context; +use clap::Args; +use clap::Subcommand; +use futures::StreamExt; +use gateway_client::types::PowerState; +use gateway_client::types::RotSlot; +use gateway_client::types::RotState; +use gateway_client::types::SpComponentCaboose; +use gateway_client::types::SpComponentInfo; +use gateway_client::types::SpIdentifier; +use gateway_client::types::SpIgnition; +use gateway_client::types::SpIgnitionInfo; +use gateway_client::types::SpIgnitionSystemType; +use gateway_client::types::SpState; +use gateway_client::types::SpType; +use tabled::Tabled; + +/// Arguments to the "omdb mgs" subcommand +#[derive(Debug, Args)] +pub struct MgsArgs { + /// URL of an MGS instance to query + #[clap(long, env("OMDB_MGS_URL"))] + mgs_url: Option, + + #[command(subcommand)] + command: MgsCommands, +} + +#[derive(Debug, Subcommand)] +enum MgsCommands { + /// Show information about devices and components visible to MGS + Inventory(InventoryArgs), +} + +#[derive(Debug, Args)] +struct InventoryArgs {} + +impl MgsArgs { + pub(crate) async fn run_cmd( + &self, + omdb: &Omdb, + log: &slog::Logger, + ) -> Result<(), anyhow::Error> { + let mgs_url = match &self.mgs_url { + Some(cli_or_env_url) => cli_or_env_url.clone(), + None => { + eprintln!( + "note: MGS URL not specified. Will pick one from DNS." 
+ ); + let addrs = omdb + .dns_lookup_all( + log.clone(), + internal_dns::ServiceName::ManagementGatewayService, + ) + .await?; + let addr = addrs.into_iter().next().expect( + "expected at least one MGS address from \ + successful DNS lookup", + ); + format!("http://{}", addr) + } + }; + eprintln!("note: using MGS URL {}", &mgs_url); + let mgs_client = gateway_client::Client::new(&mgs_url, log.clone()); + + match &self.command { + MgsCommands::Inventory(inventory_args) => { + cmd_mgs_inventory(&mgs_client, inventory_args).await + } + } + } +} + +/// Runs `omdb mgs inventory` +/// +/// Shows devices and components that are visible to an MGS instance. +async fn cmd_mgs_inventory( + mgs_client: &gateway_client::Client, + _args: &InventoryArgs, +) -> Result<(), anyhow::Error> { + // Report all the SP identifiers that MGS is configured to talk to. + println!("ALL CONFIGURED SPs\n"); + let mut sp_ids = mgs_client + .sp_all_ids() + .await + .context("listing SP identifiers")? + .into_inner(); + sp_ids.sort(); + show_sp_ids(&sp_ids)?; + println!(""); + + // Report which SPs are visible via Ignition. + println!("SPs FOUND THROUGH IGNITION\n"); + let mut sp_list_ignition = mgs_client + .ignition_list() + .await + .context("listing ignition")? + .into_inner(); + sp_list_ignition.sort_by(|a, b| a.id.cmp(&b.id)); + show_sps_from_ignition(&sp_list_ignition)?; + println!(""); + + // Print basic state about each SP that's visible to ignition. + println!("SERVICE PROCESSOR STATES\n"); + let mgs_client = std::sync::Arc::new(mgs_client); + let c = &mgs_client; + let mut sp_infos = + futures::stream::iter(sp_list_ignition.iter().filter_map(|ignition| { + if matches!(ignition.details, SpIgnition::Yes { .. 
}) { + Some(ignition.id) + } else { + None + } + })) + .then(|sp_id| async move { + c.sp_get(sp_id.type_, sp_id.slot) + .await + .with_context(|| format!("fetching info about SP {:?}", sp_id)) + .map(|s| (sp_id, s)) + }) + .collect::>>() + .await + .into_iter() + .filter_map(|r| match r { + Ok((sp_id, v)) => Some((sp_id, v.into_inner())), + Err(error) => { + eprintln!("error: {:?}", error); + None + } + }) + .collect::>(); + sp_infos.sort(); + show_sp_states(&sp_infos)?; + println!(""); + + // Print detailed information about each SP that we've found so far. + for (sp_id, sp_state) in &sp_infos { + show_sp_details(&mgs_client, sp_id, sp_state).await?; + } + + Ok(()) +} + +fn sp_type_to_str(s: &SpType) -> &'static str { + match s { + SpType::Sled => "Sled", + SpType::Power => "Power", + SpType::Switch => "Switch", + } +} + +fn show_sp_ids(sp_ids: &[SpIdentifier]) -> Result<(), anyhow::Error> { + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct SpIdRow { + #[tabled(rename = "TYPE")] + type_: &'static str, + slot: u32, + } + + impl<'a> From<&'a SpIdentifier> for SpIdRow { + fn from(id: &SpIdentifier) -> Self { + SpIdRow { type_: sp_type_to_str(&id.type_), slot: id.slot } + } + } + + let table_rows = sp_ids.iter().map(SpIdRow::from); + let table = tabled::Table::new(table_rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + println!("{}", textwrap::indent(&table.to_string(), " ")); + Ok(()) +} + +fn show_sps_from_ignition( + sp_list_ignition: &[SpIgnitionInfo], +) -> Result<(), anyhow::Error> { + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct IgnitionRow { + #[tabled(rename = "TYPE")] + type_: &'static str, + slot: u32, + system_type: String, + } + + impl<'a> From<&'a SpIgnitionInfo> for IgnitionRow { + fn from(value: &SpIgnitionInfo) -> Self { + IgnitionRow { + type_: sp_type_to_str(&value.id.type_), + slot: value.id.slot, + system_type: match 
value.details { + SpIgnition::No => "-".to_string(), + SpIgnition::Yes { + id: SpIgnitionSystemType::Gimlet, + .. + } => "Gimlet".to_string(), + SpIgnition::Yes { + id: SpIgnitionSystemType::Sidecar, + .. + } => "Sidecar".to_string(), + SpIgnition::Yes { + id: SpIgnitionSystemType::Psc, .. + } => "PSC".to_string(), + SpIgnition::Yes { + id: SpIgnitionSystemType::Unknown(v), + .. + } => format!("unknown: type {}", v), + }, + } + } + } + + let table_rows = sp_list_ignition.iter().map(IgnitionRow::from); + let table = tabled::Table::new(table_rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + println!("{}", textwrap::indent(&table.to_string(), " ")); + Ok(()) +} + +fn show_sp_states( + sp_states: &[(SpIdentifier, SpState)], +) -> Result<(), anyhow::Error> { + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct SpStateRow<'a> { + #[tabled(rename = "TYPE")] + type_: &'static str, + slot: u32, + model: String, + serial: String, + rev: u32, + hubris: &'a str, + pwr: &'static str, + rot_active: String, + } + + impl<'a> From<&'a (SpIdentifier, SpState)> for SpStateRow<'a> { + fn from((id, v): &'a (SpIdentifier, SpState)) -> Self { + SpStateRow { + type_: sp_type_to_str(&id.type_), + slot: id.slot, + model: v.model.clone(), + serial: v.serial_number.clone(), + rev: v.revision, + hubris: &v.hubris_archive_id, + pwr: match v.power_state { + PowerState::A0 => "A0", + PowerState::A1 => "A1", + PowerState::A2 => "A2", + }, + rot_active: match &v.rot { + RotState::CommunicationFailed { message } => { + format!("error: {}", message) + } + RotState::Enabled { active: RotSlot::A, .. } => { + "slot A".to_string() + } + RotState::Enabled { active: RotSlot::B, .. 
} => { + "slot B".to_string() + } + }, + } + } + } + + let table_rows = sp_states.iter().map(SpStateRow::from); + let table = tabled::Table::new(table_rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + println!("{}", textwrap::indent(&table.to_string(), " ")); + Ok(()) +} + +const COMPONENTS_WITH_CABOOSES: &'static [&'static str] = &["sp", "rot"]; + +async fn show_sp_details( + mgs_client: &gateway_client::Client, + sp_id: &SpIdentifier, + sp_state: &SpState, +) -> Result<(), anyhow::Error> { + println!( + "SP DETAILS: type {:?} slot {}\n", + sp_type_to_str(&sp_id.type_), + sp_id.slot + ); + + println!(" ROOT OF TRUST\n"); + match &sp_state.rot { + RotState::CommunicationFailed { message } => { + println!(" error: {}", message); + } + RotState::Enabled { + active, + pending_persistent_boot_preference, + persistent_boot_preference, + slot_a_sha3_256_digest, + slot_b_sha3_256_digest, + transient_boot_preference, + } => { + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct Row { + name: &'static str, + value: String, + } + + let rows = vec![ + Row { + name: "active slot", + value: format!("slot {:?}", active), + }, + Row { + name: "persistent boot preference", + value: format!("slot {:?}", persistent_boot_preference), + }, + Row { + name: "pending persistent boot preference", + value: pending_persistent_boot_preference + .map(|s| format!("slot {:?}", s)) + .unwrap_or_else(|| "-".to_string()), + }, + Row { + name: "transient boot preference", + value: transient_boot_preference + .map(|s| format!("slot {:?}", s)) + .unwrap_or_else(|| "-".to_string()), + }, + Row { + name: "slot A SHA3 256 digest", + value: slot_a_sha3_256_digest + .clone() + .unwrap_or_else(|| "-".to_string()), + }, + Row { + name: "slot B SHA3 256 digest", + value: slot_b_sha3_256_digest + .clone() + .unwrap_or_else(|| "-".to_string()), + }, + ]; + + let table = tabled::Table::new(rows) + 
.with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + println!("{}", textwrap::indent(&table.to_string(), " ")); + println!(""); + } + } + + let component_list = mgs_client + .sp_component_list(sp_id.type_, sp_id.slot) + .await + .with_context(|| format!("fetching components for SP {:?}", sp_id)); + let list = match component_list { + Ok(l) => l.into_inner(), + Err(e) => { + eprintln!("error: {:#}", e); + return Ok(()); + } + }; + + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct SpComponentRow<'a> { + name: &'a str, + description: &'a str, + device: &'a str, + presence: String, + serial: String, + } + + impl<'a> From<&'a SpComponentInfo> for SpComponentRow<'a> { + fn from(v: &'a SpComponentInfo) -> Self { + SpComponentRow { + name: &v.component, + description: &v.description, + device: &v.device, + presence: format!("{:?}", v.presence), + serial: format!("{:?}", v.serial_number), + } + } + } + + if list.components.is_empty() { + println!(" COMPONENTS: none found\n"); + return Ok(()); + } + + let table_rows = list.components.iter().map(SpComponentRow::from); + let table = tabled::Table::new(table_rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + println!(" COMPONENTS\n"); + println!("{}", textwrap::indent(&table.to_string(), " ")); + println!(""); + + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct CabooseRow { + component: String, + board: String, + git_commit: String, + name: String, + version: String, + } + + impl<'a> From<(&'a SpIdentifier, &'a SpComponentInfo, SpComponentCaboose)> + for CabooseRow + { + fn from( + (_sp_id, component, caboose): ( + &'a SpIdentifier, + &'a SpComponentInfo, + SpComponentCaboose, + ), + ) -> Self { + CabooseRow { + component: component.component.clone(), + board: caboose.board, + git_commit: caboose.git_commit, + name: caboose.name, + version: 
caboose.version.unwrap_or_else(|| "-".to_string()), + } + } + } + + let mut cabooses = Vec::new(); + for c in &list.components { + if !COMPONENTS_WITH_CABOOSES.contains(&c.component.as_str()) { + continue; + } + + for i in 0..1 { + let r = mgs_client + .sp_component_caboose_get( + sp_id.type_, + sp_id.slot, + &c.component, + i, + ) + .await + .with_context(|| { + format!( + "get caboose for sp type {:?} sp slot {} \ + component {:?} slot {}", + sp_id.type_, sp_id.slot, &c.component, i + ) + }); + match r { + Ok(v) => { + cabooses.push(CabooseRow::from((sp_id, c, v.into_inner()))) + } + Err(error) => { + eprintln!("warn: {:#}", error); + } + } + } + } + + if cabooses.is_empty() { + println!(" CABOOSES: none found\n"); + return Ok(()); + } + + let table = tabled::Table::new(cabooses) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + println!(" COMPONENT CABOOSES\n"); + println!("{}", textwrap::indent(&table.to_string(), " ")); + println!(""); + + Ok(()) +} diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index b1464cb824..eb075a84ea 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -84,6 +84,111 @@ stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected (5.0.0) ============================================= +EXECUTING COMMAND: omdb ["mgs", "inventory"] +termination: Exited(0) +--------------------------------------------- +stdout: +ALL CONFIGURED SPs + + TYPE SLOT + Sled 0 + Sled 1 + Switch 0 + Switch 1 + +SPs FOUND THROUGH IGNITION + + TYPE SLOT SYSTEM_TYPE + Sled 0 Gimlet + Sled 1 Gimlet + Switch 0 Sidecar + Switch 1 Sidecar + +SERVICE PROCESSOR STATES + + TYPE SLOT MODEL SERIAL REV HUBRIS PWR ROT_ACTIVE + Sled 0 FAKE_SIM_GIMLET SimGimlet00 0 0000000000000000 A2 slot A + Sled 1 FAKE_SIM_GIMLET SimGimlet01 0 0000000000000000 A2 slot A + Switch 0 
FAKE_SIM_SIDECAR SimSidecar0 0 0000000000000000 A2 slot A + Switch 1 FAKE_SIM_SIDECAR SimSidecar1 0 0000000000000000 A2 slot A + +SP DETAILS: type "Sled" slot 0 + + ROOT OF TRUST + + NAME VALUE + active slot slot A + persistent boot preference slot A + pending persistent boot preference - + transient boot preference - + slot A SHA3 256 digest - + slot B SHA3 256 digest - + + COMPONENTS + + NAME DESCRIPTION DEVICE PRESENCE SERIAL + sp3-host-cpu FAKE host cpu sp3-host-cpu Present None + dev-0 FAKE temperature sensor fake-tmp-sensor Failed None + + CABOOSES: none found + +SP DETAILS: type "Sled" slot 1 + + ROOT OF TRUST + + NAME VALUE + active slot slot A + persistent boot preference slot A + pending persistent boot preference - + transient boot preference - + slot A SHA3 256 digest - + slot B SHA3 256 digest - + + COMPONENTS + + NAME DESCRIPTION DEVICE PRESENCE SERIAL + sp3-host-cpu FAKE host cpu sp3-host-cpu Present None + + CABOOSES: none found + +SP DETAILS: type "Switch" slot 0 + + ROOT OF TRUST + + NAME VALUE + active slot slot A + persistent boot preference slot A + pending persistent boot preference - + transient boot preference - + slot A SHA3 256 digest - + slot B SHA3 256 digest - + + COMPONENTS + + NAME DESCRIPTION DEVICE PRESENCE SERIAL + dev-0 FAKE temperature sensor 1 fake-tmp-sensor Present None + dev-1 FAKE temperature sensor 2 fake-tmp-sensor Failed None + + CABOOSES: none found + +SP DETAILS: type "Switch" slot 1 + + ROOT OF TRUST + + NAME VALUE + active slot slot A + persistent boot preference slot A + pending persistent boot preference - + transient boot preference - + slot A SHA3 256 digest - + slot B SHA3 256 digest - + + COMPONENTS: none found + +--------------------------------------------- +stderr: +note: using MGS URL http://[::1]:REDACTED_PORT/ +============================================= EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"] termination: Exited(0) --------------------------------------------- diff --git 
a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index d757369ead..90e93ee429 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -42,6 +42,7 @@ async fn test_omdb_usage_errors() { &["db", "dns", "names"], &["db", "services"], &["db", "network"], + &["mgs"], &["nexus"], &["nexus", "background-tasks"], &["sled-agent"], @@ -58,10 +59,16 @@ async fn test_omdb_usage_errors() { #[nexus_test] async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { + let gwtestctx = gateway_test_utils::setup::test_setup( + "test_omdb_success_case", + gateway_messages::SpPort::One, + ) + .await; let cmd_path = path_to_executable(CMD_OMDB); let postgres_url = cptestctx.database.listen_url(); let nexus_internal_url = format!("http://{}/", cptestctx.internal_client.bind_address); + let mgs_url = format!("http://{}/", gwtestctx.client.bind_address); let mut output = String::new(); let invocations: &[&[&'static str]] = &[ &["db", "dns", "show"], @@ -70,6 +77,7 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { &["db", "services", "list-instances"], &["db", "services", "list-by-sled"], &["db", "sleds"], + &["mgs", "inventory"], &["nexus", "background-tasks", "doc"], &["nexus", "background-tasks", "show"], // We can't easily test the sled agent output because that's only @@ -81,9 +89,14 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { println!("running commands with args: {:?}", args); let p = postgres_url.to_string(); let u = nexus_internal_url.clone(); + let g = mgs_url.clone(); do_run( &mut output, - move |exec| exec.env("OMDB_DB_URL", &p).env("OMDB_NEXUS_URL", &u), + move |exec| { + exec.env("OMDB_DB_URL", &p) + .env("OMDB_NEXUS_URL", &u) + .env("OMDB_MGS_URL", &g) + }, &cmd_path, args, ) @@ -91,6 +104,7 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { } assert_contents("tests/successes.out", &output); + 
gwtestctx.teardown().await; } /// Verify that we properly deal with cases where: diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index dc2a16bc47..7bedc3ecbc 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -10,6 +10,7 @@ Usage: omdb [OPTIONS] Commands: db Query the control plane database (CockroachDB) + mgs Debug a specific Management Gateway Service instance nexus Debug a specific Nexus instance oximeter Query oximeter collector state sled-agent Debug a specific Sled @@ -33,6 +34,7 @@ Usage: omdb [OPTIONS] Commands: db Query the control plane database (CockroachDB) + mgs Debug a specific Management Gateway Service instance nexus Debug a specific Nexus instance oximeter Query oximeter collector state sled-agent Debug a specific Sled @@ -208,6 +210,24 @@ Options: --verbose Print out raw data structures from the data store -h, --help Print help ============================================= +EXECUTING COMMAND: omdb ["mgs"] +termination: Exited(2) +--------------------------------------------- +stdout: +--------------------------------------------- +stderr: +Debug a specific Management Gateway Service instance + +Usage: omdb mgs [OPTIONS] + +Commands: + inventory Show information about devices and components visible to MGS + help Print this message or the help of the given subcommand(s) + +Options: + --mgs-url URL of an MGS instance to query [env: OMDB_MGS_URL=] + -h, --help Print help +============================================= EXECUTING COMMAND: omdb ["nexus"] termination: Exited(2) --------------------------------------------- diff --git a/dev-tools/omicron-dev/Cargo.toml b/dev-tools/omicron-dev/Cargo.toml index 5439b69c76..251ee16c01 100644 --- a/dev-tools/omicron-dev/Cargo.toml +++ b/dev-tools/omicron-dev/Cargo.toml @@ -13,6 +13,8 @@ camino.workspace = true clap.workspace = true dropshot.workspace = true futures.workspace = true +gateway-messages.workspace = true 
+gateway-test-utils.workspace = true libc.workspace = true nexus-test-utils.workspace = true nexus-test-interface.workspace = true diff --git a/dev-tools/omicron-dev/src/bin/omicron-dev.rs b/dev-tools/omicron-dev/src/bin/omicron-dev.rs index 14617d6ba4..9107766d8a 100644 --- a/dev-tools/omicron-dev/src/bin/omicron-dev.rs +++ b/dev-tools/omicron-dev/src/bin/omicron-dev.rs @@ -30,6 +30,7 @@ async fn main() -> Result<(), anyhow::Error> { OmicronDb::DbPopulate { ref args } => cmd_db_populate(args).await, OmicronDb::DbWipe { ref args } => cmd_db_wipe(args).await, OmicronDb::ChRun { ref args } => cmd_clickhouse_run(args).await, + OmicronDb::MgsRun { ref args } => cmd_mgs_run(args).await, OmicronDb::RunAll { ref args } => cmd_run_all(args).await, OmicronDb::CertCreate { ref args } => cmd_cert_create(args).await, }; @@ -68,6 +69,12 @@ enum OmicronDb { args: ChRunArgs, }, + /// Run a simulated Management Gateway Service for development + MgsRun { + #[clap(flatten)] + args: MgsRunArgs, + }, + /// Run a full simulated control plane RunAll { #[clap(flatten)] @@ -465,3 +472,34 @@ fn write_private_file( .with_context(|| format!("open {:?} for writing", path))?; file.write_all(contents).with_context(|| format!("write to {:?}", path)) } + +#[derive(Clone, Debug, Args)] +struct MgsRunArgs {} + +async fn cmd_mgs_run(_args: &MgsRunArgs) -> Result<(), anyhow::Error> { + // Start a stream listening for SIGINT + let signals = Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); + let mut signal_stream = signals.fuse(); + + println!("omicron-dev: setting up MGS ... "); + let gwtestctx = gateway_test_utils::setup::test_setup( + "omicron-dev", + gateway_messages::SpPort::One, + ) + .await; + println!("omicron-dev: MGS is running."); + + let addr = gwtestctx.client.bind_address; + println!("omicron-dev: MGS API: http://{:?}", addr); + + // Wait for a signal. 
+ let caught_signal = signal_stream.next().await; + assert_eq!(caught_signal.unwrap(), SIGINT); + eprintln!( + "omicron-dev: caught signal, shutting down and removing \ + temporary directory" + ); + + gwtestctx.teardown().await; + Ok(()) +} diff --git a/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr index f3c28e1ab9..ac1c87e165 100644 --- a/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr +++ b/dev-tools/omicron-dev/tests/output/cmd-omicron-dev-noargs-stderr @@ -7,6 +7,7 @@ Commands: db-populate Populate an existing CockroachDB cluster with the Omicron schema db-wipe Wipe the Omicron schema (and all data) from an existing CockroachDB cluster ch-run Run a ClickHouse database server for development + mgs-run Run a simulated Management Gateway Service for development run-all Run a full simulated control plane cert-create Create a self-signed certificate for use with Omicron help Print this message or the help of the given subcommand(s) From 7ab9c1936a714a10e3b51ee1214cb21e08ec0af8 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Fri, 6 Oct 2023 15:36:11 +0900 Subject: [PATCH 08/23] Chore: Networking stack update (#4169) Automated dendrite updates have been stalled on: * a breaking API change in oxidecomputer/dendrite#0933cb0, * a breaking behavioural change in oxidecomputer/dendrite#616862d and its accompanying sidecar-lite/npuzone change. This PR updates these dependencies and pulls in the OPTE version needed to handle the new switch logic on ingress traffic. Once merged, Helios users will need to reinstall dependencies. 
--- Cargo.lock | 12 ++++++------ Cargo.toml | 4 ++-- nexus/src/app/sagas/switch_port_settings_apply.rs | 4 ++-- package-manifest.toml | 12 ++++++------ sled-agent/src/bootstrap/early_networking.rs | 3 +-- tools/ci_download_softnpu_machinery | 2 +- tools/dendrite_openapi_version | 4 ++-- tools/dendrite_stub_checksums | 6 +++--- tools/opte_version | 2 +- wicketd/src/preflight_check/uplink.rs | 2 +- 10 files changed, 25 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aad3a8782a..18e0e15c3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3390,7 +3390,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=98d33125413f01722947e322f82caf9d22209434#98d33125413f01722947e322f82caf9d22209434" +source = "git+https://github.com/oxidecomputer/opte?rev=631c2017f19cafb1535f621e9e5aa9198ccad869#631c2017f19cafb1535f621e9e5aa9198ccad869" [[package]] name = "illumos-utils" @@ -3811,7 +3811,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=98d33125413f01722947e322f82caf9d22209434#98d33125413f01722947e322f82caf9d22209434" +source = "git+https://github.com/oxidecomputer/opte?rev=631c2017f19cafb1535f621e9e5aa9198ccad869#631c2017f19cafb1535f621e9e5aa9198ccad869" dependencies = [ "quote", "syn 1.0.109", @@ -5583,7 +5583,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=98d33125413f01722947e322f82caf9d22209434#98d33125413f01722947e322f82caf9d22209434" +source = "git+https://github.com/oxidecomputer/opte?rev=631c2017f19cafb1535f621e9e5aa9198ccad869#631c2017f19cafb1535f621e9e5aa9198ccad869" dependencies = [ "cfg-if 0.1.10", "dyn-clone", @@ -5600,7 +5600,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/opte?rev=98d33125413f01722947e322f82caf9d22209434#98d33125413f01722947e322f82caf9d22209434" +source = "git+https://github.com/oxidecomputer/opte?rev=631c2017f19cafb1535f621e9e5aa9198ccad869#631c2017f19cafb1535f621e9e5aa9198ccad869" dependencies = [ "cfg-if 0.1.10", "illumos-sys-hdrs", @@ -5613,7 +5613,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=98d33125413f01722947e322f82caf9d22209434#98d33125413f01722947e322f82caf9d22209434" +source = "git+https://github.com/oxidecomputer/opte?rev=631c2017f19cafb1535f621e9e5aa9198ccad869#631c2017f19cafb1535f621e9e5aa9198ccad869" dependencies = [ "libc", "libnet", @@ -5693,7 +5693,7 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=98d33125413f01722947e322f82caf9d22209434#98d33125413f01722947e322f82caf9d22209434" +source = "git+https://github.com/oxidecomputer/opte?rev=631c2017f19cafb1535f621e9e5aa9198ccad869#631c2017f19cafb1535f621e9e5aa9198ccad869" dependencies = [ "cfg-if 0.1.10", "illumos-sys-hdrs", diff --git a/Cargo.toml b/Cargo.toml index 1ca8a02886..3b83b2f7c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -245,7 +245,7 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.8.3" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "98d33125413f01722947e322f82caf9d22209434", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "631c2017f19cafb1535f621e9e5aa9198ccad869", features = [ "api", "std" ] } once_cell = "1.18.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "1.0" @@ -253,7 +253,7 @@ openapiv3 = "1.0" openssl = "0.10" openssl-sys = "0.9" openssl-probe = "0.1.2" -opte-ioctl = { git = 
"https://github.com/oxidecomputer/opte", rev = "98d33125413f01722947e322f82caf9d22209434" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "631c2017f19cafb1535f621e9e5aa9198ccad869" } oso = "0.26" owo-colors = "3.5.0" oximeter = { path = "oximeter/oximeter" } diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index 07d4dd17fb..687613f0cc 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -175,7 +175,7 @@ pub(crate) fn api_to_dpd_port_settings( .to_string(), RouteSettingsV4 { link_id: link_id.0, - nexthop: Some(gw), + nexthop: gw, vid: r.vid.map(Into::into), }, ); @@ -194,7 +194,7 @@ pub(crate) fn api_to_dpd_port_settings( .to_string(), RouteSettingsV6 { link_id: link_id.0, - nexthop: Some(gw), + nexthop: gw, vid: r.vid.map(Into::into), }, ); diff --git a/package-manifest.toml b/package-manifest.toml index ff229e5def..a7f8683eee 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -458,8 +458,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "363e365135cfa46d7f7558d8670f35aa8fe412e9" -source.sha256 = "2dc34eaac7eb9d320594f3ac125df6a601fe020e0b3c7f16eb0a5ebddc8e18b9" +source.commit = "7712104585266a2898da38c1345210ad26f9e71d" +source.sha256 = "486b0b016c0df06947810b90f3a3dd40423f0ee6f255ed079dc8e5618c9a7281" output.type = "zone" output.intermediate_only = true @@ -483,8 +483,8 @@ only_for_targets.image = "standard" # 2. 
Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "363e365135cfa46d7f7558d8670f35aa8fe412e9" -source.sha256 = "1616eb25ab3d3a8b678b6cf3675af7ba61d455c3e6c2ba2a2d35a663861bc8e8" +source.commit = "7712104585266a2898da38c1345210ad26f9e71d" +source.sha256 = "76ff76d3526323c3fcbe2351cf9fbda4840e0dc11cd0eb6b71a3e0bd36c5e5e8" output.type = "zone" output.intermediate_only = true @@ -501,8 +501,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "363e365135cfa46d7f7558d8670f35aa8fe412e9" -source.sha256 = "a045e6dbb84dbceaf3a8a7dc33d283449fbeaf081442d0ae14ce8d8ffcdda4e9" +source.commit = "7712104585266a2898da38c1345210ad26f9e71d" +source.sha256 = "b8e5c176070f9bc9ea0028de1999c77d66ea3438913664163975964effe4481b" output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 78e54b3db4..61d4c84af3 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -495,14 +495,13 @@ impl<'a> EarlyNetworkSetup<'a> { e )) })?; - let nexthop = Some(uplink_config.gateway_ip); dpd_port_settings.v4_routes.insert( Ipv4Cidr { prefix: "0.0.0.0".parse().unwrap(), prefix_len: 0 } .to_string(), RouteSettingsV4 { link_id: link_id.0, vid: uplink_config.uplink_vid, - nexthop, + nexthop: uplink_config.gateway_ip, }, ); Ok((ipv6_entry, dpd_port_settings, port_id)) diff --git a/tools/ci_download_softnpu_machinery b/tools/ci_download_softnpu_machinery index 2575d6a186..d37d428476 100755 --- a/tools/ci_download_softnpu_machinery +++ b/tools/ci_download_softnpu_machinery @@ -15,7 +15,7 @@ OUT_DIR="out/npuzone" # Pinned commit for softnpu ASIC simulator SOFTNPU_REPO="softnpu" -SOFTNPU_COMMIT="64beaff129b7f63a04a53dd5ed0ec09f012f5756" 
+SOFTNPU_COMMIT="41b3a67b3d44f51528816ff8e539b4001df48305" # This is the softnpu ASIC simulator echo "fetching npuzone" diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index cbdbca7662..b1f210a647 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="363e365135cfa46d7f7558d8670f35aa8fe412e9" -SHA2="4da5edf1571a550a90aa8679a25c1535d2b02154dfb6034f170e421c2633bc31" +COMMIT="7712104585266a2898da38c1345210ad26f9e71d" +SHA2="cb3f0cfbe6216d2441d34e0470252e0fb142332e47b33b65c24ef7368a694b6d" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index acff400104..9538bc0d00 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="2dc34eaac7eb9d320594f3ac125df6a601fe020e0b3c7f16eb0a5ebddc8e18b9" -CIDL_SHA256_LINUX_DPD="5a976d1e43031f4790d1cd2f42d226b47c1be9c998917666f21cfaa3a7b13939" -CIDL_SHA256_LINUX_SWADM="38680e69364ffbfc43fea524786580d151ff45ce2f1802bd5179599f7c80e5f8" +CIDL_SHA256_ILLUMOS="486b0b016c0df06947810b90f3a3dd40423f0ee6f255ed079dc8e5618c9a7281" +CIDL_SHA256_LINUX_DPD="af97aaf7e1046a5c651d316c384171df6387b4c54c8ae4a3ef498e532eaa5a4c" +CIDL_SHA256_LINUX_SWADM="909e400dcc9880720222c6dc3919404d83687f773f668160f66f38b51a81c188" diff --git a/tools/opte_version b/tools/opte_version index 83a91f78b4..2dbaeb7154 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.23.173 +0.23.181 diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs index c0f5d0c6bb..58955d04d6 100644 --- a/wicketd/src/preflight_check/uplink.rs +++ b/wicketd/src/preflight_check/uplink.rs @@ -777,7 +777,7 @@ fn build_port_settings( DPD_DEFAULT_IPV4_CIDR.parse().unwrap(), RouteSettingsV4 { link_id: link_id.0, - nexthop: Some(uplink.gateway_ip), + nexthop: uplink.gateway_ip, vid: uplink.uplink_vid, }, ); From c03029373982a528d372c68f3015506f15dd2a07 Mon Sep 17 00:00:00 2001 From: Laura 
Abbott Date: Fri, 6 Oct 2023 07:07:06 -0700 Subject: [PATCH 09/23] Pick up correctly signed RoT sidecar images (#4216) --- tools/dvt_dock_version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/dvt_dock_version b/tools/dvt_dock_version index e2151b846f..790bd3ec26 100644 --- a/tools/dvt_dock_version +++ b/tools/dvt_dock_version @@ -1 +1 @@ -COMMIT=65f1979c1d3f4d0874a64144941cc41b46a70c80 +COMMIT=7cbfa19bad077a3c42976357a317d18291533ba2 From cf3bdaee3885dc34c838c5587e92787b772133a9 Mon Sep 17 00:00:00 2001 From: Patrick Mooney Date: Fri, 6 Oct 2023 16:40:00 -0500 Subject: [PATCH 10/23] Update Propolis to include UART fix This updates the Propolis packaging to use a version with the fix to oxidecomputer/propolis#540. --- Cargo.lock | 22 +++++++++++----------- Cargo.toml | 6 +++--- package-manifest.toml | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 18e0e15c3b..ec4efec0fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -485,7 +485,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "bhyve_api_sys", "libc", @@ -495,7 +495,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "libc", "strum", @@ -1225,7 +1225,7 @@ dependencies = [ [[package]] name = "cpuid_profile_config" version = "0.0.0" -source = 
"git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "propolis", "serde", @@ -2016,7 +2016,7 @@ checksum = "7e1a8646b2c125eeb9a84ef0faa6d2d102ea0d5da60b824ade2743263117b848" [[package]] name = "dladm" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "libc", "strum", @@ -6593,7 +6593,7 @@ dependencies = [ [[package]] name = "propolis" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "anyhow", "bhyve_api", @@ -6626,7 +6626,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "async-trait", "base64 0.21.4", @@ -6650,7 +6650,7 @@ dependencies = [ [[package]] name = "propolis-server" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = 
"git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "anyhow", "async-trait", @@ -6702,7 +6702,7 @@ dependencies = [ [[package]] name = "propolis-server-config" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "cpuid_profile_config", "serde", @@ -6714,7 +6714,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "schemars", "serde", @@ -9761,7 +9761,7 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "viona_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "libc", "viona_api_sys", @@ -9770,7 +9770,7 @@ dependencies = [ [[package]] name = "viona_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=42c878b71a58d430dfc306126af5d40ca816d70f#42c878b71a58d430dfc306126af5d40ca816d70f" +source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" dependencies = [ "libc", ] diff --git a/Cargo.toml b/Cargo.toml index 3b83b2f7c5..9388b2c7d6 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -277,9 +277,9 @@ pretty-hex = "0.3.0" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "42c878b71a58d430dfc306126af5d40ca816d70f" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "42c878b71a58d430dfc306126af5d40ca816d70f", features = [ "generated-migration" ] } -propolis-server = { git = "https://github.com/oxidecomputer/propolis", rev = "42c878b71a58d430dfc306126af5d40ca816d70f", default-features = false, features = ["mock-only"] } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e", features = [ "generated-migration" ] } +propolis-server = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e", default-features = false, features = ["mock-only"] } proptest = "1.2.0" quote = "1.0" rand = "0.8.5" diff --git a/package-manifest.toml b/package-manifest.toml index a7f8683eee..7cf235c24a 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -406,10 +406,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "42c878b71a58d430dfc306126af5d40ca816d70f" +source.commit = "901b710b6e5bd05a94a323693c2b971e7e7b240e" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "dce4d82bb936e990262abcaa279eee7e33a19930880b23f49fa3851cded18567" +source.sha256 = "0f681cdbe7312f66fd3c99fe033b379e49c59fa4ad04d307f68b12514307e976" output.type = "zone" 
[package.maghemite] From 9f004d2759b7ae827bc5e49d6cc8b5c5956573a8 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 6 Oct 2023 21:11:29 -0700 Subject: [PATCH 11/23] [crdb-seed] use a tarball, fix omicron-dev run-all (#4208) Several changes: 1. In https://github.com/oxidecomputer/omicron/issues/4193, @david-crespo observed some missing files in the crdb-seed generated directory. My suspicion is that that is due to the `/tmp` cleaner that runs on macOS. @davepacheco suggested using a tarball to get atomicity (either the file exists or it doesn't), and it ended up being pretty simple to do that at the end. 2. Teach nexus-test-utils to ensure that the seed tarball exists, fixing `omicron-dev run-all` and anything else that uses nexus-test-utils (and avoiding a dependency on the environment). 3. Move `crdb-seed` to `dev-tools` (thanks Dave for pointing it out!) 4. Add a way to invalidate the cache with `CRDB_SEED_INVALIDATE=1` in the environment. 5. Add a readme for `crdb-seed`. Fixes #4206. Hopefully addresses #4193. 
--- .config/nextest.toml | 4 +- Cargo.lock | 27 +- Cargo.toml | 5 +- crdb-seed/src/main.rs | 92 ------- {crdb-seed => dev-tools/crdb-seed}/Cargo.toml | 8 +- dev-tools/crdb-seed/README.md | 11 + dev-tools/crdb-seed/src/main.rs | 39 +++ dev-tools/omicron-dev/Cargo.toml | 2 +- dev-tools/omicron-dev/src/bin/omicron-dev.rs | 10 +- .../omicron-dev/tests/test_omicron_dev.rs | 11 + nexus/test-utils/Cargo.toml | 3 + nexus/test-utils/src/db.rs | 35 ++- nexus/test-utils/src/lib.rs | 105 +++++++- test-utils/Cargo.toml | 11 +- test-utils/src/dev/mod.rs | 107 +++----- test-utils/src/dev/seed.rs | 239 ++++++++++++++++++ workspace-hack/Cargo.toml | 24 +- 17 files changed, 518 insertions(+), 215 deletions(-) delete mode 100644 crdb-seed/src/main.rs rename {crdb-seed => dev-tools/crdb-seed}/Cargo.toml (60%) create mode 100644 dev-tools/crdb-seed/README.md create mode 100644 dev-tools/crdb-seed/src/main.rs create mode 100644 test-utils/src/dev/seed.rs diff --git a/.config/nextest.toml b/.config/nextest.toml index b2a8b360bb..ba07186c8a 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -8,7 +8,9 @@ nextest-version = { required = "0.9.59", recommended = "0.9.59" } experimental = ["setup-scripts"] [[profile.default.scripts]] -filter = 'rdeps(nexus-test-utils)' +# Exclude omicron-dev tests from crdb-seed as we explicitly want to simulate an +# environment where the seed file doesn't exist. 
+filter = 'rdeps(nexus-test-utils) - package(omicron-dev)' setup = 'crdb-seed' [profile.ci] diff --git a/Cargo.lock b/Cargo.lock index ec4efec0fc..306e953049 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -366,6 +366,17 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1181e1e0d1fce796a03db1ae795d67167da795f9cf4a39c37589e85ef57f26d3" +[[package]] +name = "atomicwrites" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1163d9d7c51de51a2b79d6df5e8888d11e9df17c752ce4a285fb6ca1580734e" +dependencies = [ + "rustix 0.37.23", + "tempfile", + "windows-sys 0.48.0", +] + [[package]] name = "atty" version = "0.2.14" @@ -1268,13 +1279,10 @@ dependencies = [ name = "crdb-seed" version = "0.1.0" dependencies = [ - "camino", - "camino-tempfile", + "anyhow", "dropshot", - "hex", "omicron-test-utils", "omicron-workspace-hack", - "ring", "slog", "tokio", ] @@ -5338,11 +5346,15 @@ name = "omicron-test-utils" version = "0.1.0" dependencies = [ "anyhow", + "atomicwrites", "camino", + "camino-tempfile", "dropshot", "expectorate", + "filetime", "futures", "headers", + "hex", "http", "libc", "omicron-common 0.1.0", @@ -5351,9 +5363,11 @@ dependencies = [ "rcgen", "regex", "reqwest", + "ring", "rustls", "slog", "subprocess", + "tar", "tempfile", "thiserror", "tokio", @@ -5436,6 +5450,7 @@ dependencies = [ "regex-syntax 0.7.5", "reqwest", "ring", + "rustix 0.37.23", "rustix 0.38.9", "schemars", "semver 1.0.18", @@ -9456,8 +9471,8 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 0.1.10", - "rand 0.4.6", + "cfg-if 1.0.0", + "rand 0.8.5", "static_assertions", ] diff --git a/Cargo.toml b/Cargo.toml index 9388b2c7d6..da7b582fe3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ members = [ "clients/sled-agent-client", "clients/wicketd-client", "common", - 
"crdb-seed", + "dev-tools/crdb-seed", "dev-tools/omdb", "dev-tools/omicron-dev", "dev-tools/thing-flinger", @@ -83,6 +83,7 @@ default-members = [ "clients/sled-agent-client", "clients/wicketd-client", "common", + "dev-tools/crdb-seed", "dev-tools/omdb", "dev-tools/omicron-dev", "dev-tools/thing-flinger", @@ -137,6 +138,7 @@ assert_matches = "1.5.0" assert_cmd = "2.0.12" async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "da04c087f835a51e0441addb19c5ef4986e1fcf2" } async-trait = "0.1.73" +atomicwrites = "0.4.1" authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } base64 = "0.21.4" @@ -182,6 +184,7 @@ dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", either = "1.9.0" expectorate = "1.1.0" fatfs = "0.3.6" +filetime = "0.2.22" flate2 = "1.0.27" flume = "0.11.0" foreign-types = "0.3.2" diff --git a/crdb-seed/src/main.rs b/crdb-seed/src/main.rs deleted file mode 100644 index b8572bd886..0000000000 --- a/crdb-seed/src/main.rs +++ /dev/null @@ -1,92 +0,0 @@ -use camino::Utf8PathBuf; -use dropshot::{test_util::LogContext, ConfigLogging, ConfigLoggingLevel}; -use omicron_test_utils::dev; -use slog::Logger; -use std::io::Write; - -// Creates a string identifier for the current DB schema and version. -// -// The goal here is to allow to create different "seed" directories -// for each revision of the DB. 
-fn digest_unique_to_schema() -> String { - let schema = include_str!("../../schema/crdb/dbinit.sql"); - let crdb_version = include_str!("../../tools/cockroachdb_version"); - let mut ctx = ring::digest::Context::new(&ring::digest::SHA256); - ctx.update(&schema.as_bytes()); - ctx.update(&crdb_version.as_bytes()); - let digest = ctx.finish(); - hex::encode(digest.as_ref()) -} - -enum SeedDirectoryStatus { - Created, - Existing, -} - -async fn ensure_seed_directory_exists( - log: &Logger, -) -> (Utf8PathBuf, SeedDirectoryStatus) { - let base_seed_dir = Utf8PathBuf::from_path_buf(std::env::temp_dir()) - .expect("Not a UTF-8 path") - .join("crdb-base"); - std::fs::create_dir_all(&base_seed_dir).unwrap(); - let desired_seed_dir = base_seed_dir.join(digest_unique_to_schema()); - - if desired_seed_dir.exists() { - return (desired_seed_dir, SeedDirectoryStatus::Existing); - } - - // The directory didn't exist when we started, so try to create it. - // - // Nextest will execute it just once, but it is possible for a user to start - // up multiple nextest processes to be running at the same time. So we - // should consider it possible for another caller to create this seed - // directory before we finish setting it up ourselves. - let tmp_seed_dir = - camino_tempfile::Utf8TempDir::new_in(base_seed_dir).unwrap(); - dev::test_setup_database_seed(log, tmp_seed_dir.path()).await; - - // If we can successfully perform the rename, there was either no - // contention or we won a creation race. - // - // If we couldn't perform the rename, the directory might already exist. - // Check that this is the error we encountered -- otherwise, we're - // struggling. 
- if let Err(err) = std::fs::rename(tmp_seed_dir.path(), &desired_seed_dir) { - if !desired_seed_dir.exists() { - panic!("Cannot rename seed directory for CockroachDB: {err}"); - } - } - - (desired_seed_dir, SeedDirectoryStatus::Created) -} - -#[tokio::main] -async fn main() { - // TODO: dropshot is v heavyweight for this, we should be able to pull in a - // smaller binary - let logctx = LogContext::new( - "crdb_seeding", - &ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Info }, - ); - let (dir, status) = ensure_seed_directory_exists(&logctx.log).await; - match status { - SeedDirectoryStatus::Created => { - slog::info!(logctx.log, "Created seed directory: `{dir}`"); - } - SeedDirectoryStatus::Existing => { - slog::info!(logctx.log, "Using existing seed directory: `{dir}`"); - } - } - if let Ok(env_path) = std::env::var("NEXTEST_ENV") { - let mut file = std::fs::File::create(&env_path) - .expect("failed to open NEXTEST_ENV file"); - writeln!(file, "CRDB_SEED_DIR={dir}") - .expect("failed to write to NEXTEST_ENV file"); - } else { - slog::warn!( - logctx.log, - "NEXTEST_ENV not set (is this script running under nextest?)" - ); - } -} diff --git a/crdb-seed/Cargo.toml b/dev-tools/crdb-seed/Cargo.toml similarity index 60% rename from crdb-seed/Cargo.toml rename to dev-tools/crdb-seed/Cargo.toml index 8d6d570d08..aff26995dc 100644 --- a/crdb-seed/Cargo.toml +++ b/dev-tools/crdb-seed/Cargo.toml @@ -3,14 +3,12 @@ name = "crdb-seed" version = "0.1.0" edition = "2021" license = "MPL-2.0" +readme = "README.md" [dependencies] -camino.workspace = true -camino-tempfile.workspace = true +anyhow.workspace = true dropshot.workspace = true -hex.workspace = true -omicron-test-utils.workspace = true -ring.workspace = true +omicron-test-utils = { workspace = true, features = ["seed-gen"] } slog.workspace = true tokio.workspace = true omicron-workspace-hack.workspace = true diff --git a/dev-tools/crdb-seed/README.md b/dev-tools/crdb-seed/README.md new file mode 100644 index 
0000000000..3b77f23066 --- /dev/null +++ b/dev-tools/crdb-seed/README.md @@ -0,0 +1,11 @@ +# crdb-seed + +This is a small utility that creates a seed tarball for our CockroachDB instance +in the temporary directory. It is used as a setup script for nextest (see +`.config/nextest.rs`). + +This utility hashes inputs and attempts to reuse a tarball if it already exists +(see `digest_unique_to_schema` in `omicron/test-utils/src/dev/seed.rs`). + +To invalidate the tarball and cause it to be recreated from scratch, set +`CRDB_SEED_INVALIDATE=1` in the environment. diff --git a/dev-tools/crdb-seed/src/main.rs b/dev-tools/crdb-seed/src/main.rs new file mode 100644 index 0000000000..26b0e19410 --- /dev/null +++ b/dev-tools/crdb-seed/src/main.rs @@ -0,0 +1,39 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use anyhow::{Context, Result}; +use dropshot::{test_util::LogContext, ConfigLogging, ConfigLoggingLevel}; +use omicron_test_utils::dev::seed::{ + ensure_seed_tarball_exists, should_invalidate_seed, +}; +use omicron_test_utils::dev::CRDB_SEED_TAR_ENV; +use std::io::Write; + +#[tokio::main] +async fn main() -> Result<()> { + // TODO: dropshot is v heavyweight for this, we should be able to pull in a + // smaller binary + let logctx = LogContext::new( + "crdb_seeding", + &ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Info }, + ); + let (seed_tar, status) = + ensure_seed_tarball_exists(&logctx.log, should_invalidate_seed()) + .await?; + status.log(&logctx.log, &seed_tar); + + if let Ok(env_path) = std::env::var("NEXTEST_ENV") { + let mut file = std::fs::File::create(&env_path) + .context("failed to open NEXTEST_ENV file")?; + writeln!(file, "{CRDB_SEED_TAR_ENV}={seed_tar}") + .context("failed to write to NEXTEST_ENV file")?; + } else { + slog::warn!( + logctx.log, + "NEXTEST_ENV not set (is this script 
running under nextest?)" + ); + } + + Ok(()) +} diff --git a/dev-tools/omicron-dev/Cargo.toml b/dev-tools/omicron-dev/Cargo.toml index 251ee16c01..ec7cafb559 100644 --- a/dev-tools/omicron-dev/Cargo.toml +++ b/dev-tools/omicron-dev/Cargo.toml @@ -16,7 +16,7 @@ futures.workspace = true gateway-messages.workspace = true gateway-test-utils.workspace = true libc.workspace = true -nexus-test-utils.workspace = true +nexus-test-utils = { workspace = true, features = ["omicron-dev"] } nexus-test-interface.workspace = true omicron-common.workspace = true omicron-nexus.workspace = true diff --git a/dev-tools/omicron-dev/src/bin/omicron-dev.rs b/dev-tools/omicron-dev/src/bin/omicron-dev.rs index 9107766d8a..e79184f7e5 100644 --- a/dev-tools/omicron-dev/src/bin/omicron-dev.rs +++ b/dev-tools/omicron-dev/src/bin/omicron-dev.rs @@ -14,7 +14,6 @@ use futures::stream::StreamExt; use nexus_test_interface::NexusServer; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; -use omicron_sled_agent::sim; use omicron_test_utils::dev; use signal_hook::consts::signal::SIGINT; use signal_hook_tokio::Signals; @@ -348,13 +347,12 @@ async fn cmd_run_all(args: &RunAllArgs) -> Result<(), anyhow::Error> { config.deployment.dropshot_external.dropshot.bind_address.set_port(p); } - // Start up a ControlPlaneTestContext, which tautologically sets up - // everything needed for a simulated control plane. println!("omicron-dev: setting up all services ... "); - let cptestctx = nexus_test_utils::test_setup_with_config::< + let cptestctx = nexus_test_utils::omicron_dev_setup_with_config::< omicron_nexus::Server, - >("omicron-dev", &mut config, sim::SimMode::Auto, None) - .await; + >(&mut config) + .await + .context("error setting up services")?; println!("omicron-dev: services are running."); // Print out basic information about what was started. 
diff --git a/dev-tools/omicron-dev/tests/test_omicron_dev.rs b/dev-tools/omicron-dev/tests/test_omicron_dev.rs index f855d8935d..f1e8177243 100644 --- a/dev-tools/omicron-dev/tests/test_omicron_dev.rs +++ b/dev-tools/omicron-dev/tests/test_omicron_dev.rs @@ -13,6 +13,7 @@ use omicron_test_utils::dev::test_cmds::path_to_executable; use omicron_test_utils::dev::test_cmds::run_command; use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; use omicron_test_utils::dev::test_cmds::EXIT_USAGE; +use omicron_test_utils::dev::CRDB_SEED_TAR_ENV; use oxide_client::ClientHiddenExt; use std::io::BufRead; use std::path::Path; @@ -389,6 +390,16 @@ async fn test_db_run() { // This mirrors the `test_db_run()` test. #[tokio::test] async fn test_run_all() { + // Ensure that the CRDB_SEED_TAR environment variable is not set. We want to + // simulate a user running omicron-dev without the test environment. + // Check if CRDB_SEED_TAR_ENV is set and panic if it is + if let Ok(val) = std::env::var(CRDB_SEED_TAR_ENV) { + panic!( + "CRDB_SEED_TAR_ENV should not be set here, but is set to {}", + val + ); + } + let cmd_path = path_to_omicron_dev(); let cmdstr = format!( diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index 8eb8df4a5b..8cd25582be 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -39,3 +39,6 @@ trust-dns-proto.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true + +[features] +omicron-dev = ["omicron-test-utils/seed-gen"] diff --git a/nexus/test-utils/src/db.rs b/nexus/test-utils/src/db.rs index 37d7128c49..ff23f35df0 100644 --- a/nexus/test-utils/src/db.rs +++ b/nexus/test-utils/src/db.rs @@ -8,7 +8,7 @@ use camino::Utf8PathBuf; use omicron_test_utils::dev; use slog::Logger; -/// Path to the "seed" CockroachDB directory. +/// Path to the "seed" CockroachDB tarball. 
/// /// Populating CockroachDB unfortunately isn't free - creation of /// tables, indices, and users takes several seconds to complete. @@ -16,20 +16,39 @@ use slog::Logger; /// By creating a "seed" version of the database, we can cut down /// on the time spent performing this operation. Instead, we opt /// to copy the database from this seed location. -fn seed_dir() -> Utf8PathBuf { +fn seed_tar() -> Utf8PathBuf { // The setup script should set this environment variable. - let seed_dir = std::env::var("CRDB_SEED_DIR") - .expect("CRDB_SEED_DIR missing -- are you running this test with `cargo nextest run`?"); + let seed_dir = std::env::var(dev::CRDB_SEED_TAR_ENV).unwrap_or_else(|_| { + panic!( + "{} missing -- are you running this test \ + with `cargo nextest run`?", + dev::CRDB_SEED_TAR_ENV, + ) + }); seed_dir.into() } -/// Wrapper around [`dev::test_setup_database`] which uses a a -/// seed directory provided at build-time. +/// Wrapper around [`dev::test_setup_database`] which uses a seed tarball +/// provided from the environment. pub async fn test_setup_database(log: &Logger) -> dev::db::CockroachInstance { - let dir = seed_dir(); + let input_tar = seed_tar(); dev::test_setup_database( log, - dev::StorageSource::CopyFromSeed { input_dir: dir }, + dev::StorageSource::CopyFromSeed { input_tar }, + ) + .await +} + +/// Wrapper around [`dev::test_setup_database`] which uses a seed tarball +/// provided as an argument. +#[cfg(feature = "omicron-dev")] +pub async fn test_setup_database_from_seed( + log: &Logger, + input_tar: Utf8PathBuf, +) -> dev::db::CockroachInstance { + dev::test_setup_database( + log, + dev::StorageSource::CopyFromSeed { input_tar }, ) .await } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index d219da7e96..34c218b3e2 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -5,6 +5,7 @@ //! 
Integration testing facilities for Nexus use anyhow::Context; +use anyhow::Result; use camino::Utf8Path; use dns_service_client::types::DnsConfigParams; use dropshot::test_util::ClientTestContext; @@ -284,14 +285,30 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { } pub async fn start_crdb(&mut self, populate: bool) { + let populate = if populate { + PopulateCrdb::FromEnvironmentSeed + } else { + PopulateCrdb::Empty + }; + self.start_crdb_impl(populate).await; + } + + /// Private implementation of `start_crdb` that allows for a seed tarball to + /// be passed in. See [`PopulateCrdb`] for more details. + async fn start_crdb_impl(&mut self, populate: PopulateCrdb) { let log = &self.logctx.log; debug!(log, "Starting CRDB"); // Start up CockroachDB. - let database = if populate { - db::test_setup_database(log).await - } else { - db::test_setup_database_empty(log).await + let database = match populate { + PopulateCrdb::FromEnvironmentSeed => { + db::test_setup_database(log).await + } + #[cfg(feature = "omicron-dev")] + PopulateCrdb::FromSeed { input_tar } => { + db::test_setup_database_from_seed(log, input_tar).await + } + PopulateCrdb::Empty => db::test_setup_database_empty(log).await, }; eprintln!("DB URL: {}", database.pg_config()); @@ -759,17 +776,89 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { } } +/// How to populate CockroachDB. +/// +/// This is private because we want to ensure that tests use the setup script +/// rather than trying to create their own seed tarballs. This may need to be +/// revisited if circumstances change. +#[derive(Clone, Debug)] +enum PopulateCrdb { + /// Populate Cockroach from the `CRDB_SEED_TAR_ENV` environment variable. + /// + /// Any tests that depend on nexus-test-utils should have this environment + /// variable available. + FromEnvironmentSeed, + + /// Populate Cockroach from the seed located at this path. 
+ #[cfg(feature = "omicron-dev")] + FromSeed { input_tar: camino::Utf8PathBuf }, + + /// Do not populate Cockroach. + Empty, +} + +/// Setup routine to use for `omicron-dev`. Use [`test_setup_with_config`] for +/// tests. +/// +/// The main difference from tests is that this routine ensures the seed tarball +/// exists (or creates a seed tarball if it doesn't exist). For tests, this +/// should be done in the `crdb-seed` setup script. +#[cfg(feature = "omicron-dev")] +pub async fn omicron_dev_setup_with_config( + config: &mut omicron_common::nexus_config::Config, +) -> Result> { + let builder = + ControlPlaneTestContextBuilder::::new("omicron-dev", config); + + let log = &builder.logctx.log; + debug!(log, "Ensuring seed tarball exists"); + + // Start up a ControlPlaneTestContext, which tautologically sets up + // everything needed for a simulated control plane. + let why_invalidate = + omicron_test_utils::dev::seed::should_invalidate_seed(); + let (seed_tar, status) = + omicron_test_utils::dev::seed::ensure_seed_tarball_exists( + log, + why_invalidate, + ) + .await + .context("error ensuring seed tarball exists")?; + status.log(log, &seed_tar); + + Ok(setup_with_config_impl( + builder, + PopulateCrdb::FromSeed { input_tar: seed_tar }, + sim::SimMode::Auto, + None, + ) + .await) +} + +/// Setup routine to use for tests. 
pub async fn test_setup_with_config( test_name: &str, config: &mut omicron_common::nexus_config::Config, sim_mode: sim::SimMode, initial_cert: Option, ) -> ControlPlaneTestContext { - let mut builder = - ControlPlaneTestContextBuilder::::new(test_name, config); + let builder = ControlPlaneTestContextBuilder::::new(test_name, config); + setup_with_config_impl( + builder, + PopulateCrdb::FromEnvironmentSeed, + sim_mode, + initial_cert, + ) + .await +} - let populate = true; - builder.start_crdb(populate).await; +async fn setup_with_config_impl( + mut builder: ControlPlaneTestContextBuilder<'_, N>, + populate: PopulateCrdb, + sim_mode: sim::SimMode, + initial_cert: Option, +) -> ControlPlaneTestContext { + builder.start_crdb_impl(populate).await; builder.start_clickhouse().await; builder.start_dendrite(SwitchLocation::Switch0).await; builder.start_dendrite(SwitchLocation::Switch1).await; diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 9e21f3ca12..7b1f70c79e 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -6,20 +6,26 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true +atomicwrites.workspace = true camino.workspace = true +camino-tempfile.workspace = true dropshot.workspace = true +filetime = { workspace = true, optional = true } futures.workspace = true headers.workspace = true +hex.workspace = true http.workspace = true libc.workspace = true omicron-common.workspace = true pem.workspace = true +ring.workspace = true rustls.workspace = true slog.workspace = true subprocess.workspace = true tempfile.workspace = true thiserror.workspace = true -tokio = { workspace = true, features = [ "full" ] } +tar.workspace = true +tokio = { workspace = true, features = ["full"] } tokio-postgres.workspace = true usdt.workspace = true rcgen.workspace = true @@ -29,3 +35,6 @@ omicron-workspace-hack.workspace = true [dev-dependencies] expectorate.workspace = true + +[features] +seed-gen = ["dep:filetime"] diff --git 
a/test-utils/src/dev/mod.rs b/test-utils/src/dev/mod.rs index ea95a1de76..dbd66fe1f8 100644 --- a/test-utils/src/dev/mod.rs +++ b/test-utils/src/dev/mod.rs @@ -9,55 +9,21 @@ pub mod clickhouse; pub mod db; pub mod dendrite; pub mod poll; +#[cfg(feature = "seed-gen")] +pub mod seed; pub mod test_cmds; -use anyhow::Context; -use camino::Utf8Path; +use anyhow::{Context, Result}; use camino::Utf8PathBuf; pub use dropshot::test_util::LogContext; use dropshot::ConfigLogging; use dropshot::ConfigLoggingIfExists; use dropshot::ConfigLoggingLevel; use slog::Logger; -use std::path::Path; +use std::io::BufReader; -// Helper for copying all the files in one directory to another. -fn copy_dir( - src: impl AsRef, - dst: impl AsRef, -) -> Result<(), anyhow::Error> { - let src = src.as_ref(); - let dst = dst.as_ref(); - std::fs::create_dir_all(&dst) - .with_context(|| format!("Failed to create dst {}", dst.display()))?; - for entry in std::fs::read_dir(src) - .with_context(|| format!("Failed to read_dir {}", src.display()))? - { - let entry = entry.with_context(|| { - format!("Failed to read entry in {}", src.display()) - })?; - let ty = entry.file_type().context("Failed to access file type")?; - let target = dst.join(entry.file_name()); - if ty.is_dir() { - copy_dir(entry.path(), &target).with_context(|| { - format!( - "Failed to copy subdirectory {} to {}", - entry.path().display(), - target.display() - ) - })?; - } else { - std::fs::copy(entry.path(), &target).with_context(|| { - format!( - "Failed to copy file at {} to {}", - entry.path().display(), - target.display() - ) - })?; - } - } - Ok(()) -} +/// The environment variable via which the path to the seed tarball is passed. +pub static CRDB_SEED_TAR_ENV: &str = "CRDB_SEED_TAR"; /// Set up a [`dropshot::test_util::LogContext`] appropriate for a test named /// `test_name` @@ -80,36 +46,9 @@ pub enum StorageSource { DoNotPopulate, /// Populate the latest version of the database. 
PopulateLatest { output_dir: Utf8PathBuf }, - /// Copy the database from a seed directory, which has previously + /// Copy the database from a seed tarball, which has previously /// been created with `PopulateLatest`. - CopyFromSeed { input_dir: Utf8PathBuf }, -} - -/// Creates a [`db::CockroachInstance`] with a populated storage directory. -/// -/// This is intended to optimize subsequent calls to [`test_setup_database`] -/// by reducing the latency of populating the storage directory. -pub async fn test_setup_database_seed(log: &Logger, dir: &Utf8Path) { - let _ = std::fs::remove_dir_all(dir); - std::fs::create_dir_all(dir).unwrap(); - let mut db = setup_database( - log, - StorageSource::PopulateLatest { output_dir: dir.to_owned() }, - ) - .await; - db.cleanup().await.unwrap(); - - // See https://github.com/cockroachdb/cockroach/issues/74231 for context on - // this. We use this assertion to check that our seed directory won't point - // back to itself, even if it is copied elsewhere. - assert_eq!( - 0, - dir.join("temp-dirs-record.txt") - .metadata() - .expect("Cannot access metadata") - .len(), - "Temporary directory record should be empty after graceful shutdown", - ); + CopyFromSeed { input_tar: Utf8PathBuf }, } /// Set up a [`db::CockroachInstance`] for running tests. @@ -118,13 +57,15 @@ pub async fn test_setup_database( source: StorageSource, ) -> db::CockroachInstance { usdt::register_probes().expect("Failed to register USDT DTrace probes"); - setup_database(log, source).await + setup_database(log, source).await.unwrap() } +// TODO: switch to anyhow entirely -- this function is currently a mishmash of +// anyhow and unwrap/expect calls. async fn setup_database( log: &Logger, storage_source: StorageSource, -) -> db::CockroachInstance { +) -> Result { let builder = db::CockroachStarterBuilder::new(); let mut builder = match &storage_source { StorageSource::DoNotPopulate | StorageSource::CopyFromSeed { .. 
} => { @@ -135,7 +76,7 @@ async fn setup_database( } }; builder.redirect_stdio_to_files(); - let starter = builder.build().unwrap(); + let starter = builder.build().context("error building CockroachStarter")?; info!( &log, "cockroach temporary directory: {}", @@ -147,13 +88,22 @@ async fn setup_database( match &storage_source { StorageSource::DoNotPopulate | StorageSource::PopulateLatest { .. } => { } - StorageSource::CopyFromSeed { input_dir } => { + StorageSource::CopyFromSeed { input_tar } => { info!(&log, - "cockroach: copying from seed directory ({}) to storage directory ({})", - input_dir, starter.store_dir().to_string_lossy(), + "cockroach: copying from seed tarball ({}) to storage directory ({})", + input_tar, starter.store_dir().to_string_lossy(), ); - copy_dir(input_dir, starter.store_dir()) - .expect("Cannot copy storage from seed directory"); + let reader = std::fs::File::open(input_tar).with_context(|| { + format!("cannot open input tar {}", input_tar) + })?; + let mut tar = tar::Archive::new(BufReader::new(reader)); + tar.unpack(starter.store_dir()).with_context(|| { + format!( + "cannot unpack input tar {} into {}", + input_tar, + starter.store_dir().display() + ) + })?; } } @@ -184,7 +134,8 @@ async fn setup_database( info!(&log, "cockroach: populated"); } } - database + + Ok(database) } /// Returns whether the given process is currently running diff --git a/test-utils/src/dev/seed.rs b/test-utils/src/dev/seed.rs new file mode 100644 index 0000000000..841ecd5f35 --- /dev/null +++ b/test-utils/src/dev/seed.rs @@ -0,0 +1,239 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::io::{BufWriter, Write}; + +use anyhow::{ensure, Context, Result}; +use camino::{Utf8Path, Utf8PathBuf}; +use filetime::FileTime; +use slog::Logger; + +use super::CRDB_SEED_TAR_ENV; + +/// Creates a string identifier for the current DB schema and version. +// +/// The goal here is to allow to create different "seed" tarballs +/// for each revision of the DB. +pub fn digest_unique_to_schema() -> String { + let schema = include_str!("../../../schema/crdb/dbinit.sql"); + let crdb_version = include_str!("../../../tools/cockroachdb_version"); + let mut ctx = ring::digest::Context::new(&ring::digest::SHA256); + ctx.update(&schema.as_bytes()); + ctx.update(&crdb_version.as_bytes()); + let digest = ctx.finish(); + hex::encode(digest.as_ref()) +} + +/// Looks up the standard environment variable `CRDB_SEED_INVALIDATE` to check +/// if a seed should be invalidated. Returns a string to pass in as the +/// `why_invalidate` argument of [`ensure_seed_tarball_exists`]. +pub fn should_invalidate_seed() -> Option<&'static str> { + (std::env::var("CRDB_SEED_INVALIDATE").as_deref() == Ok("1")) + .then_some("CRDB_SEED_INVALIDATE=1 set in environment") +} + +/// The return value of [`ensure_seed_tarball_exists`]. +#[derive(Clone, Copy, Debug)] +pub enum SeedTarballStatus { + Created, + Invalidated, + Existing, +} + +impl SeedTarballStatus { + pub fn log(self, log: &Logger, seed_tar: &Utf8Path) { + match self { + SeedTarballStatus::Created => { + info!(log, "Created CRDB seed tarball: `{seed_tar}`"); + } + SeedTarballStatus::Invalidated => { + info!( + log, + "Invalidated and created new CRDB seed tarball: `{seed_tar}`", + ); + } + SeedTarballStatus::Existing => { + info!(log, "Using existing CRDB seed tarball: `{seed_tar}`"); + } + } + } +} + +/// Ensures that a seed tarball corresponding to the schema returned by +/// [`digest_unique_to_schema`] exists, recreating it if necessary. 
+/// +/// This used to create a directory rather than a tarball, but that was changed +/// due to [Omicron issue +/// #4193](https://github.com/oxidecomputer/omicron/issues/4193). +/// +/// If `why_invalidate` is `Some`, then if the seed tarball exists, it will be +/// deleted before being recreated. +/// +/// # Notes +/// +/// This method should _not_ be used by tests. Instead, rely on the `crdb-seed` +/// setup script. +pub async fn ensure_seed_tarball_exists( + log: &Logger, + why_invalidate: Option<&str>, +) -> Result<(Utf8PathBuf, SeedTarballStatus)> { + // If the CRDB_SEED_TAR_ENV variable is set, return an error. + // + // Even though this module is gated behind a feature flag, omicron-dev needs + // this function -- and so, if you're doing a top-level `cargo nextest run` + // like CI does, feature unification would mean this gets included in test + // binaries anyway. So this acts as a belt-and-suspenders check. + if let Ok(val) = std::env::var(CRDB_SEED_TAR_ENV) { + anyhow::bail!( + "{CRDB_SEED_TAR_ENV} is set to `{val}` -- implying that a test called \ + ensure_seed_tarball_exists. Instead, tests should rely on the `crdb-seed` \ + setup script." + ); + } + + // XXX: we aren't considering cross-user permissions for this file. Might be + // worth setting more restrictive permissions on it, or using a per-user + // cache dir. + let base_seed_dir = Utf8PathBuf::from_path_buf(std::env::temp_dir()) + .expect("Not a UTF-8 path") + .join("crdb-base"); + std::fs::create_dir_all(&base_seed_dir).unwrap(); + let mut desired_seed_tar = base_seed_dir.join(digest_unique_to_schema()); + desired_seed_tar.set_extension("tar"); + + let invalidated = match (desired_seed_tar.exists(), why_invalidate) { + (true, Some(why)) => { + slog::info!( + log, + "{why}: invalidating seed tarball: `{desired_seed_tar}`", + ); + std::fs::remove_file(&desired_seed_tar) + .context("failed to remove seed tarball")?; + true + } + (true, None) => { + // The tarball exists. 
Update its atime and mtime (i.e. `touch` it) + // to ensure that it doesn't get deleted by a /tmp cleaner. + let now = FileTime::now(); + filetime::set_file_times(&desired_seed_tar, now, now) + .context("failed to update seed tarball atime and mtime")?; + return Ok((desired_seed_tar, SeedTarballStatus::Existing)); + } + (false, Some(why)) => { + slog::info!( + log, + "{why}, but seed tarball does not exist: `{desired_seed_tar}`", + ); + false + } + (false, None) => { + // The tarball doesn't exist. + false + } + }; + + // At this point the tarball does not exist (either because it didn't exist + // in the first place or because it was deleted above), so try to create it. + // + // Nextest will execute this function just once via the `crdb-seed` binary, + // but it is possible for a user to start up multiple nextest processes to + // be running at the same time. So we should consider it possible for + // another caller to create this seed tarball before we finish setting it up + // ourselves. + test_setup_database_seed(log, &desired_seed_tar) + .await + .context("failed to setup seed tarball")?; + + let status = if invalidated { + SeedTarballStatus::Invalidated + } else { + SeedTarballStatus::Created + }; + Ok((desired_seed_tar, status)) +} + +/// Creates a seed file for a Cockroach database at the output tarball. +/// +/// This is intended to optimize subsequent calls to +/// [`test_setup_database`](super::test_setup_database) by reducing the latency +/// of populating the storage directory. 
+pub async fn test_setup_database_seed( + log: &Logger, + output_tar: &Utf8Path, +) -> Result<()> { + let base_seed_dir = output_tar.parent().unwrap(); + let tmp_seed_dir = camino_tempfile::Utf8TempDir::new_in(base_seed_dir) + .context("failed to create temporary seed directory")?; + + let mut db = super::setup_database( + log, + super::StorageSource::PopulateLatest { + output_dir: tmp_seed_dir.path().to_owned(), + }, + ) + .await + .context("failed to setup database")?; + db.cleanup().await.context("failed to cleanup database")?; + + // See https://github.com/cockroachdb/cockroach/issues/74231 for context on + // this. We use this assertion to check that our seed directory won't point + // back to itself, even if it is copied elsewhere. + let dirs_record_path = tmp_seed_dir.path().join("temp-dirs-record.txt"); + let dirs_record_len = dirs_record_path + .metadata() + .with_context(|| { + format!("cannot access metadata for {dirs_record_path}") + })? + .len(); + ensure!( + dirs_record_len == 0, + "Temporary directory record should be empty (was {dirs_record_len}) \ + after graceful shutdown", + ); + + let output_tar = output_tar.to_owned(); + + tokio::task::spawn_blocking(move || { + // Tar up the directory -- this prevents issues where some but not all of + // the files get cleaned up by /tmp cleaners. See + // https://github.com/oxidecomputer/omicron/issues/4193. + let atomic_file = atomicwrites::AtomicFile::new( + &output_tar, + // We don't expect this to exist, but if it does, we want to overwrite + // it. That is because there's a remote possibility that multiple + // instances of test_setup_database_seed are running simultaneously. + atomicwrites::OverwriteBehavior::AllowOverwrite, + ); + let res = atomic_file.write(|f| { + // Tar up the directory here. 
+ let writer = BufWriter::new(f); + let mut tar = tar::Builder::new(writer); + tar.follow_symlinks(false); + tar.append_dir_all(".", tmp_seed_dir.path()).with_context( + || { + format!( + "failed to append directory `{}` to tarball", + tmp_seed_dir.path(), + ) + }, + )?; + + let mut writer = + tar.into_inner().context("failed to finish writing tarball")?; + writer.flush().context("failed to flush tarball")?; + + Ok::<_, anyhow::Error>(()) + }); + match res { + Ok(()) => Ok(()), + Err(atomicwrites::Error::Internal(error)) => Err(error) + .with_context(|| { + format!("failed to write seed tarball: `{}`", output_tar) + }), + Err(atomicwrites::Error::User(error)) => Err(error), + } + }) + .await + .context("error in task to tar up contents")? +} diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 8854ef27bc..106da92f62 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -215,49 +215,56 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f hyper-rustls = { version = "0.24.1" } mio = { version = "0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] } [target.x86_64-unknown-linux-gnu.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.1" } mio = { version = "0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", 
features = ["fs", "termios"] } [target.x86_64-apple-darwin.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.1" } mio = { version = "0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] } [target.x86_64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.1" } mio = { version = "0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] } [target.aarch64-apple-darwin.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.1" } mio = { version = "0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] } [target.aarch64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.1" } mio = { version = 
"0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] } [target.x86_64-unknown-illumos.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } hyper-rustls = { version = "0.24.1" } mio = { version = "0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] } toml_datetime = { version = "0.6.3", default-features = false, features = ["serde"] } toml_edit = { version = "0.19.15", features = ["serde"] } @@ -266,7 +273,8 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-f hyper-rustls = { version = "0.24.1" } mio = { version = "0.8.8", features = ["net", "os-ext"] } once_cell = { version = "1.18.0", features = ["unstable"] } -rustix = { version = "0.38.9", features = ["fs", "termios"] } +rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.9", features = ["fs", "termios"] } +rustix-d736d0ac4424f0f1 = { package = "rustix", version = "0.37.23", features = ["fs", "termios"] } toml_datetime = { version = "0.6.3", default-features = false, features = ["serde"] } toml_edit = { version = "0.19.15", features = ["serde"] } From d624bce9af25e5bc1141de4dcdb50a15223f106d Mon Sep 17 00:00:00 2001 From: Andy Fiddaman Date: Mon, 9 Oct 2023 20:02:08 +0100 Subject: [PATCH 12/23] Back /var/fm/fmd with a dataset from the boot M.2 (#4212) `/var/fm/fmd` is where the illumos 
fault management system records data. We want to preserve this data across system reboots and in real time rather than via periodic data copying, so that the information is available should the system panic shortly thereafter. Fixes: https://github.com/oxidecomputer/omicron/issues/4211 --- illumos-utils/src/zfs.rs | 41 ++++-- sled-agent/src/backing_fs.rs | 178 +++++++++++++++++++++++++ sled-agent/src/bootstrap/pre_server.rs | 1 + sled-agent/src/lib.rs | 1 + sled-agent/src/sled_agent.rs | 22 ++- sled-agent/src/storage_manager.rs | 1 + sled-agent/src/swap_device.rs | 3 - sled-hardware/src/disk.rs | 15 ++- 8 files changed, 242 insertions(+), 20 deletions(-) create mode 100644 sled-agent/src/backing_fs.rs diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index ba8cd8c84a..9118a9a3cd 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -61,6 +61,9 @@ enum EnsureFilesystemErrorRaw { #[error("Failed to mount encrypted filesystem: {0}")] MountEncryptedFsFailed(crate::ExecutionError), + + #[error("Failed to mount overlay filesystem: {0}")] + MountOverlayFsFailed(crate::ExecutionError), } /// Error returned by [`Zfs::ensure_filesystem`]. @@ -202,6 +205,7 @@ impl Zfs { /// Creates a new ZFS filesystem named `name`, unless one already exists. /// /// Applies an optional quota, provided _in bytes_. 
+ #[allow(clippy::too_many_arguments)] pub fn ensure_filesystem( name: &str, mountpoint: Mountpoint, @@ -209,6 +213,7 @@ impl Zfs { do_format: bool, encryption_details: Option, size_details: Option, + additional_options: Option>, ) -> Result<(), EnsureFilesystemError> { let (exists, mounted) = Self::dataset_exists(name, &mountpoint)?; if exists { @@ -261,7 +266,14 @@ impl Zfs { ]); } + if let Some(opts) = additional_options { + for o in &opts { + cmd.args(&["-o", &o]); + } + } + cmd.args(&["-o", &format!("mountpoint={}", mountpoint), name]); + execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), mountpoint: mountpoint.clone(), @@ -322,6 +334,20 @@ impl Zfs { Ok(()) } + pub fn mount_overlay_dataset( + name: &str, + mountpoint: &Mountpoint, + ) -> Result<(), EnsureFilesystemError> { + let mut command = std::process::Command::new(PFEXEC); + let cmd = command.args(&[ZFS, "mount", "-O", name]); + execute(cmd).map_err(|err| EnsureFilesystemError { + name: name.to_string(), + mountpoint: mountpoint.clone(), + err: EnsureFilesystemErrorRaw::MountOverlayFsFailed(err), + })?; + Ok(()) + } + // Return (true, mounted) if the dataset exists, (false, false) otherwise, // where mounted is if the dataset is mounted. fn dataset_exists( @@ -385,7 +411,7 @@ impl Zfs { Zfs::get_value(filesystem_name, &format!("oxide:{}", name)) } - fn get_value( + pub fn get_value( filesystem_name: &str, name: &str, ) -> Result { @@ -422,13 +448,12 @@ pub fn get_all_omicron_datasets_for_delete() -> anyhow::Result> { let internal = pool.kind() == crate::zpool::ZpoolKind::Internal; let pool = pool.to_string(); for dataset in &Zfs::list_datasets(&pool)? { - // Avoid erasing crashdump datasets on internal pools - if dataset == "crash" && internal { - continue; - } - - // The swap device might be in use, so don't assert that it can be deleted. - if dataset == "swap" && internal { + // Avoid erasing crashdump, backing data and swap datasets on + // internal pools. 
The swap device may be in use. + if internal + && (["crash", "backing", "swap"].contains(&dataset.as_str()) + || dataset.starts_with("backing/")) + { continue; } diff --git a/sled-agent/src/backing_fs.rs b/sled-agent/src/backing_fs.rs new file mode 100644 index 0000000000..5014ac5999 --- /dev/null +++ b/sled-agent/src/backing_fs.rs @@ -0,0 +1,178 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Operations for dealing with persistent backing mounts for OS data + +// On Oxide hardware, the root filesystem is backed by a ramdisk and +// non-persistent. However, there are several things within the root filesystem +// which are useful to preserve across reboots, and these are backed persistent +// datasets on the boot disk. +// +// Each boot disk contains a dataset sled_hardware::disk::M2_BACKING_DATASET +// and for each backing mount, a child dataset is created under there that +// is configured with the desired mountpoint in the root filesystem. Since +// there are multiple disks which can be used to boot, these datasets are also +// marked with the "canmount=noauto" attribute so that they do not all try to +// mount automatically and race -- only one could ever succeed. This allows us +// to come along later and specifically mount the one that we want (the one from +// the current boot disk) and also perform an overlay mount so that it succeeds +// even if there is content from the ramdisk image or early boot services +// present underneath. The overlay mount action is optionally bracketed with a +// service stop/start. 
+ +use camino::Utf8PathBuf; +use illumos_utils::zfs::{ + EnsureFilesystemError, GetValueError, Mountpoint, SizeDetails, Zfs, +}; + +#[derive(Debug, thiserror::Error)] +pub enum BackingFsError { + #[error("Error administering service: {0}")] + Adm(#[from] smf::AdmError), + + #[error("Error retrieving dataset property: {0}")] + DatasetProperty(#[from] GetValueError), + + #[error("Error initializing dataset: {0}")] + Mount(#[from] EnsureFilesystemError), +} + +struct BackingFs { + // Dataset name + name: &'static str, + // Mountpoint + mountpoint: &'static str, + // Optional quota, in _bytes_ + quota: Option, + // Optional compression mode + compression: Option<&'static str>, + // Linked service + service: Option<&'static str>, +} + +impl BackingFs { + const fn new(name: &'static str) -> Self { + Self { + name, + mountpoint: "legacy", + quota: None, + compression: None, + service: None, + } + } + + const fn mountpoint(mut self, mountpoint: &'static str) -> Self { + self.mountpoint = mountpoint; + self + } + + const fn quota(mut self, quota: usize) -> Self { + self.quota = Some(quota); + self + } + + const fn compression(mut self, compression: &'static str) -> Self { + self.compression = Some(compression); + self + } + + const fn service(mut self, service: &'static str) -> Self { + self.service = Some(service); + self + } +} + +const BACKING_FMD_DATASET: &'static str = "fmd"; +const BACKING_FMD_MOUNTPOINT: &'static str = "/var/fm/fmd"; +const BACKING_FMD_SERVICE: &'static str = "svc:/system/fmd:default"; +const BACKING_FMD_QUOTA: usize = 500 * (1 << 20); // 500 MiB + +const BACKING_COMPRESSION: &'static str = "on"; + +const BACKINGFS_COUNT: usize = 1; +static BACKINGFS: [BackingFs; BACKINGFS_COUNT] = + [BackingFs::new(BACKING_FMD_DATASET) + .mountpoint(BACKING_FMD_MOUNTPOINT) + .quota(BACKING_FMD_QUOTA) + .compression(BACKING_COMPRESSION) + .service(BACKING_FMD_SERVICE)]; + +/// Ensure that the backing filesystems are mounted. 
+/// If the underlying dataset for a backing fs does not exist on the specified +/// boot disk then it will be created. +pub(crate) fn ensure_backing_fs( + log: &slog::Logger, + boot_zpool_name: &illumos_utils::zpool::ZpoolName, +) -> Result<(), BackingFsError> { + let log = log.new(o!( + "component" => "BackingFs", + )); + for bfs in BACKINGFS.iter() { + info!(log, "Processing {}", bfs.name); + + let dataset = format!( + "{}/{}/{}", + boot_zpool_name, + sled_hardware::disk::M2_BACKING_DATASET, + bfs.name + ); + let mountpoint = Mountpoint::Path(Utf8PathBuf::from(bfs.mountpoint)); + + info!(log, "Ensuring dataset {}", dataset); + + let size_details = Some(SizeDetails { + quota: bfs.quota, + compression: bfs.compression, + }); + + Zfs::ensure_filesystem( + &dataset, + mountpoint.clone(), + false, // zoned + true, // do_format + None, // encryption_details, + size_details, + Some(vec!["canmount=noauto".to_string()]), // options + )?; + + // Check if a ZFS filesystem is already mounted on bfs.mountpoint by + // retrieving the ZFS `mountpoint` property and comparing it. This + // might seem counter-intuitive but if there is a filesystem mounted + // there, its mountpoint will match, and if not then we will retrieve + // the mountpoint of a higher level filesystem, such as '/'. If we + // can't retrieve the property at all, then there is definitely no ZFS + // filesystem mounted there - most likely we are running with a non-ZFS + // root, such as when net booted during CI. 
+ if Zfs::get_value(&bfs.mountpoint, "mountpoint") + .unwrap_or("not-zfs".to_string()) + == bfs.mountpoint + { + info!(log, "{} is already mounted", bfs.mountpoint); + return Ok(()); + } + + if let Some(service) = bfs.service { + info!(log, "Stopping service {}", service); + smf::Adm::new() + .disable() + .temporary() + .synchronous() + .run(smf::AdmSelection::ByPattern(&[service]))?; + } + + info!(log, "Mounting {} on {}", dataset, mountpoint); + + Zfs::mount_overlay_dataset(&dataset, &mountpoint)?; + + if let Some(service) = bfs.service { + info!(log, "Starting service {}", service); + smf::Adm::new() + .enable() + .synchronous() + .run(smf::AdmSelection::ByPattern(&[service]))?; + } + } + + Ok(()) +} diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 0899bdd82f..71325fef3d 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -381,6 +381,7 @@ fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> { do_format, encryption_details, quota, + None, ) .map_err(StartError::EnsureZfsRamdiskDataset) } diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 5c4dbd8310..4e7921c605 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -17,6 +17,7 @@ pub mod sim; pub mod common; // Modules for the non-simulated sled agent. 
+mod backing_fs; pub mod bootstrap; pub mod config; mod http_entrypoints; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 7e62f6a8a7..5574edca55 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -59,9 +59,15 @@ use illumos_utils::{dladm::MockDladm as Dladm, zone::MockZones as Zones}; #[derive(thiserror::Error, Debug)] pub enum Error { + #[error("Could not find boot disk")] + BootDiskNotFound, + #[error("Configuration error: {0}")] Config(#[from] crate::config::ConfigError), + #[error("Error setting up backing filesystems: {0}")] + BackingFs(#[from] crate::backing_fs::BackingFsError), + #[error("Error setting up swap device: {0}")] SwapDevice(#[from] crate::swap_device::SwapDeviceError), @@ -268,14 +274,17 @@ impl SledAgent { )); info!(&log, "SledAgent::new(..) starting"); - // Configure a swap device of the configured size before other system setup. + let boot_disk = storage + .resources() + .boot_disk() + .await + .ok_or_else(|| Error::BootDiskNotFound)?; + + // Configure a swap device of the configured size before other system + // setup. match config.swap_device_size_gb { Some(sz) if sz > 0 => { info!(log, "Requested swap device of size {} GiB", sz); - let boot_disk = - storage.resources().boot_disk().await.ok_or_else(|| { - crate::swap_device::SwapDeviceError::BootDiskNotFound - })?; crate::swap_device::ensure_swap_device( &parent_log, &boot_disk.1, @@ -290,6 +299,9 @@ impl SledAgent { } } + info!(log, "Mounting backing filesystems"); + crate::backing_fs::ensure_backing_fs(&parent_log, &boot_disk.1)?; + // Ensure we have a thread that automatically reaps process contracts // when they become empty. See the comments in // illumos-utils/src/running_zone.rs for more detail. 
diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs index bd71371396..c31a4dc0bc 100644 --- a/sled-agent/src/storage_manager.rs +++ b/sled-agent/src/storage_manager.rs @@ -417,6 +417,7 @@ impl StorageWorker { do_format, encryption_details, size_details, + None, )?; // Ensure the dataset has a usable UUID. if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") { diff --git a/sled-agent/src/swap_device.rs b/sled-agent/src/swap_device.rs index 5a8f40adbd..6a00b42672 100644 --- a/sled-agent/src/swap_device.rs +++ b/sled-agent/src/swap_device.rs @@ -9,9 +9,6 @@ use zeroize::Zeroize; #[derive(Debug, thiserror::Error)] pub enum SwapDeviceError { - #[error("Could not find boot disk")] - BootDiskNotFound, - #[error("Error running ZFS command: {0}")] Zfs(illumos_utils::ExecutionError), diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index aec99ae3f8..e3078cbeea 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -256,6 +256,7 @@ pub const CRASH_DATASET: &'static str = "crash"; pub const CLUSTER_DATASET: &'static str = "cluster"; pub const CONFIG_DATASET: &'static str = "config"; pub const M2_DEBUG_DATASET: &'static str = "debug"; +pub const M2_BACKING_DATASET: &'static str = "backing"; // TODO-correctness: This value of 100GiB is a pretty wild guess, and should be // tuned as needed. pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 30); @@ -282,7 +283,7 @@ static U2_EXPECTED_DATASETS: [ExpectedDataset; U2_EXPECTED_DATASET_COUNT] = [ .compression(DUMP_DATASET_COMPRESSION), ]; -const M2_EXPECTED_DATASET_COUNT: usize = 5; +const M2_EXPECTED_DATASET_COUNT: usize = 6; static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ // Stores software images. // @@ -290,7 +291,11 @@ static M2_EXPECTED_DATASETS: [ExpectedDataset; M2_EXPECTED_DATASET_COUNT] = [ ExpectedDataset::new(INSTALL_DATASET), // Stores crash dumps. 
ExpectedDataset::new(CRASH_DATASET), - // Stores cluter configuration information. + // Backing store for OS data that should be persisted across reboots. + // Its children are selectively overlay mounted onto parts of the ramdisk + // root. + ExpectedDataset::new(M2_BACKING_DATASET), + // Stores cluster configuration information. + // + // Should be duplicated to both M.2s. ExpectedDataset::new(CLUSTER_DATASET), @@ -524,6 +529,7 @@ impl Disk { do_format, Some(encryption_details), None, + None, ); keyfile.zero_and_unlink().await.map_err(|error| { @@ -562,8 +568,8 @@ impl Disk { "Automatically destroying dataset: {}", name ); Zfs::destroy_dataset(name).or_else(|err| { - // If we can't find the dataset, that's fine -- it might - // not have been formatted yet. + // If we can't find the dataset, that's fine -- it + // might not have been formatted yet. if let DestroyDatasetErrorVariant::NotFound = err.err { @@ -588,6 +594,7 @@ impl Disk { do_format, encryption_details, size_details, + None, )?; if dataset.wipe { From 47a6b42c986c65292ee61b0c79090fa57dec5fe9 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Mon, 9 Oct 2023 16:35:01 -0500 Subject: [PATCH 13/23] [nexus] Add `/v1/ping` endpoint (#3925) Closes #3923 Adds `/v1/ping` that always returns `{ "status": "ok" }` if it returns anything at all. I went with `ping` over the initial `/v1/system/health` because the latter is vague about its meaning, whereas everyone knows ping means a trivial request and response. I also thought it was weird to put an endpoint with no auth check under `/v1/system`, where ~all the other endpoints require fleet-level perms. This doesn't add too much over hitting an existing endpoint, but I think it's worth it because * It doesn't hit the DB * It has no auth check * It gives a very simple answer to "what endpoint should I use to ping the API?" 
(a question we have gotten at least once) * It's easy (I already did it) Questions that occurred to me while working through this: - Should we actually attempt to do something in the handler that would tell us, e.g., whether the DB is up? - No, that would be more than a ping - Raises DoS questions if not auth gated - Could add a db status endpoint or you could use any endpoint that returns data - What tag should this be under? - Initially added a `system` tag because a) this doesn't fit under existing `system/blah` tags and b) it really does feel miscellaneous - Changed to `system/status`, with the idea that if we add other kinds of checks, they would be new endpoints under this tag. --- nexus/src/external_api/http_entrypoints.rs | 16 ++++++ nexus/src/external_api/tag-config.json | 6 ++ nexus/tests/integration_tests/basic.rs | 13 ++++- nexus/tests/integration_tests/endpoints.rs | 1 - nexus/tests/output/nexus_tags.txt | 4 ++ .../output/uncovered-authz-endpoints.txt | 1 + nexus/types/src/external_api/views.rs | 15 +++++ openapi/nexus.json | 57 +++++++++++++++++++ 8 files changed, 111 insertions(+), 2 deletions(-) diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 6e614d5644..ac5cf76775 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -98,6 +98,8 @@ type NexusApiDescription = ApiDescription>; /// Returns a description of the external nexus API pub(crate) fn external_api() -> NexusApiDescription { fn register_endpoints(api: &mut NexusApiDescription) -> Result<(), String> { + api.register(ping)?; + api.register(system_policy_view)?; api.register(system_policy_update)?; @@ -364,6 +366,20 @@ pub(crate) fn external_api() -> NexusApiDescription { // clients. Client generators use operationId to name API methods, so changing // a function name is a breaking change from a client perspective. 
+/// Ping API +/// +/// Always responds with Ok if it responds at all. +#[endpoint { + method = GET, + path = "/v1/ping", + tags = ["system/status"], +}] +async fn ping( + _rqctx: RequestContext>, +) -> Result, HttpError> { + Ok(HttpResponseOk(views::Ping { status: views::PingStatus::Ok })) +} + /// Fetch the top-level IAM policy #[endpoint { method = GET, diff --git a/nexus/src/external_api/tag-config.json b/nexus/src/external_api/tag-config.json index e985ea7db4..07eb198016 100644 --- a/nexus/src/external_api/tag-config.json +++ b/nexus/src/external_api/tag-config.json @@ -80,6 +80,12 @@ "url": "http://docs.oxide.computer/api/vpcs" } }, + "system/status": { + "description": "Endpoints related to system health", + "external_docs": { + "url": "http://docs.oxide.computer/api/system-status" + } + }, "system/hardware": { "description": "These operations pertain to hardware inventory and management. Racks are the unit of expansion of an Oxide deployment. Racks are in turn composed of sleds, switches, power supplies, and a cabled backplane.", "external_docs": { diff --git a/nexus/tests/integration_tests/basic.rs b/nexus/tests/integration_tests/basic.rs index ab54c97197..282ec0cd96 100644 --- a/nexus/tests/integration_tests/basic.rs +++ b/nexus/tests/integration_tests/basic.rs @@ -10,7 +10,8 @@ use dropshot::HttpErrorResponseBody; use http::method::Method; use http::StatusCode; -use nexus_types::external_api::{params, views::Project}; +use nexus_types::external_api::params; +use nexus_types::external_api::views::{self, Project}; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::IdentityMetadataUpdateParams; use omicron_common::api::external::Name; @@ -546,3 +547,13 @@ async fn test_projects_list(cptestctx: &ControlPlaneTestContext) { .collect::>() ); } + +#[nexus_test] +async fn test_ping(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + let health = NexusRequest::object_get(client, 
"/v1/ping") + .execute_and_parse_unwrap::() + .await; + assert_eq!(health.status, views::PingStatus::Ok); +} diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index e04d26cc45..e9ae11c21f 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -1876,6 +1876,5 @@ lazy_static! { AllowedMethod::GetNonexistent ], }, - ]; } diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index ca2f737cb0..1d7f5556c2 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -172,6 +172,10 @@ silo_view GET /v1/system/silos/{silo} user_builtin_list GET /v1/system/users-builtin user_builtin_view GET /v1/system/users-builtin/{user} +API operations found with tag "system/status" +OPERATION ID METHOD URL PATH +ping GET /v1/ping + API operations found with tag "vpcs" OPERATION ID METHOD URL PATH vpc_create POST /v1/vpcs diff --git a/nexus/tests/output/uncovered-authz-endpoints.txt b/nexus/tests/output/uncovered-authz-endpoints.txt index 0e53222a8a..d76d9c5495 100644 --- a/nexus/tests/output/uncovered-authz-endpoints.txt +++ b/nexus/tests/output/uncovered-authz-endpoints.txt @@ -1,4 +1,5 @@ API endpoints with no coverage in authz tests: +ping (get "/v1/ping") device_auth_request (post "/device/auth") device_auth_confirm (post "/device/confirm") device_access_token (post "/device/token") diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 4b30b0be1c..ef3835c618 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -522,3 +522,18 @@ pub struct UpdateDeployment { pub version: SemverVersion, pub status: UpdateStatus, } + +// SYSTEM HEALTH + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum PingStatus { + Ok, +} + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, 
Deserialize, JsonSchema)] +pub struct Ping { + /// Whether the external API is reachable. Will always be Ok if the endpoint + /// returns anything at all. + pub status: PingStatus, +} diff --git a/openapi/nexus.json b/openapi/nexus.json index 9330b0ef47..9dda94f283 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -2816,6 +2816,34 @@ } } }, + "/v1/ping": { + "get": { + "tags": [ + "system/status" + ], + "summary": "Ping API", + "description": "Always responds with Ok if it responds at all.", + "operationId": "ping", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Ping" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/policy": { "get": { "tags": [ @@ -12031,6 +12059,28 @@ "items" ] }, + "Ping": { + "type": "object", + "properties": { + "status": { + "description": "Whether the external API is reachable. Will always be Ok if the endpoint returns anything at all.", + "allOf": [ + { + "$ref": "#/components/schemas/PingStatus" + } + ] + } + }, + "required": [ + "status" + ] + }, + "PingStatus": { + "type": "string", + "enum": [ + "ok" + ] + }, "Project": { "description": "View of a Project", "type": "object", @@ -15277,6 +15327,13 @@ "url": "http://docs.oxide.computer/api/system-silos" } }, + { + "name": "system/status", + "description": "Endpoints related to system health", + "externalDocs": { + "url": "http://docs.oxide.computer/api/system-status" + } + }, { "name": "system/update" }, From d9d39531991cc8843ef38c4d0afc03afe1a58722 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 10 Oct 2023 12:19:23 -0400 Subject: [PATCH 14/23] Do not double count region snapshots records! 
(#4095) `decrease_crucible_resource_count_and_soft_delete_volume` does not disambiguate cases where the snapshot_addr of a region_snapshot is duplicated with another one, which can occur due to the Crucible Agent reclaiming ports from destroyed daemons (see also #4049, which makes the simulated Crucible agent do this). Several invocations of the snapshot create and snapshot delete sagas could race in such a way that one of these ports would be reclaimed, and then be used in a different snapshot, and the lifetime of both of these would overlap! This would confuse our reference counting, which was written with a naive assumption that this port reuse **wouldn't** occur with these overlapping lifetimes. Spoiler alert, it can: root@[fd00:1122:3344:101::3]:32221/omicron> select * from region_snapshot where snapshot_addr = '[fd00:1122:3344:102::7]:19016'; dataset_id | region_id | snapshot_id | snapshot_addr | volume_references ---------------------------------------+--------------------------------------+--------------------------------------+-------------------------------+-------------------- 80790bfd-4b81-4381-9262-20912e3826cc | 0387bbb7-1d54-4683-943c-6c17d6804de9 | 1a800928-8f93-4cd3-9df1-4129582ffc20 | [fd00:1122:3344:102::7]:19016 | 0 80790bfd-4b81-4381-9262-20912e3826cc | ff20e066-8815-4eb6-ac84-fab9b9103462 | bdd9614e-f089-4a94-ae46-e10b96b79ba3 | [fd00:1122:3344:102::7]:19016 | 0 (2 rows) One way to solve this would be to create a UNIQUE INDEX on `snapshot_addr` here, but then in these cases the snapshot creation would return a 500 error to the user. This commit adds a sixth column: `deleting`, a boolean that is true when the region snapshot is part of a volume's `resources_to_clean_up`, and false otherwise. This is used to select (as part of the transaction for `decrease_crucible_resource_count_and_soft_delete_volume`) only the region_snapshot records that were decremented as part of that transaction, and skip re-deleting them otherwise. 
This works because the overlapping lifetime of the records in the DB is **not** the overlapping lifetime of the actual read-only downstairs daemon: for the port to be reclaimed, the original daemon has to be DELETEd, which happens after the decrement transaction has already computed which resources to clean up: 1) a snapshot record is created: ``` snapshot_id | snapshot_addr | volume_references | deleted | -------------------------------------+-------------------------------+-------------------+---------- 1a800928-8f93-4cd3-9df1-4129582ffc20 | [fd00:1122:3344:102::7]:19016 | 0 | false | ``` 2) it is incremented as part of `volume_create`: ``` snapshot_id | snapshot_addr | volume_references | deleted | -------------------------------------+-------------------------------+-------------------+---------- 1a800928-8f93-4cd3-9df1-4129582ffc20 | [fd00:1122:3344:102::7]:19016 | 1 | false | ``` 3) when the volume is deleted, then the decrement transaction will: a) decrease `volume_references` by 1 ``` snapshot_id | snapshot_addr | volume_references | deleted | -------------------------------------+-------------------------------+-------------------+---------- 1a800928-8f93-4cd3-9df1-4129582ffc20 | [fd00:1122:3344:102::7]:19016 | 0 | false | ``` b) note any `region_snapshot` records whose `volume_references` went to 0 and have `deleted` = false, and return those in the list of resources to clean up: [ 1a800928-8f93-4cd3-9df1-4129582ffc20 ] c) set deleted = true for any region_snapshot records whose `volume_references` went to 0 and have deleted = false ``` snapshot_id | snapshot_addr | volume_references | deleted | -------------------------------------+-------------------------------+-------------------+---------- 1a800928-8f93-4cd3-9df1-4129582ffc20 | [fd00:1122:3344:102::7]:19016 | 0 | true | ``` 4) That read-only snapshot daemon is DELETEd, freeing up the port. 
Another snapshot creation occurs, using that reclaimed port: ``` snapshot_id | snapshot_addr | volume_references | deleted | -------------------------------------+-------------------------------+-------------------+---------- 1a800928-8f93-4cd3-9df1-4129582ffc20 | [fd00:1122:3344:102::7]:19016 | 0 | true | bdd9614e-f089-4a94-ae46-e10b96b79ba3 | [fd00:1122:3344:102::7]:19016 | 0 | false | ``` 5) That new snapshot is incremented as part of `volume_create`: ``` snapshot_id | snapshot_addr | volume_references | deleted | -------------------------------------+-------------------------------+-------------------+---------- 1a800928-8f93-4cd3-9df1-4129582ffc20 | [fd00:1122:3344:102::7]:19016 | 0 | true | bdd9614e-f089-4a94-ae46-e10b96b79ba3 | [fd00:1122:3344:102::7]:19016 | 1 | false | ``` 6) It is later deleted, and the decrement transaction will: a) decrease `volume_references` by 1: ``` j snapshot_id | snapshot_addr | volume_references | deleted | -------------------------------------+-------------------------------+-------------------+---------- 1a800928-8f93-4cd3-9df1-4129582ffc20 | [fd00:1122:3344:102::7]:19016 | 0 | true | bdd9614e-f089-4a94-ae46-e10b96b79ba3 | [fd00:1122:3344:102::7]:19016 | 0 | false | ``` b) note any `region_snapshot` records whose `volume_references` went to 0 and have `deleted` = false, and return those in the list of resources to clean up: [ bdd9614e-f089-4a94-ae46-e10b96b79ba3 ] c) set deleted = true for any region_snapshot records whose `volume_references` went to 0 and have deleted = false ``` snapshot_id | snapshot_addr | volume_references | deleted | -------------------------------------+-------------------------------+-------------------+---------- 1a800928-8f93-4cd3-9df1-4129582ffc20 | [fd00:1122:3344:102::7]:19016 | 0 | true | bdd9614e-f089-4a94-ae46-e10b96b79ba3 | [fd00:1122:3344:102::7]:19016 | 0 | true | ``` --- dev-tools/omdb/tests/env.out | 6 +- dev-tools/omdb/tests/successes.out | 12 +- nexus/db-model/src/region_snapshot.rs | 3 + 
nexus/db-model/src/schema.rs | 3 +- nexus/db-queries/src/db/datastore/dataset.rs | 16 + .../src/db/datastore/region_snapshot.rs | 23 ++ nexus/db-queries/src/db/datastore/volume.rs | 100 +++--- nexus/src/app/sagas/snapshot_create.rs | 1 + nexus/src/app/sagas/volume_delete.rs | 177 ++++++---- nexus/tests/integration_tests/snapshots.rs | 36 +- .../integration_tests/volume_management.rs | 308 ++++++++++++++++++ schema/crdb/6.0.0/up1.sql | 1 + schema/crdb/6.0.0/up2.sql | 1 + schema/crdb/dbinit.sql | 5 +- 14 files changed, 563 insertions(+), 129 deletions(-) create mode 100644 schema/crdb/6.0.0/up1.sql create mode 100644 schema/crdb/6.0.0/up2.sql diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index eb4cd0d32d..07a6d3fae5 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -7,7 +7,7 @@ sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (5.0.0) +note: database schema version matches expected (6.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "--db-url", "junk", "sleds"] termination: Exited(2) @@ -172,7 +172,7 @@ stderr: note: database URL not specified. Will search DNS. note: (override with --db-url or OMDB_DB_URL) note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (5.0.0) +note: database schema version matches expected (6.0.0) ============================================= EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "db", "sleds"] termination: Exited(0) @@ -185,5 +185,5 @@ stderr: note: database URL not specified. Will search DNS. 
note: (override with --db-url or OMDB_DB_URL) note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (5.0.0) +note: database schema version matches expected (6.0.0) ============================================= diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index eb075a84ea..038f365e8e 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -8,7 +8,7 @@ external oxide-dev.test 2 create silo: "tes --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (5.0.0) +note: database schema version matches expected (6.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "dns", "diff", "external", "2"] termination: Exited(0) @@ -24,7 +24,7 @@ changes: names added: 1, names removed: 0 --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (5.0.0) +note: database schema version matches expected (6.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "dns", "names", "external", "2"] termination: Exited(0) @@ -36,7 +36,7 @@ External zone: oxide-dev.test --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (5.0.0) +note: database schema version matches expected (6.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-instances"] termination: Exited(0) @@ -52,7 +52,7 @@ Nexus REDACTED_UUID_REDACTED_UUID_REDACTED [::ffff:127.0.0.1]:REDACTED_ --------------------------------------------- stderr: note: using database URL 
postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (5.0.0) +note: database schema version matches expected (6.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-by-sled"] termination: Exited(0) @@ -71,7 +71,7 @@ sled: sim-b6d65341 (id REDACTED_UUID_REDACTED_UUID_REDACTED) --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (5.0.0) +note: database schema version matches expected (6.0.0) ============================================= EXECUTING COMMAND: omdb ["db", "sleds"] termination: Exited(0) @@ -82,7 +82,7 @@ sim-b6d65341 [::1]:REDACTED_PORT - REDACTED_UUID_REDACTED_UUID_REDACTED --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected (5.0.0) +note: database schema version matches expected (6.0.0) ============================================= EXECUTING COMMAND: omdb ["mgs", "inventory"] termination: Exited(0) diff --git a/nexus/db-model/src/region_snapshot.rs b/nexus/db-model/src/region_snapshot.rs index 9addeb83e3..af1cf8b2b3 100644 --- a/nexus/db-model/src/region_snapshot.rs +++ b/nexus/db-model/src/region_snapshot.rs @@ -32,4 +32,7 @@ pub struct RegionSnapshot { // how many volumes reference this? pub volume_references: i64, + + // true if part of a volume's `resources_to_clean_up` already + pub deleting: bool, } diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 94a770e2ca..0165ab1568 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -856,6 +856,7 @@ table! { snapshot_id -> Uuid, snapshot_addr -> Text, volume_references -> Int8, + deleting -> Bool, } } @@ -1130,7 +1131,7 @@ table! 
{ /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(5, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(6, 0, 0); allow_tables_to_appear_in_same_query!( system_update, diff --git a/nexus/db-queries/src/db/datastore/dataset.rs b/nexus/db-queries/src/db/datastore/dataset.rs index 99972459c8..0b26789e8f 100644 --- a/nexus/db-queries/src/db/datastore/dataset.rs +++ b/nexus/db-queries/src/db/datastore/dataset.rs @@ -13,15 +13,31 @@ use crate::db::error::ErrorHandler; use crate::db::identity::Asset; use crate::db::model::Dataset; use crate::db::model::Zpool; +use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; use diesel::upsert::excluded; use omicron_common::api::external::CreateResult; use omicron_common::api::external::Error; +use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; +use uuid::Uuid; impl DataStore { + pub async fn dataset_get(&self, dataset_id: Uuid) -> LookupResult { + use db::schema::dataset::dsl; + + dsl::dataset + .filter(dsl::id.eq(dataset_id)) + .select(Dataset::as_select()) + .first_async::( + &*self.pool_connection_unauthorized().await?, + ) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// Stores a new dataset in the database. 
pub async fn dataset_upsert( &self, diff --git a/nexus/db-queries/src/db/datastore/region_snapshot.rs b/nexus/db-queries/src/db/datastore/region_snapshot.rs index 0a707e4504..148cfe4812 100644 --- a/nexus/db-queries/src/db/datastore/region_snapshot.rs +++ b/nexus/db-queries/src/db/datastore/region_snapshot.rs @@ -10,9 +10,11 @@ use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::model::RegionSnapshot; use async_bb8_diesel::AsyncRunQueryDsl; +use async_bb8_diesel::OptionalExtension; use diesel::prelude::*; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; +use omicron_common::api::external::LookupResult; use uuid::Uuid; impl DataStore { @@ -31,6 +33,27 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + pub async fn region_snapshot_get( + &self, + dataset_id: Uuid, + region_id: Uuid, + snapshot_id: Uuid, + ) -> LookupResult> { + use db::schema::region_snapshot::dsl; + + dsl::region_snapshot + .filter(dsl::dataset_id.eq(dataset_id)) + .filter(dsl::region_id.eq(region_id)) + .filter(dsl::snapshot_id.eq(snapshot_id)) + .select(RegionSnapshot::as_select()) + .first_async::( + &*self.pool_connection_unauthorized().await?, + ) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + pub async fn region_snapshot_remove( &self, dataset_id: Uuid, diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index b3e82886de..b97b8451cf 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -119,6 +119,7 @@ impl DataStore { .filter( rs_dsl::snapshot_addr.eq(read_only_target.clone()), ) + .filter(rs_dsl::deleting.eq(false)) .set( rs_dsl::volume_references .eq(rs_dsl::volume_references + 1), @@ -573,9 +574,7 @@ impl DataStore { // multiple times, and that is done by soft-deleting the volume during // the transaction, 
and returning the previously serialized list of // resources to clean up if a soft-delete has already occurred. - // - // TODO it would be nice to make this transaction_async, but I couldn't - // get the async optional extension to work. + self.pool_connection_unauthorized() .await? .transaction_async(|conn| async move { @@ -639,7 +638,9 @@ impl DataStore { } }; - // Decrease the number of uses for each referenced region snapshot. + // Decrease the number of uses for each non-deleted referenced + // region snapshot. + use db::schema::region_snapshot::dsl; diesel::update(dsl::region_snapshot) @@ -647,12 +648,40 @@ impl DataStore { dsl::snapshot_addr .eq_any(crucible_targets.read_only_targets.clone()), ) + .filter(dsl::volume_references.gt(0)) + .filter(dsl::deleting.eq(false)) .set(dsl::volume_references.eq(dsl::volume_references - 1)) .execute_async(&conn) .await?; + // Then, note anything that was set to zero from the above + // UPDATE, and then mark all those as deleted. + let snapshots_to_delete: Vec = + dsl::region_snapshot + .filter( + dsl::snapshot_addr.eq_any( + crucible_targets.read_only_targets.clone(), + ), + ) + .filter(dsl::volume_references.eq(0)) + .filter(dsl::deleting.eq(false)) + .select(RegionSnapshot::as_select()) + .load_async(&conn) + .await?; + + diesel::update(dsl::region_snapshot) + .filter( + dsl::snapshot_addr + .eq_any(crucible_targets.read_only_targets.clone()), + ) + .filter(dsl::volume_references.eq(0)) + .filter(dsl::deleting.eq(false)) + .set(dsl::deleting.eq(true)) + .execute_async(&conn) + .await?; + // Return what results can be cleaned up - let result = CrucibleResources::V1(CrucibleResourcesV1 { + let result = CrucibleResources::V2(CrucibleResourcesV2 { // The only use of a read-write region will be at the top level of a // Volume. These are not shared, but if any snapshots are taken this // will prevent deletion of the region. 
Filter out any regions that @@ -681,6 +710,7 @@ impl DataStore { .eq(0) // Despite the SQL specifying that this column is NOT NULL, // this null check is required for this function to work! + // The left join of region_snapshot might cause a null here. .or(dsl::volume_references.is_null()), ) .select((Dataset::as_select(), Region::as_select())) @@ -688,46 +718,17 @@ impl DataStore { .await? }, - // A volume (for a disk or snapshot) may reference another nested - // volume as a read-only parent, and this may be arbitrarily deep. - // After decrementing volume_references above, get the region - // snapshot records for these read_only_targets where the - // volume_references has gone to 0. Consumers of this struct will - // be responsible for deleting the read-only downstairs running - // for the snapshot and the snapshot itself. - datasets_and_snapshots: { - use db::schema::dataset::dsl as dataset_dsl; - - dsl::region_snapshot - // Only return region_snapshot records related to - // this volume that have zero references. This will - // only happen one time, on the last decrease of a - // volume containing these read-only targets. - // - // It's important to not return *every* region - // snapshot with zero references: multiple volume - // delete sub-sagas will then be issues duplicate - // DELETE calls to Crucible agents, and a request to - // delete a read-only downstairs running for a - // snapshot that doesn't exist will return a 404, - // causing the saga to error and unwind. - .filter(dsl::snapshot_addr.eq_any( - crucible_targets.read_only_targets.clone(), - )) - .filter(dsl::volume_references.eq(0)) - .inner_join( - dataset_dsl::dataset - .on(dsl::dataset_id.eq(dataset_dsl::id)), - ) - .select(( - Dataset::as_select(), - RegionSnapshot::as_select(), - )) - .get_results_async::<(Dataset, RegionSnapshot)>( - &conn, - ) - .await? 
- }, + // Consumers of this struct will be responsible for deleting + // the read-only downstairs running for the snapshot and the + // snapshot itself. + // + // It's important to not return *every* region snapshot with + // zero references: multiple volume delete sub-sagas will + // then be issues duplicate DELETE calls to Crucible agents, + // and a request to delete a read-only downstairs running + // for a snapshot that doesn't exist will return a 404, + // causing the saga to error and unwind. + snapshots_to_delete, }); // Soft delete this volume, and serialize the resources that are to @@ -967,7 +968,7 @@ impl DataStore { #[derive(Default, Debug, Serialize, Deserialize)] pub struct CrucibleTargets { - read_only_targets: Vec, + pub read_only_targets: Vec, } // Serialize this enum into the `resources_to_clean_up` column to handle @@ -975,6 +976,7 @@ pub struct CrucibleTargets { #[derive(Debug, Serialize, Deserialize)] pub enum CrucibleResources { V1(CrucibleResourcesV1), + V2(CrucibleResourcesV2), } #[derive(Debug, Default, Serialize, Deserialize)] @@ -983,6 +985,12 @@ pub struct CrucibleResourcesV1 { pub datasets_and_snapshots: Vec<(Dataset, RegionSnapshot)>, } +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct CrucibleResourcesV2 { + pub datasets_and_regions: Vec<(Dataset, Region)>, + pub snapshots_to_delete: Vec, +} + /// Return the targets from a VolumeConstructionRequest. /// /// The targets of a volume construction request map to resources. 
diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index eeabf64894..9c8a33fb17 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -1280,6 +1280,7 @@ async fn ssc_start_running_snapshot( snapshot_id, snapshot_addr, volume_references: 0, // to be filled later + deleting: false, }) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/volume_delete.rs b/nexus/src/app/sagas/volume_delete.rs index 4cd633f575..d6358d5435 100644 --- a/nexus/src/app/sagas/volume_delete.rs +++ b/nexus/src/app/sagas/volume_delete.rs @@ -155,39 +155,39 @@ async fn svd_delete_crucible_regions( sagactx.lookup::("crucible_resources_to_delete")?; // Send DELETE calls to the corresponding Crucible agents - match crucible_resources_to_delete { + let datasets_and_regions = match crucible_resources_to_delete { CrucibleResources::V1(crucible_resources_to_delete) => { - delete_crucible_regions( - log, - crucible_resources_to_delete.datasets_and_regions.clone(), - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "failed to delete_crucible_regions: {:?}", - e, - )) - })?; + crucible_resources_to_delete.datasets_and_regions + } - // Remove DB records - let region_ids_to_delete = crucible_resources_to_delete - .datasets_and_regions - .iter() - .map(|(_, r)| r.id()) - .collect(); - - osagactx - .datastore() - .regions_hard_delete(log, region_ids_to_delete) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "failed to regions_hard_delete: {:?}", - e, - )) - })?; + CrucibleResources::V2(crucible_resources_to_delete) => { + crucible_resources_to_delete.datasets_and_regions } - } + }; + + delete_crucible_regions(log, datasets_and_regions.clone()).await.map_err( + |e| { + ActionError::action_failed(format!( + "failed to delete_crucible_regions: {:?}", + e, + )) + }, + )?; + + // Remove DB records + let region_ids_to_delete = + datasets_and_regions.iter().map(|(_, 
r)| r.id()).collect(); + + osagactx + .datastore() + .regions_hard_delete(log, region_ids_to_delete) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "failed to regions_hard_delete: {:?}", + e, + )) + })?; Ok(()) } @@ -202,26 +202,46 @@ async fn svd_delete_crucible_running_snapshots( sagactx: NexusActionContext, ) -> Result<(), ActionError> { let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); let crucible_resources_to_delete = sagactx.lookup::("crucible_resources_to_delete")?; // Send DELETE calls to the corresponding Crucible agents - match crucible_resources_to_delete { + let datasets_and_snapshots = match crucible_resources_to_delete { CrucibleResources::V1(crucible_resources_to_delete) => { - delete_crucible_running_snapshots( - log, - crucible_resources_to_delete.datasets_and_snapshots.clone(), - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "failed to delete_crucible_running_snapshots: {:?}", - e, - )) - })?; + crucible_resources_to_delete.datasets_and_snapshots } - } + + CrucibleResources::V2(crucible_resources_to_delete) => { + let mut datasets_and_snapshots: Vec<_> = Vec::with_capacity( + crucible_resources_to_delete.snapshots_to_delete.len(), + ); + + for region_snapshot in + crucible_resources_to_delete.snapshots_to_delete + { + let dataset = osagactx + .datastore() + .dataset_get(region_snapshot.dataset_id) + .await + .map_err(ActionError::action_failed)?; + + datasets_and_snapshots.push((dataset, region_snapshot)); + } + + datasets_and_snapshots + } + }; + + delete_crucible_running_snapshots(log, datasets_and_snapshots.clone()) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "failed to delete_crucible_running_snapshots: {:?}", + e, + )) + })?; Ok(()) } @@ -235,26 +255,46 @@ async fn svd_delete_crucible_snapshots( sagactx: NexusActionContext, ) -> Result<(), ActionError> { let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); let 
crucible_resources_to_delete = sagactx.lookup::("crucible_resources_to_delete")?; // Send DELETE calls to the corresponding Crucible agents - match crucible_resources_to_delete { + let datasets_and_snapshots = match crucible_resources_to_delete { CrucibleResources::V1(crucible_resources_to_delete) => { - delete_crucible_snapshots( - log, - crucible_resources_to_delete.datasets_and_snapshots.clone(), - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "failed to delete_crucible_snapshots: {:?}", - e, - )) - })?; + crucible_resources_to_delete.datasets_and_snapshots } - } + + CrucibleResources::V2(crucible_resources_to_delete) => { + let mut datasets_and_snapshots: Vec<_> = Vec::with_capacity( + crucible_resources_to_delete.snapshots_to_delete.len(), + ); + + for region_snapshot in + crucible_resources_to_delete.snapshots_to_delete + { + let dataset = osagactx + .datastore() + .dataset_get(region_snapshot.dataset_id) + .await + .map_err(ActionError::action_failed)?; + + datasets_and_snapshots.push((dataset, region_snapshot)); + } + + datasets_and_snapshots + } + }; + + delete_crucible_snapshots(log, datasets_and_snapshots.clone()) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "failed to delete_crucible_snapshots: {:?}", + e, + )) + })?; Ok(()) } @@ -293,6 +333,31 @@ async fn svd_delete_crucible_snapshot_records( })?; } } + + CrucibleResources::V2(crucible_resources_to_delete) => { + // Remove DB records + for region_snapshot in + &crucible_resources_to_delete.snapshots_to_delete + { + osagactx + .datastore() + .region_snapshot_remove( + region_snapshot.dataset_id, + region_snapshot.region_id, + region_snapshot.snapshot_id, + ) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "failed to region_snapshot_remove {} {} {}: {:?}", + region_snapshot.dataset_id, + region_snapshot.region_id, + region_snapshot.snapshot_id, + e, + )) + })?; + } + } } Ok(()) diff --git a/nexus/tests/integration_tests/snapshots.rs 
b/nexus/tests/integration_tests/snapshots.rs index d212175415..68f4cdadd2 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -1094,6 +1094,7 @@ async fn test_region_snapshot_create_idempotent( snapshot_addr: "[::]:12345".to_string(), volume_references: 1, + deleting: false, }; datastore.region_snapshot_create(region_snapshot.clone()).await.unwrap(); @@ -1287,13 +1288,16 @@ async fn test_multiple_deletes_not_sent(cptestctx: &ControlPlaneTestContext) { .unwrap(); let resources_1 = match resources_1 { - db::datastore::CrucibleResources::V1(resources_1) => resources_1, + db::datastore::CrucibleResources::V1(_) => panic!("using old style!"), + db::datastore::CrucibleResources::V2(resources_1) => resources_1, }; let resources_2 = match resources_2 { - db::datastore::CrucibleResources::V1(resources_2) => resources_2, + db::datastore::CrucibleResources::V1(_) => panic!("using old style!"), + db::datastore::CrucibleResources::V2(resources_2) => resources_2, }; let resources_3 = match resources_3 { - db::datastore::CrucibleResources::V1(resources_3) => resources_3, + db::datastore::CrucibleResources::V1(_) => panic!("using old style!"), + db::datastore::CrucibleResources::V2(resources_3) => resources_3, }; // No region deletions yet, these are just snapshot deletes @@ -1304,24 +1308,24 @@ async fn test_multiple_deletes_not_sent(cptestctx: &ControlPlaneTestContext) { // But there are snapshots to delete - assert!(!resources_1.datasets_and_snapshots.is_empty()); - assert!(!resources_2.datasets_and_snapshots.is_empty()); - assert!(!resources_3.datasets_and_snapshots.is_empty()); + assert!(!resources_1.snapshots_to_delete.is_empty()); + assert!(!resources_2.snapshots_to_delete.is_empty()); + assert!(!resources_3.snapshots_to_delete.is_empty()); - // Assert there are no overlaps in the datasets_and_snapshots to delete. + // Assert there are no overlaps in the snapshots_to_delete to delete. 
- for tuple in &resources_1.datasets_and_snapshots { - assert!(!resources_2.datasets_and_snapshots.contains(tuple)); - assert!(!resources_3.datasets_and_snapshots.contains(tuple)); + for tuple in &resources_1.snapshots_to_delete { + assert!(!resources_2.snapshots_to_delete.contains(tuple)); + assert!(!resources_3.snapshots_to_delete.contains(tuple)); } - for tuple in &resources_2.datasets_and_snapshots { - assert!(!resources_1.datasets_and_snapshots.contains(tuple)); - assert!(!resources_3.datasets_and_snapshots.contains(tuple)); + for tuple in &resources_2.snapshots_to_delete { + assert!(!resources_1.snapshots_to_delete.contains(tuple)); + assert!(!resources_3.snapshots_to_delete.contains(tuple)); } - for tuple in &resources_3.datasets_and_snapshots { - assert!(!resources_1.datasets_and_snapshots.contains(tuple)); - assert!(!resources_2.datasets_and_snapshots.contains(tuple)); + for tuple in &resources_3.snapshots_to_delete { + assert!(!resources_1.snapshots_to_delete.contains(tuple)); + assert!(!resources_2.snapshots_to_delete.contains(tuple)); } } diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 70d34fb778..e263593def 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -19,6 +19,7 @@ use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; use nexus_types::external_api::views; +use nexus_types::identity::Asset; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Disk; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -1813,6 +1814,313 @@ async fn test_volume_checkout_updates_sparse_mid_multiple_gen( volume_match_gen(new_vol, vec![Some(8), None, Some(10)]); } +/// Test that the Crucible agent's port reuse does not confuse +/// `decrease_crucible_resource_count_and_soft_delete_volume`, due to the +/// 
`[ipv6]:port` targets being reused. +#[nexus_test] +async fn test_keep_your_targets_straight(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + + // Four zpools, one dataset each + let mut disk_test = DiskTest::new(&cptestctx).await; + disk_test + .add_zpool_with_dataset(&cptestctx, DiskTest::DEFAULT_ZPOOL_SIZE_GIB) + .await; + + // This bug occurs when region_snapshot records share a snapshot_addr, so + // insert those here manually. + + // (dataset_id, region_id, snapshot_id, snapshot_addr) + let region_snapshots = vec![ + // first snapshot-create + ( + disk_test.zpools[0].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:101:7]:19016"), + ), + ( + disk_test.zpools[1].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:102:7]:19016"), + ), + ( + disk_test.zpools[2].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:103:7]:19016"), + ), + // second snapshot-create + ( + disk_test.zpools[0].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:101:7]:19016"), // duplicate! 
+ ), + ( + disk_test.zpools[3].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:104:7]:19016"), + ), + ( + disk_test.zpools[2].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:103:7]:19017"), + ), + ]; + + // First, three `region_snapshot` records created in the snapshot-create + // saga, which are then used to make snapshot's volume construction request + + for i in 0..3 { + let (dataset_id, region_id, snapshot_id, snapshot_addr) = + ®ion_snapshots[i]; + datastore + .region_snapshot_create(nexus_db_model::RegionSnapshot { + dataset_id: *dataset_id, + region_id: *region_id, + snapshot_id: *snapshot_id, + snapshot_addr: snapshot_addr.clone(), + volume_references: 0, + deleting: false, + }) + .await + .unwrap(); + } + + let volume_id = Uuid::new_v4(); + let volume = datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 1, + extent_count: 1, + gen: 1, + opts: CrucibleOpts { + id: Uuid::new_v4(), + target: vec![ + region_snapshots[0].3.clone(), + region_snapshots[1].3.clone(), + region_snapshots[2].3.clone(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }) + .unwrap(), + )) + .await + .unwrap(); + + // Sanity check + + assert_eq!(volume.id(), volume_id); + + // Make sure the volume has only three read-only targets: + + let crucible_targets = datastore + .read_only_resources_associated_with_volume(volume_id) + .await + .unwrap(); + assert_eq!(crucible_targets.read_only_targets.len(), 3); + + // Also validate the volume's region_snapshots got incremented by + // volume_create + + for i in 0..3 { + let (dataset_id, region_id, snapshot_id, 
_) = region_snapshots[i]; + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(region_snapshot.volume_references, 1); + assert_eq!(region_snapshot.deleting, false); + } + + // Soft delete the volume, and validate that only three region_snapshot + // records are returned. + + let cr = datastore + .decrease_crucible_resource_count_and_soft_delete_volume(volume_id) + .await + .unwrap(); + + for i in 0..3 { + let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i]; + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(region_snapshot.volume_references, 0); + assert_eq!(region_snapshot.deleting, true); + } + + match cr { + nexus_db_queries::db::datastore::CrucibleResources::V1(cr) => { + assert!(cr.datasets_and_regions.is_empty()); + assert_eq!(cr.datasets_and_snapshots.len(), 3); + } + + nexus_db_queries::db::datastore::CrucibleResources::V2(cr) => { + assert!(cr.datasets_and_regions.is_empty()); + assert_eq!(cr.snapshots_to_delete.len(), 3); + } + } + + // Now, let's say we're at a spot where the running snapshots have been + // deleted, but before volume_hard_delete or region_snapshot_remove are + // called. Pretend another snapshot-create and snapshot-delete snuck in + // here, and the second snapshot hits a agent that reuses the first target. 
+ + for i in 3..6 { + let (dataset_id, region_id, snapshot_id, snapshot_addr) = + ®ion_snapshots[i]; + datastore + .region_snapshot_create(nexus_db_model::RegionSnapshot { + dataset_id: *dataset_id, + region_id: *region_id, + snapshot_id: *snapshot_id, + snapshot_addr: snapshot_addr.clone(), + volume_references: 0, + deleting: false, + }) + .await + .unwrap(); + } + + let volume_id = Uuid::new_v4(); + let volume = datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 1, + extent_count: 1, + gen: 1, + opts: CrucibleOpts { + id: Uuid::new_v4(), + target: vec![ + region_snapshots[3].3.clone(), + region_snapshots[4].3.clone(), + region_snapshots[5].3.clone(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }) + .unwrap(), + )) + .await + .unwrap(); + + // Sanity check + + assert_eq!(volume.id(), volume_id); + + // Make sure the volume has only three read-only targets: + + let crucible_targets = datastore + .read_only_resources_associated_with_volume(volume_id) + .await + .unwrap(); + assert_eq!(crucible_targets.read_only_targets.len(), 3); + + // Also validate only the volume's region_snapshots got incremented by + // volume_create. 
+ + for i in 0..3 { + let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i]; + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(region_snapshot.volume_references, 0); + assert_eq!(region_snapshot.deleting, true); + } + for i in 3..6 { + let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i]; + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(region_snapshot.volume_references, 1); + assert_eq!(region_snapshot.deleting, false); + } + + // Soft delete the volume, and validate that only three region_snapshot + // records are returned. + + let cr = datastore + .decrease_crucible_resource_count_and_soft_delete_volume(volume_id) + .await + .unwrap(); + + // Make sure every region_snapshot is now 0, and deleting + + for i in 0..6 { + let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i]; + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(region_snapshot.volume_references, 0); + assert_eq!(region_snapshot.deleting, true); + } + + match cr { + nexus_db_queries::db::datastore::CrucibleResources::V1(cr) => { + assert!(cr.datasets_and_regions.is_empty()); + assert_eq!(cr.datasets_and_snapshots.len(), 3); + } + + nexus_db_queries::db::datastore::CrucibleResources::V2(cr) => { + assert!(cr.datasets_and_regions.is_empty()); + assert_eq!(cr.snapshots_to_delete.len(), 3); + } + } +} + #[nexus_test] async fn test_disk_create_saga_unwinds_correctly( cptestctx: &ControlPlaneTestContext, diff --git a/schema/crdb/6.0.0/up1.sql b/schema/crdb/6.0.0/up1.sql new file mode 100644 index 0000000000..4a3cdc302e --- /dev/null +++ b/schema/crdb/6.0.0/up1.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.region_snapshot ADD COLUMN IF NOT EXISTS deleting BOOL NOT NULL DEFAULT false; diff --git 
a/schema/crdb/6.0.0/up2.sql b/schema/crdb/6.0.0/up2.sql new file mode 100644 index 0000000000..77c136a3bf --- /dev/null +++ b/schema/crdb/6.0.0/up2.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.region_snapshot ALTER COLUMN deleting DROP DEFAULT; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index ad09092f8f..a62cbae5ea 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -505,6 +505,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.region_snapshot ( /* How many volumes reference this? */ volume_references INT8 NOT NULL, + /* Is this currently part of some resources_to_delete? */ + deleting BOOL NOT NULL, + PRIMARY KEY (dataset_id, region_id, snapshot_id) ); @@ -2574,7 +2577,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '5.0.0', NULL) + ( TRUE, NOW(), NOW(), '6.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 3e9f46c057b223ad390c742f882ef05e09366b77 Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Tue, 10 Oct 2023 12:57:14 -0700 Subject: [PATCH 15/23] update softnpu version (#4227) This pulls in a new version of the `npuzone` tool from the softnpu repo that automatically pulls the latest sidecar-lite code. 
--- tools/ci_download_softnpu_machinery | 2 +- tools/create_virtual_hardware.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/ci_download_softnpu_machinery b/tools/ci_download_softnpu_machinery index d37d428476..7975a310f0 100755 --- a/tools/ci_download_softnpu_machinery +++ b/tools/ci_download_softnpu_machinery @@ -15,7 +15,7 @@ OUT_DIR="out/npuzone" # Pinned commit for softnpu ASIC simulator SOFTNPU_REPO="softnpu" -SOFTNPU_COMMIT="41b3a67b3d44f51528816ff8e539b4001df48305" +SOFTNPU_COMMIT="eb27e6a00f1082c9faac7cf997e57d0609f7a309" # This is the softnpu ASIC simulator echo "fetching npuzone" diff --git a/tools/create_virtual_hardware.sh b/tools/create_virtual_hardware.sh index dd6d9af9dd..95c2aa63df 100755 --- a/tools/create_virtual_hardware.sh +++ b/tools/create_virtual_hardware.sh @@ -37,7 +37,7 @@ function ensure_simulated_links { dladm create-simnet -t "net$I" dladm create-simnet -t "sc${I}_0" dladm modify-simnet -t -p "net$I" "sc${I}_0" - dladm set-linkprop -p mtu=1600 "sc${I}_0" # encap headroom + dladm set-linkprop -p mtu=9000 "sc${I}_0" # match emulated devices fi success "Simnet net$I/sc${I}_0 exists" done From 97ddc7da3a5cdbded9097827f90151980755c1e4 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 10 Oct 2023 13:12:23 -0700 Subject: [PATCH 16/23] [dependencies] add Renovate config (#4236) * Add configuration for automatically creating dependencies, and for pinning GitHub Actions digests * Add a post-upgrade script that runs cargo-hakari. Depends on https://github.com/oxidecomputer/renovate-config/pull/5. See [RFD 434](https://rfd.shared.oxide.computer/rfd/0434) and #4166. 
--- .github/renovate.json | 9 ++++++++ tools/renovate-post-upgrade.sh | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 .github/renovate.json create mode 100755 tools/renovate-post-upgrade.sh diff --git a/.github/renovate.json b/.github/renovate.json new file mode 100644 index 0000000000..405a3e282b --- /dev/null +++ b/.github/renovate.json @@ -0,0 +1,9 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + "local>oxidecomputer/renovate-config", + "local>oxidecomputer/renovate-config//rust/autocreate", + "local>oxidecomputer/renovate-config:post-upgrade", + "helpers:pinGitHubActionDigests" + ] +} diff --git a/tools/renovate-post-upgrade.sh b/tools/renovate-post-upgrade.sh new file mode 100755 index 0000000000..c21832e0a9 --- /dev/null +++ b/tools/renovate-post-upgrade.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# This script is run after Renovate upgrades dependencies or lock files. + +set -euo pipefail + +# Function to retry a command up to 3 times. +function retry_command { + local retries=3 + local delay=5 + local count=0 + until "$@"; do + exit_code=$? + count=$((count+1)) + if [ $count -lt $retries ]; then + echo "Command failed with exit code $exit_code. Retrying in $delay seconds..." + sleep $delay + else + echo "Command failed with exit code $exit_code after $count attempts." + return $exit_code + fi + done +} + +# Download and install cargo-hakari if it is not already installed. +if ! command -v cargo-hakari &> /dev/null; then + # Need cargo-binstall to install cargo-hakari. + if ! command -v cargo-binstall &> /dev/null; then + # Fetch cargo binstall. + echo "Installing cargo-binstall..." + curl --retry 3 -L --proto '=https' --tlsv1.2 -sSfO https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh + retry_command bash install-from-binstall-release.sh + fi + + # Install cargo-hakari. + echo "Installing cargo-hakari..." 
+ retry_command cargo binstall cargo-hakari --no-confirm +fi + +# Run cargo hakari to regenerate the workspace-hack file. +echo "Running cargo-hakari..." +cargo hakari generate From 72a0429debfaf4feeec2f952fefe3ffbffeb06f6 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 10 Oct 2023 17:50:06 -0700 Subject: [PATCH 17/23] [update-engine] fix buffer tests (#4163) Apparently I'd made a couple of mistakes while writing tests: * I was adding all events a second time by accident, which was hiding the fact that... * A couple not signs were flipped, whoops. --- update-engine/src/buffer.rs | 46 ++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/update-engine/src/buffer.rs b/update-engine/src/buffer.rs index 3de0e45f24..1779ef7da6 100644 --- a/update-engine/src/buffer.rs +++ b/update-engine/src/buffer.rs @@ -1389,7 +1389,10 @@ mod tests { test_cx .run_filtered_test( "all events passed in", - |buffer, event| buffer.add_event(event.clone()), + |buffer, event| { + buffer.add_event(event.clone()); + true + }, WithDeltas::No, ) .unwrap(); @@ -1397,10 +1400,12 @@ mod tests { test_cx .run_filtered_test( "progress events skipped", - |buffer, event| { - if let Event::Step(event) = event { + |buffer, event| match event { + Event::Step(event) => { buffer.add_step_event(event.clone()); + true } + Event::Progress(_) => false, }, WithDeltas::Both, ) @@ -1410,13 +1415,16 @@ mod tests { .run_filtered_test( "low-priority events skipped", |buffer, event| match event { - Event::Step(event) => { - if event.kind.priority() == StepEventPriority::Low { + Event::Step(event) => match event.kind.priority() { + StepEventPriority::High => { buffer.add_step_event(event.clone()); + true } - } + StepEventPriority::Low => false, + }, Event::Progress(event) => { buffer.add_progress_event(event.clone()); + true } }, WithDeltas::Both, @@ -1427,13 +1435,16 @@ mod tests { .run_filtered_test( "low-priority and progress events skipped", |buffer, event| match event { 
- Event::Step(event) => { - if event.kind.priority() == StepEventPriority::Low { + Event::Step(event) => match event.kind.priority() { + StepEventPriority::High => { buffer.add_step_event(event.clone()); + true } - } + StepEventPriority::Low => false, + }, Event::Progress(_) => { - // Don't add progress events either. + // Don't add progress events. + false } }, WithDeltas::Both, @@ -1565,7 +1576,10 @@ mod tests { fn run_filtered_test( &self, event_fn_description: &str, - mut event_fn: impl FnMut(&mut EventBuffer, &Event), + mut event_fn: impl FnMut( + &mut EventBuffer, + &Event, + ) -> bool, with_deltas: WithDeltas, ) -> anyhow::Result<()> { match with_deltas { @@ -1590,7 +1604,10 @@ mod tests { fn run_filtered_test_inner( &self, - mut event_fn: impl FnMut(&mut EventBuffer, &Event), + mut event_fn: impl FnMut( + &mut EventBuffer, + &Event, + ) -> bool, with_deltas: bool, ) -> anyhow::Result<()> { let description = format!("with deltas = {with_deltas}"); @@ -1608,8 +1625,9 @@ mod tests { let mut last_seen_opt = with_deltas.then_some(None); for (i, event) in self.generated_events.iter().enumerate() { - (event_fn)(&mut buffer, event); - buffer.add_event(event.clone()); + // Going to use event_added in an upcoming commit. + let _event_added = (event_fn)(&mut buffer, event); + let report = match last_seen_opt { Some(last_seen) => buffer.generate_report_since(last_seen), None => buffer.generate_report(), From 194889b956abbb3e01ce25b11b733c02598c3215 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 10 Oct 2023 19:27:33 -0700 Subject: [PATCH 18/23] [buildomat] authorize PRs generated by oxide-renovate (#4244) Means that PRs like https://github.com/oxidecomputer/omicron/pull/4241 will be automatically authorized. Also skip cargo-hakari update if cargo isn't present. 
--- .github/buildomat/config.toml | 1 + tools/renovate-post-upgrade.sh | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/.github/buildomat/config.toml b/.github/buildomat/config.toml index 922de631f2..419173fa50 100644 --- a/.github/buildomat/config.toml +++ b/.github/buildomat/config.toml @@ -17,5 +17,6 @@ org_only = true allow_users = [ "dependabot[bot]", "oxide-reflector-bot[bot]", + "oxide-renovate[bot]", "renovate[bot]", ] diff --git a/tools/renovate-post-upgrade.sh b/tools/renovate-post-upgrade.sh index c21832e0a9..2699f9f6a0 100755 --- a/tools/renovate-post-upgrade.sh +++ b/tools/renovate-post-upgrade.sh @@ -22,6 +22,13 @@ function retry_command { done } +# If cargo isn't present, skip this -- it implies that a non-Rust dependency was +# updated. +if ! command -v cargo &> /dev/null; then + echo "Skipping cargo-hakari update because cargo is not present." + exit 0 +fi + # Download and install cargo-hakari if it is not already installed. if ! command -v cargo-hakari &> /dev/null; then # Need cargo-binstall to install cargo-hakari. From a972c80c407b68848c178aca236ae00067bc4d3b Mon Sep 17 00:00:00 2001 From: Andy Fiddaman Date: Wed, 11 Oct 2023 17:59:57 +0100 Subject: [PATCH 19/23] destroy_virtual_hardware.sh needs to unmount backing filesystems (#4255) The backing filesystems added in d624bce9af2 prevent the destroy_virtual_hardware.sh script from properly cleaning up all ZFS pools and cause the fmd service to go into maintenance which delays control plane startup. This updates the script to unwind the backing datasets as part of its work. 
--- tools/destroy_virtual_hardware.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/destroy_virtual_hardware.sh b/tools/destroy_virtual_hardware.sh index ae6fef0673..46c6f117c4 100755 --- a/tools/destroy_virtual_hardware.sh +++ b/tools/destroy_virtual_hardware.sh @@ -56,7 +56,23 @@ function remove_softnpu_zone { --ports sc0_1,tfportqsfp0_0 } +# Some services have their working data overlaid by backing mounts from the +# internal boot disk. Before we can destroy the ZFS pools, we need to unmount +# these. + +BACKED_SERVICES="svc:/system/fmd:default" + +function demount_backingfs { + svcadm disable -st $BACKED_SERVICES + zpool list -Hpo name | grep '^oxi_' \ + | xargs -i zfs list -Hpo name,canmount,mounted -r {}/backing \ + | awk '$3 == "yes" && $2 == "noauto" { print $1 }' \ + | xargs -l zfs umount + svcadm enable -st $BACKED_SERVICES +} + verify_omicron_uninstalled +demount_backingfs unload_xde_driver remove_softnpu_zone try_remove_vnics From 1a21fdd581d80d92287c9f29e095dbee11f65b28 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 12:23:46 -0700 Subject: [PATCH 20/23] Update Rust crate proptest to 1.3.1 (#4243) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 10 +++++----- Cargo.toml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 306e953049..421bbd5e16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6737,19 +6737,19 @@ dependencies = [ [[package]] name = "proptest" -version = "1.2.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e35c06b98bf36aba164cc17cb25f7e232f5c4aeea73baa14b8a9f0d92dbfa65" +checksum = "7c003ac8c77cb07bb74f5f198bce836a689bcd5a42574612bf14d17bfd08c20e" dependencies = [ "bit-set", - "bitflags 1.3.2", - "byteorder", + "bit-vec", + "bitflags 2.4.0", "lazy_static", "num-traits", 
"rand 0.8.5", "rand_chacha 0.3.1", "rand_xorshift", - "regex-syntax 0.6.29", + "regex-syntax 0.7.5", "rusty-fork", "tempfile", "unarray", diff --git a/Cargo.toml b/Cargo.toml index da7b582fe3..fdd67c3b5c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -283,7 +283,7 @@ progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branc bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e" } propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e", features = [ "generated-migration" ] } propolis-server = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e", default-features = false, features = ["mock-only"] } -proptest = "1.2.0" +proptest = "1.3.1" quote = "1.0" rand = "0.8.5" ratatui = "0.23.0" From 02aef4bec751b47b6d19adbeef9e51c42c10204d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 12:34:34 -0700 Subject: [PATCH 21/23] Update Rust crate predicates to 3.0.4 (#4254) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 12 ++++++------ Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 421bbd5e16..d58ba77133 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -283,7 +283,7 @@ dependencies = [ "anstyle", "bstr 1.6.0", "doc-comment", - "predicates 3.0.3", + "predicates 3.0.4", "predicates-core", "predicates-tree", "wait-timeout", @@ -5442,7 +5442,7 @@ dependencies = [ "phf_shared 0.11.2", "postgres-types", "ppv-lite86", - "predicates 3.0.3", + "predicates 3.0.4", "rand 0.8.5", "rand_chacha 0.3.1", "regex", @@ -6437,14 +6437,14 @@ dependencies = [ [[package]] name = "predicates" -version = "3.0.3" +version = "3.0.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9" +checksum = "6dfc28575c2e3f19cb3c73b93af36460ae898d426eba6fc15b9bd2a5220758a0" dependencies = [ "anstyle", "difflib", "float-cmp", - "itertools 0.10.5", + "itertools 0.11.0", "normalize-line-endings", "predicates-core", "regex", @@ -9374,7 +9374,7 @@ dependencies = [ "omicron-common 0.1.0", "omicron-test-utils", "omicron-workspace-hack", - "predicates 3.0.3", + "predicates 3.0.4", "slog", "slog-async", "slog-envlogger", diff --git a/Cargo.toml b/Cargo.toml index fdd67c3b5c..832b8663e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -274,7 +274,7 @@ percent-encoding = "2.2.0" pem = "1.1" petgraph = "0.6.4" postgres-protocol = "0.6.6" -predicates = "3.0.3" +predicates = "3.0.4" pretty_assertions = "1.4.0" pretty-hex = "0.3.0" proc-macro2 = "1.0" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 106da92f62..a91477678b 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -73,7 +73,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } phf_shared = { version = "0.11.2" } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } -predicates = { version = "3.0.3" } +predicates = { version = "3.0.4" } rand = { version = "0.8.5", features = ["min_const_gen", "small_rng"] } rand_chacha = { version = "0.3.1" } regex = { version = "1.9.5" } @@ -171,7 +171,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } phf_shared = { version = "0.11.2" } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } -predicates = { version = "3.0.3" } +predicates = { version = 
"3.0.4" } rand = { version = "0.8.5", features = ["min_const_gen", "small_rng"] } rand_chacha = { version = "0.3.1" } regex = { version = "1.9.5" } From d12cb0ffceeb09c1cccdada29ca24c3829a2c9fa Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 11 Oct 2023 12:37:42 -0700 Subject: [PATCH 22/23] Pin GitHub Actions dependencies (#4240) --- .github/workflows/check-opte-ver.yml | 2 +- .github/workflows/check-workspace-deps.yml | 2 +- .github/workflows/hakari.yml | 10 +++++----- .github/workflows/rust.yml | 14 +++++++------- .github/workflows/update-dendrite.yml | 2 +- .github/workflows/update-maghemite.yml | 2 +- .github/workflows/validate-openapi-spec.yml | 4 ++-- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/check-opte-ver.yml b/.github/workflows/check-opte-ver.yml index 3b57f2795f..a8e18f080e 100644 --- a/.github/workflows/check-opte-ver.yml +++ b/.github/workflows/check-opte-ver.yml @@ -9,7 +9,7 @@ jobs: check-opte-ver: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - name: Install jq run: sudo apt-get install -y jq - name: Install toml-cli diff --git a/.github/workflows/check-workspace-deps.yml b/.github/workflows/check-workspace-deps.yml index 9611c4103c..521afa7359 100644 --- a/.github/workflows/check-workspace-deps.yml +++ b/.github/workflows/check-workspace-deps.yml @@ -10,6 +10,6 @@ jobs: check-workspace-deps: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - name: Check Workspace Dependencies run: cargo xtask check-workspace-deps diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index d79196d318..df4cbc9b59 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -17,21 +17,21 @@ jobs: env: RUSTFLAGS: -D warnings steps: - - 
uses: actions/checkout@v4 - - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4 + - uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1 with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@v2 + uses: taiki-e/install-action@e659bf85ee986e37e35cc1c53bfeebe044d8133e # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date - uses: actions-rs/cargo@v1 + uses: actions-rs/cargo@844f36862e911db73fe0815f00a4a2602c279505 # v1 with: command: hakari args: generate --diff - name: Check all crates depend on workspace-hack - uses: actions-rs/cargo@v1 + uses: actions-rs/cargo@844f36862e911db73fe0815f00a4a2602c279505 # v1 with: command: hakari args: manage-deps --dry-run diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index f5cf1dc885..873b316e16 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -9,7 +9,7 @@ jobs: check-style: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 - name: Report cargo version run: cargo --version - name: Report rustfmt version @@ -27,8 +27,8 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@v3.5.0 - - uses: Swatinem/rust-cache@v2.2.1 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version @@ -53,8 +53,8 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f 
/etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@v3.5.0 - - uses: Swatinem/rust-cache@v2.2.1 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version @@ -79,8 +79,8 @@ jobs: # This repo is unstable and unnecessary: https://github.com/microsoft/linux-package-repositories/issues/34 - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - - uses: actions/checkout@v3.5.0 - - uses: Swatinem/rust-cache@v2.2.1 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: Swatinem/rust-cache@6fd3edff6979b79f87531400ad694fb7f2c84b1f # v2.2.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version diff --git a/.github/workflows/update-dendrite.yml b/.github/workflows/update-dendrite.yml index 86049dcafc..10d8ef7618 100644 --- a/.github/workflows/update-dendrite.yml +++ b/.github/workflows/update-dendrite.yml @@ -29,7 +29,7 @@ jobs: steps: # Checkout both the target and integration branches - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 with: token: ${{ inputs.reflector_access_token }} fetch-depth: 0 diff --git a/.github/workflows/update-maghemite.yml b/.github/workflows/update-maghemite.yml index 07fe329af3..7aa2b8b6c8 100644 --- a/.github/workflows/update-maghemite.yml +++ b/.github/workflows/update-maghemite.yml @@ -29,7 +29,7 @@ jobs: steps: # Checkout both the target and integration branches - - uses: actions/checkout@v3.5.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 with: token: ${{ inputs.reflector_access_token }} fetch-depth: 0 diff --git a/.github/workflows/validate-openapi-spec.yml b/.github/workflows/validate-openapi-spec.yml index 
06fc7526a8..1d6c152296 100644 --- a/.github/workflows/validate-openapi-spec.yml +++ b/.github/workflows/validate-openapi-spec.yml @@ -10,8 +10,8 @@ jobs: format: runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3.5.0 - - uses: actions/setup-node@v3.6.0 + - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # v3.5.0 + - uses: actions/setup-node@64ed1c7eab4cce3362f8c340dee64e5eaeef8f7c # v3.6.0 with: node-version: '18' - name: Install our tools From 48363a1d009e2d3abe8c7500f84f42b7cb1e65d0 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 11 Oct 2023 12:55:29 -0700 Subject: [PATCH 23/23] use optional --- nexus/db-queries/src/db/datastore/region_snapshot.rs | 2 +- nexus/db-queries/src/db/datastore/snapshot.rs | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/region_snapshot.rs b/nexus/db-queries/src/db/datastore/region_snapshot.rs index 148cfe4812..3d328a6206 100644 --- a/nexus/db-queries/src/db/datastore/region_snapshot.rs +++ b/nexus/db-queries/src/db/datastore/region_snapshot.rs @@ -10,8 +10,8 @@ use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::model::RegionSnapshot; use async_bb8_diesel::AsyncRunQueryDsl; -use async_bb8_diesel::OptionalExtension; use diesel::prelude::*; +use diesel::OptionalExtension; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::LookupResult; diff --git a/nexus/db-queries/src/db/datastore/snapshot.rs b/nexus/db-queries/src/db/datastore/snapshot.rs index 474493de5f..59fb00c84d 100644 --- a/nexus/db-queries/src/db/datastore/snapshot.rs +++ b/nexus/db-queries/src/db/datastore/snapshot.rs @@ -24,7 +24,7 @@ use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; -use diesel::result::Error as DieselError; +use diesel::OptionalExtension; use 
nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; @@ -100,7 +100,7 @@ impl DataStore { // does not match, but a project and name that does, return // ObjectAlreadyExists here. - let existing_snapshot_id: Option = match dsl::snapshot + let existing_snapshot_id: Option = dsl::snapshot .filter(dsl::time_deleted.is_null()) .filter(dsl::name.eq(snapshot.name().to_string())) .filter(dsl::project_id.eq(snapshot.project_id)) @@ -108,11 +108,7 @@ impl DataStore { .limit(1) .first_async(&conn) .await - { - Ok(v) => Ok(Some(v)), - Err(DieselError::NotFound) => Ok(None), - Err(e) => Err(e), - }?; + .optional()?; if let Some(existing_snapshot_id) = existing_snapshot_id { if existing_snapshot_id != snapshot.id() {