From 82027322d93b18e93617838bd4a3e4a49a519785 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 30 May 2024 15:18:09 -0700 Subject: [PATCH 01/16] Update Rust crate either to 1.12.0 (#5803) --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index e6b0ffb099..0065e26618 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -261,7 +261,7 @@ dns-service-client = { path = "clients/dns-service-client" } dpd-client = { path = "clients/dpd-client" } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } dyn-clone = "1.0.17" -either = "1.11.0" +either = "1.12.0" expectorate = "1.1.0" fatfs = "0.3.6" filetime = "0.2.23" From 63fc73b54df9f2d759153f14a46f269f819627d5 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 30 May 2024 15:18:21 -0700 Subject: [PATCH 02/16] Update Rust crate serde to v1.0.203 (#5787) --- Cargo.lock | 8 ++++---- workspace-hack/Cargo.toml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7b8326fb8d..ce10771d8d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8372,9 +8372,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.202" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" dependencies = [ "serde_derive", ] @@ -8419,9 +8419,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.202" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ 
"proc-macro2", "quote", diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 3b5e1917d0..ab5d08d711 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -92,7 +92,7 @@ ring = { version = "0.17.8", features = ["std"] } schemars = { version = "0.8.20", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } -serde = { version = "1.0.202", features = ["alloc", "derive", "rc"] } +serde = { version = "1.0.203", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.117", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.5.0", features = ["inline", "unicode"] } @@ -197,7 +197,7 @@ ring = { version = "0.17.8", features = ["std"] } schemars = { version = "0.8.20", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } -serde = { version = "1.0.202", features = ["alloc", "derive", "rc"] } +serde = { version = "1.0.203", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.117", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.5.0", features = ["inline", "unicode"] } From d79a51d57bdf324947275841ac849f2b37edff3a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 30 May 2024 15:18:56 -0700 Subject: [PATCH 03/16] Update Rust crate libc to 0.2.155 (#5819) --- Cargo.lock | 6 +++--- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ce10771d8d..a0878a0c32 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3956,9 +3956,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.153" +version = "0.2.155" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "libdlpi-sys" @@ -4024,7 +4024,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.5", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 0065e26618..fc8811e9b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -306,7 +306,7 @@ ipnetwork = { version = "0.20", features = ["schemars"] } ispf = { git = "https://github.com/oxidecomputer/ispf" } key-manager = { path = "key-manager" } kstat-rs = "0.2.3" -libc = "0.2.153" +libc = "0.2.155" libfalcon = { git = "https://github.com/oxidecomputer/falcon", rev = "e69694a1f7cc9fe31fab27f321017280531fb5f7" } libnvme = { git = "https://github.com/oxidecomputer/libnvme", rev = "6fffcc81d2c423ed2d2e6c5c2827485554c4ecbe" } linear-map = "1.2.0" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index ab5d08d711..ee4dcccb70 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -68,7 +68,7 @@ itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.4.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.153", features = ["extra_traits"] } +libc = { version = "0.2.155", features = ["extra_traits"] } log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.7.2" } @@ -173,7 +173,7 @@ itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } 
itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.4.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.153", features = ["extra_traits"] } +libc = { version = "0.2.155", features = ["extra_traits"] } log = { version = "0.4.21", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.7.2" } From aade5ade080e85238f7175ed281b38385a425a2c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 31 May 2024 04:14:11 +0000 Subject: [PATCH 04/16] Update taiki-e/install-action digest to 51b8ba0 (#5840) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`7491b90` -> `51b8ba0`](https://togithub.com/taiki-e/install-action/compare/7491b90...51b8ba0) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 236b9b5023..ed2615f655 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@7491b900536dd0dae2e47ce7c17f140e46328dc4 # v2 + uses: taiki-e/install-action@51b8ba088c63d8750c618764ff2030742da0ec19 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From b0dfd535386cb93361e08a17d926e91867d9cb08 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 31 May 2024 08:48:43 -0700 Subject: [PATCH 05/16] [xtask] Convert ci_download bash scripts to Rust (#5481) Implements `cargo xtask download`, which provides options to replace the existing scripts: - ci_download_clickhouse - ci_download_cockroachdb - ci_download_console - ci_download_dendrite_openapi - ci_download_dendrite_stub - ci_download_maghemite_mgd - ci_download_maghemite_openapi - ci_download_softnpu_machinery - ci_download_thundermuffin - ci_download_transceiver_control This PR additionally introduces the `cargo xtask download all` option, which attempts to download all artifacts concurrently. 
Somewhat related to https://github.com/oxidecomputer/omicron/issues/3939 --- Cargo.lock | 12 + dev-tools/xtask/Cargo.toml | 12 + dev-tools/xtask/src/download.rs | 873 ++++++++++++++++++++++++++++++++ dev-tools/xtask/src/main.rs | 9 +- 4 files changed, 904 insertions(+), 2 deletions(-) create mode 100644 dev-tools/xtask/src/download.rs diff --git a/Cargo.lock b/Cargo.lock index a0878a0c32..15ebba75ae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11416,10 +11416,22 @@ dependencies = [ "cargo_metadata", "cargo_toml", "clap", + "flate2", "fs-err", + "futures", "macaddr", + "md5", + "reqwest", "serde", + "sha2", + "slog", + "slog-async", + "slog-bunyan", + "slog-term", + "strum", "swrite", + "tar", + "tokio", "toml 0.8.13", ] diff --git a/dev-tools/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml index 2aecde57e5..745e16dea6 100644 --- a/dev-tools/xtask/Cargo.toml +++ b/dev-tools/xtask/Cargo.toml @@ -13,8 +13,20 @@ camino.workspace = true cargo_toml = "0.20" cargo_metadata.workspace = true clap.workspace = true +flate2.workspace = true +futures.workspace = true macaddr.workspace = true +md5 = "0.7.0" +reqwest = { workspace = true, features = [ "default-tls" ] } serde.workspace = true +sha2.workspace = true +slog.workspace = true +slog-async.workspace = true +slog-bunyan.workspace = true +slog-term.workspace = true +strum.workspace = true +tar.workspace = true +tokio = { workspace = true, features = ["full"] } toml.workspace = true fs-err.workspace = true swrite.workspace = true diff --git a/dev-tools/xtask/src/download.rs b/dev-tools/xtask/src/download.rs new file mode 100644 index 0000000000..ccfe8b2bc9 --- /dev/null +++ b/dev-tools/xtask/src/download.rs @@ -0,0 +1,873 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Subcommand: cargo xtask download + +use anyhow::{bail, Context, Result}; +use camino::{Utf8Path, Utf8PathBuf}; +use clap::Parser; +use clap::ValueEnum; +use flate2::bufread::GzDecoder; +use futures::StreamExt; +use sha2::Digest; +use slog::{info, o, warn, Drain, Logger}; +use std::collections::{BTreeSet, HashMap}; +use std::io::Write; +use std::os::unix::fs::PermissionsExt; +use strum::EnumIter; +use strum::IntoEnumIterator; +use tar::Archive; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::process::Command; + +const BUILDOMAT_URL: &'static str = + "https://buildomat.eng.oxide.computer/public/file"; + +/// What is being downloaded? +#[derive( + Copy, + Clone, + Debug, + Hash, + PartialEq, + Eq, + PartialOrd, + Ord, + ValueEnum, + EnumIter, +)] +enum Target { + /// Download all targets + All, + + /// Clickhouse binary + Clickhouse, + + /// CockroachDB binary + Cockroach, + + /// Web console assets + Console, + + /// Dendrite OpenAPI spec + DendriteOpenapi, + + /// Stub Dendrite binary tarball + DendriteStub, + + /// Maghemite mgd binary + MaghemiteMgd, + + /// SoftNPU, an admin program (scadm) and a pre-compiled P4 program. + Softnpu, + + /// Transceiver Control binary + TransceiverControl, +} + +#[derive(Parser)] +pub struct DownloadArgs { + /// The targets to be downloaded. This list is additive. + #[clap(required = true)] + targets: Vec, + + /// The path to the "out" directory of omicron. + #[clap(long, default_value = "out")] + output_dir: Utf8PathBuf, + + /// The path to the versions and checksums directory. + #[clap(long, default_value = "tools")] + versions_dir: Utf8PathBuf, +} + +pub async fn run_cmd(args: DownloadArgs) -> Result<()> { + let mut targets = BTreeSet::new(); + + for target in args.targets { + match target { + Target::All => { + // Add all targets, then remove the "All" variant because that + // isn't a real thing we can download. 
+ let mut all = BTreeSet::from_iter(Target::iter()); + all.remove(&Target::All); + targets.append(&mut all); + } + _ => _ = targets.insert(target), + } + } + + let decorator = slog_term::TermDecorator::new().build(); + let drain = slog_term::FullFormat::new(decorator).build().fuse(); + let drain = slog_async::Async::new(drain).build().fuse(); + let log = Logger::root(drain, o!()); + + let mut all_downloads = targets + .into_iter() + .map(|target| { + let log = log.new(o!("target" => format!("{target:?}"))); + let output_dir = args.output_dir.clone(); + let versions_dir = args.versions_dir.clone(); + tokio::task::spawn(async move { + info!(&log, "Starting download"); + + let downloader = Downloader::new( + log.clone(), + &output_dir, + &versions_dir, + ); + + match target { + Target::All => { + bail!("We should have already filtered this 'All' target out?"); + } + Target::Clickhouse => downloader.download_clickhouse().await, + Target::Cockroach => downloader.download_cockroach().await, + Target::Console => downloader.download_console().await, + Target::DendriteOpenapi => { + downloader.download_dendrite_openapi().await + } + Target::DendriteStub => downloader.download_dendrite_stub().await, + Target::MaghemiteMgd => downloader.download_maghemite_mgd().await, + Target::Softnpu => downloader.download_softnpu().await, + Target::TransceiverControl => { + downloader.download_transceiver_control().await + } + }.context("Failed to download {target:?}")?; + + info!(&log, "Download complete"); + Ok(()) + }) + }) + .collect::>(); + + while let Some(result) = all_downloads.next().await { + result??; + } + + Ok(()) +} + +enum Os { + Illumos, + Linux, + Mac, +} + +impl Os { + fn env_name(&self) -> &'static str { + match self { + Os::Illumos => "ILLUMOS", + Os::Linux => "LINUX", + Os::Mac => "DARWIN", + } + } +} + +fn os_name() -> Result { + let os = match std::env::consts::OS { + "linux" => Os::Linux, + "macos" => Os::Mac, + "solaris" | "illumos" => Os::Illumos, + other => 
bail!("OS not supported: {other}"), + }; + Ok(os) +} + +struct Downloader<'a> { + log: Logger, + + /// The path to the "out" directory of omicron. + output_dir: &'a Utf8Path, + + /// The path to the versions and checksums directory. + versions_dir: &'a Utf8Path, +} + +impl<'a> Downloader<'a> { + fn new( + log: Logger, + output_dir: &'a Utf8Path, + versions_dir: &'a Utf8Path, + ) -> Self { + Self { log, output_dir, versions_dir } + } +} + +/// Parses a file of the format: +/// +/// ```ignore +/// KEY1="value1" +/// KEY2="value2" +/// ``` +/// +/// And returns an array of the values in the same order as keys. +async fn get_values_from_file( + keys: [&str; N], + path: &Utf8Path, +) -> Result<[String; N]> { + // Map of "key" => "Position in output". + let mut keys: HashMap<&str, usize> = + keys.into_iter().enumerate().map(|(i, s)| (s, i)).collect(); + + const EMPTY_STRING: String = String::new(); + let mut values = [EMPTY_STRING; N]; + + let content = tokio::fs::read_to_string(&path) + .await + .context("Failed to read {path}")?; + for line in content.lines() { + let line = line.trim(); + let Some((key, value)) = line.split_once("=") else { + continue; + }; + let value = value.trim_matches('"'); + if let Some(i) = keys.remove(key) { + values[i] = value.to_string(); + } + } + if !keys.is_empty() { + bail!("Could not find keys: {:?}", keys.keys().collect::>(),); + } + Ok(values) +} + +/// Send a GET request to `url`, downloading the contents to `path`. +/// +/// Writes the response to the file as it is received. +async fn streaming_download(url: &str, path: &Utf8Path) -> Result<()> { + let mut response = reqwest::get(url).await?; + let mut tarball = tokio::fs::File::create(&path).await?; + while let Some(chunk) = response.chunk().await? { + tarball.write_all(chunk.as_ref()).await?; + } + Ok(()) +} + +/// Returns the hex, lowercase md5 checksum of a file at `path`. 
+async fn md5_checksum(path: &Utf8Path) -> Result { + let mut buf = vec![0u8; 65536]; + let mut file = tokio::fs::File::open(path).await?; + let mut ctx = md5::Context::new(); + loop { + let n = file.read(&mut buf).await?; + if n == 0 { + break; + } + ctx.write_all(&buf[0..n])?; + } + + let digest = ctx.compute(); + Ok(format!("{digest:x}")) +} + +/// Returns the hex, lowercase sha2 checksum of a file at `path`. +async fn sha2_checksum(path: &Utf8Path) -> Result { + let mut buf = vec![0u8; 65536]; + let mut file = tokio::fs::File::open(path).await?; + let mut ctx = sha2::Sha256::new(); + loop { + let n = file.read(&mut buf).await?; + if n == 0 { + break; + } + ctx.write_all(&buf[0..n])?; + } + + let digest = ctx.finalize(); + Ok(format!("{digest:x}")) +} + +async fn unpack_tarball( + log: &Logger, + tarball_path: &Utf8Path, + destination_dir: &Utf8Path, +) -> Result<()> { + info!(log, "Unpacking {tarball_path} to {destination_dir}"); + let tarball_path = tarball_path.to_owned(); + let destination_dir = destination_dir.to_owned(); + + let task = tokio::task::spawn_blocking(move || { + let reader = std::fs::File::open(tarball_path)?; + let buf_reader = std::io::BufReader::new(reader); + let gz = GzDecoder::new(buf_reader); + let mut archive = Archive::new(gz); + archive.unpack(&destination_dir)?; + Ok(()) + }); + task.await? +} + +async fn unpack_gzip( + log: &Logger, + gzip_path: &Utf8Path, + destination: &Utf8Path, +) -> Result<()> { + info!(log, "Unpacking {gzip_path} to {destination}"); + let gzip_path = gzip_path.to_owned(); + let destination = destination.to_owned(); + + let task = tokio::task::spawn_blocking(move || { + let reader = std::fs::File::open(gzip_path)?; + let buf_reader = std::io::BufReader::new(reader); + let mut gz = GzDecoder::new(buf_reader); + + let mut destination = std::fs::File::create(destination)?; + std::io::copy(&mut gz, &mut destination)?; + Ok(()) + }); + task.await? 
+} + +async fn clickhouse_confirm_binary_works(binary: &Utf8Path) -> Result<()> { + let mut cmd = Command::new(binary); + cmd.args(["server", "--version"]); + + let output = + cmd.output().await.context(format!("Failed to run {binary}"))?; + if !output.status.success() { + let stderr = + String::from_utf8(output.stderr).unwrap_or_else(|_| String::new()); + bail!("{binary} failed: {} (stderr: {stderr})", output.status); + } + Ok(()) +} + +async fn cockroach_confirm_binary_works(binary: &Utf8Path) -> Result<()> { + let mut cmd = Command::new(binary); + cmd.arg("version"); + + let output = + cmd.output().await.context(format!("Failed to run {binary}"))?; + if !output.status.success() { + let stderr = + String::from_utf8(output.stderr).unwrap_or_else(|_| String::new()); + bail!("{binary} failed: {} (stderr: {stderr})", output.status); + } + Ok(()) +} + +fn copy_dir_all(src: &Utf8Path, dst: &Utf8Path) -> Result<()> { + std::fs::create_dir_all(&dst)?; + for entry in src.read_dir_utf8()? { + let entry = entry?; + let ty = entry.file_type()?; + if ty.is_dir() { + copy_dir_all(entry.path(), &dst.join(entry.file_name()))?; + } else { + std::fs::copy(entry.path(), &dst.join(entry.file_name()))?; + } + } + Ok(()) +} + +async fn set_permissions(path: &Utf8Path, mode: u32) -> Result<()> { + let mut p = tokio::fs::metadata(&path).await?.permissions(); + p.set_mode(mode); + tokio::fs::set_permissions(&path, p).await?; + Ok(()) +} + +enum ChecksumAlgorithm { + Md5, + Sha2, +} + +impl ChecksumAlgorithm { + async fn checksum(&self, path: &Utf8Path) -> Result { + match self { + ChecksumAlgorithm::Md5 => md5_checksum(path).await, + ChecksumAlgorithm::Sha2 => sha2_checksum(path).await, + } + } +} + +/// Downloads a file and verifies the checksum. +/// +/// If the file already exists and the checksum matches, +/// avoids performing the download altogether. 
+async fn download_file_and_verify( + log: &Logger, + path: &Utf8Path, + url: &str, + algorithm: ChecksumAlgorithm, + checksum: &str, +) -> Result<()> { + let do_download = if path.exists() { + info!(log, "Already downloaded ({path})"); + if algorithm.checksum(&path).await? == checksum { + info!( + log, + "Checksum matches already downloaded file - skipping download" + ); + false + } else { + warn!(log, "Checksum mismatch - retrying download"); + true + } + } else { + true + }; + + if do_download { + info!(log, "Downloading {path}"); + streaming_download(&url, &path).await?; + } + + let observed_checksum = algorithm.checksum(&path).await?; + if observed_checksum != checksum { + bail!( + "Checksum mismatch (saw {observed_checksum}, expected {checksum})" + ); + } + Ok(()) +} + +impl<'a> Downloader<'a> { + async fn download_clickhouse(&self) -> Result<()> { + let os = os_name()?; + + let download_dir = self.output_dir.join("downloads"); + let destination_dir = self.output_dir.join("clickhouse"); + + let checksums_path = self.versions_dir.join("clickhouse_checksums"); + let [checksum] = get_values_from_file( + [&format!("CIDL_MD5_{}", os.env_name())], + &checksums_path, + ) + .await?; + + let versions_path = self.versions_dir.join("clickhouse_version"); + let version = tokio::fs::read_to_string(&versions_path) + .await + .context("Failed to read version from {versions_path}")?; + let version = version.trim(); + + const S3_BUCKET: &'static str = + "https://oxide-clickhouse-build.s3.us-west-2.amazonaws.com"; + + let platform = match os { + Os::Illumos => "illumos", + Os::Linux => "linux", + Os::Mac => "macos", + }; + let tarball_filename = + format!("clickhouse-{version}.{platform}.tar.gz"); + let tarball_url = format!("{S3_BUCKET}/{tarball_filename}"); + + let tarball_path = download_dir.join(tarball_filename); + + tokio::fs::create_dir_all(&download_dir).await?; + tokio::fs::create_dir_all(&destination_dir).await?; + + download_file_and_verify( + &self.log, + 
&tarball_path, + &tarball_url, + ChecksumAlgorithm::Md5, + &checksum, + ) + .await?; + + unpack_tarball(&self.log, &tarball_path, &destination_dir).await?; + let clickhouse_binary = destination_dir.join("clickhouse"); + + info!(self.log, "Checking that binary works"); + clickhouse_confirm_binary_works(&clickhouse_binary).await?; + + Ok(()) + } + + async fn download_cockroach(&self) -> Result<()> { + let os = os_name()?; + + let download_dir = self.output_dir.join("downloads"); + let destination_dir = self.output_dir.join("cockroachdb"); + + let checksums_path = self.versions_dir.join("cockroachdb_checksums"); + let [checksum] = get_values_from_file( + [&format!("CIDL_SHA256_{}", os.env_name())], + &checksums_path, + ) + .await?; + + let versions_path = self.versions_dir.join("cockroachdb_version"); + let version = tokio::fs::read_to_string(&versions_path) + .await + .context("Failed to read version from {versions_path}")?; + let version = version.trim(); + + let (url_base, suffix) = match os { + Os::Illumos => ("https://illumos.org/downloads", "tar.gz"), + Os::Linux | Os::Mac => ("https://binaries.cockroachdb.com", "tgz"), + }; + let build = match os { + Os::Illumos => "illumos", + Os::Linux => "linux-amd64", + Os::Mac => "darwin-10.9-amd64", + }; + + let version_directory = format!("cockroach-{version}"); + let tarball_name = format!("{version_directory}.{build}"); + let tarball_filename = format!("{tarball_name}.{suffix}"); + let tarball_url = format!("{url_base}/{tarball_filename}"); + + let tarball_path = download_dir.join(tarball_filename); + + tokio::fs::create_dir_all(&download_dir).await?; + tokio::fs::create_dir_all(&destination_dir).await?; + + download_file_and_verify( + &self.log, + &tarball_path, + &tarball_url, + ChecksumAlgorithm::Sha2, + &checksum, + ) + .await?; + + // We unpack the tarball in the download directory to emulate the old + // behavior. This could be a little more consistent with Clickhouse. 
+ info!(self.log, "tarball path: {tarball_path}"); + unpack_tarball(&self.log, &tarball_path, &download_dir).await?; + + // This is where the binary will end up eventually + let cockroach_binary = destination_dir.join("bin/cockroach"); + + // Re-shuffle the downloaded tarball to our "destination" location. + // + // This ensures some uniformity, even though different platforms bundle + // the Cockroach package differently. + let binary_dir = destination_dir.join("bin"); + tokio::fs::create_dir_all(&binary_dir).await?; + match os { + Os::Illumos => { + let src = tarball_path.with_file_name(version_directory); + let dst = &destination_dir; + info!(self.log, "Copying from {src} to {dst}"); + copy_dir_all(&src, &dst)?; + } + Os::Linux | Os::Mac => { + let src = + tarball_path.with_file_name(tarball_name).join("cockroach"); + tokio::fs::copy(src, &cockroach_binary).await?; + } + } + + info!(self.log, "Checking that binary works"); + cockroach_confirm_binary_works(&cockroach_binary).await?; + + Ok(()) + } + + async fn download_console(&self) -> Result<()> { + let download_dir = self.output_dir.join("downloads"); + let tarball_path = download_dir.join("console.tar.gz"); + + let checksums_path = self.versions_dir.join("console_version"); + let [commit, checksum] = + get_values_from_file(["COMMIT", "SHA2"], &checksums_path).await?; + + tokio::fs::create_dir_all(&download_dir).await?; + let tarball_url = format!( + "https://dl.oxide.computer/releases/console/{commit}.tar.gz" + ); + download_file_and_verify( + &self.log, + &tarball_path, + &tarball_url, + ChecksumAlgorithm::Sha2, + &checksum, + ) + .await?; + + let destination_dir = self.output_dir.join("console-assets"); + let _ = tokio::fs::remove_dir_all(&destination_dir).await; + tokio::fs::create_dir_all(&destination_dir).await?; + + unpack_tarball(&self.log, &tarball_path, &destination_dir).await?; + + Ok(()) + } + + async fn download_dendrite_openapi(&self) -> Result<()> { + let download_dir = 
self.output_dir.join("downloads"); + + let checksums_path = self.versions_dir.join("dendrite_openapi_version"); + let [commit, checksum] = + get_values_from_file(["COMMIT", "SHA2"], &checksums_path).await?; + + let url = format!( + "{BUILDOMAT_URL}/oxidecomputer/dendrite/openapi/{commit}/dpd.json" + ); + let path = download_dir.join(format!("dpd-{commit}.json")); + + tokio::fs::create_dir_all(&download_dir).await?; + download_file_and_verify( + &self.log, + &path, + &url, + ChecksumAlgorithm::Sha2, + &checksum, + ) + .await?; + + Ok(()) + } + + async fn download_dendrite_stub(&self) -> Result<()> { + let download_dir = self.output_dir.join("downloads"); + let destination_dir = self.output_dir.join("dendrite-stub"); + + let stub_checksums_path = + self.versions_dir.join("dendrite_stub_checksums"); + + // NOTE: This seems odd to me -- the "dendrite_openapi_version" file also + // contains a SHA2, but we're ignoring it? + // + // Regardless, this is currenlty the one that actually matches, regardless + // of host OS. + let [sha2, dpd_sha2, swadm_sha2] = get_values_from_file( + [ + "CIDL_SHA256_ILLUMOS", + "CIDL_SHA256_LINUX_DPD", + "CIDL_SHA256_LINUX_SWADM", + ], + &stub_checksums_path, + ) + .await?; + let checksums_path = self.versions_dir.join("dendrite_openapi_version"); + let [commit, _sha2] = + get_values_from_file(["COMMIT", "SHA2"], &checksums_path).await?; + + let tarball_file = "dendrite-stub.tar.gz"; + let tarball_path = download_dir.join(tarball_file); + let repo = "oxidecomputer/dendrite"; + let url_base = format!("{BUILDOMAT_URL}/{repo}/image/{commit}"); + + tokio::fs::create_dir_all(&download_dir).await?; + tokio::fs::create_dir_all(&destination_dir).await?; + + download_file_and_verify( + &self.log, + &tarball_path, + &format!("{url_base}/{tarball_file}"), + ChecksumAlgorithm::Sha2, + &sha2, + ) + .await?; + + // Unpack in the download directory, then copy everything into the + // destination directory. 
+ unpack_tarball(&self.log, &tarball_path, &download_dir).await?; + + let _ = tokio::fs::remove_dir_all(&destination_dir).await; + tokio::fs::create_dir_all(&destination_dir).await?; + let destination_root = destination_dir.join("root"); + tokio::fs::create_dir_all(&destination_root).await?; + copy_dir_all(&download_dir.join("root"), &destination_root)?; + + let bin_dir = destination_dir.join("root/opt/oxide/dendrite/bin"); + + // Symbolic links for backwards compatibility with existing setups + std::os::unix::fs::symlink( + bin_dir.canonicalize()?, + destination_dir.canonicalize()?.join("bin"), + ) + .context("Failed to create a symlink to dendrite's bin directory")?; + + match os_name()? { + Os::Linux => { + let base_url = + format!("{BUILDOMAT_URL}/{repo}/linux-bin/{commit}"); + let filename = "dpd"; + let path = download_dir.join(filename); + download_file_and_verify( + &self.log, + &path, + &format!("{base_url}/{filename}"), + ChecksumAlgorithm::Sha2, + &dpd_sha2, + ) + .await?; + set_permissions(&path, 0o755).await?; + tokio::fs::copy(path, bin_dir.join(filename)).await?; + + let filename = "swadm"; + let path = download_dir.join(filename); + download_file_and_verify( + &self.log, + &path, + &format!("{base_url}/{filename}"), + ChecksumAlgorithm::Sha2, + &swadm_sha2, + ) + .await?; + set_permissions(&path, 0o755).await?; + tokio::fs::copy(path, bin_dir.join(filename)).await?; + } + Os::Illumos => {} + Os::Mac => { + warn!(self.log, "WARNING: Dendrite not available for Mac"); + warn!(self.log, "Network APIs will be unavailable"); + + let path = bin_dir.join("dpd"); + tokio::fs::write(&path, "echo 'unsupported os' && exit 1") + .await?; + set_permissions(&path, 0o755).await?; + } + } + + Ok(()) + } + + async fn download_maghemite_mgd(&self) -> Result<()> { + let download_dir = self.output_dir.join("downloads"); + tokio::fs::create_dir_all(&download_dir).await?; + + let checksums_path = self.versions_dir.join("maghemite_mgd_checksums"); + let [mgd_sha2, 
mgd_linux_sha2] = get_values_from_file( + ["CIDL_SHA256", "MGD_LINUX_SHA256"], + &checksums_path, + ) + .await?; + let commit_path = + self.versions_dir.join("maghemite_mg_openapi_version"); + let [commit] = get_values_from_file(["COMMIT"], &commit_path).await?; + + let repo = "oxidecomputer/maghemite"; + let base_url = format!("{BUILDOMAT_URL}/{repo}/image/{commit}"); + + let filename = "mgd.tar.gz"; + let tarball_path = download_dir.join(filename); + download_file_and_verify( + &self.log, + &tarball_path, + &format!("{base_url}/{filename}"), + ChecksumAlgorithm::Sha2, + &mgd_sha2, + ) + .await?; + unpack_tarball(&self.log, &tarball_path, &download_dir).await?; + + let destination_dir = self.output_dir.join("mgd"); + let _ = tokio::fs::remove_dir_all(&destination_dir).await; + tokio::fs::create_dir_all(&destination_dir).await?; + copy_dir_all( + &download_dir.join("root"), + &destination_dir.join("root"), + )?; + + let binary_dir = destination_dir.join("root/opt/oxide/mgd/bin"); + + match os_name()? { + Os::Linux => { + let filename = "mgd"; + let path = download_dir.join(filename); + download_file_and_verify( + &self.log, + &path, + &format!( + "{BUILDOMAT_URL}/{repo}/linux/{commit}/{filename}" + ), + ChecksumAlgorithm::Sha2, + &mgd_linux_sha2, + ) + .await?; + set_permissions(&path, 0o755).await?; + tokio::fs::copy(path, binary_dir.join(filename)).await?; + } + _ => (), + } + + Ok(()) + } + + async fn download_softnpu(&self) -> Result<()> { + let destination_dir = self.output_dir.join("npuzone"); + tokio::fs::create_dir_all(&destination_dir).await?; + + let repo = "oxidecomputer/softnpu"; + + // TODO: This should probably live in a separate file, but + // at the moment we're just building parity with + // "ci_download_softnpu_machinery". 
+ let commit = "3203c51cf4473d30991b522062ac0df2e045c2f2"; + + let filename = "npuzone"; + let base_url = format!("{BUILDOMAT_URL}/{repo}/image/{commit}"); + let artifact_url = format!("{base_url}/{filename}"); + let sha2_url = format!("{base_url}/{filename}.sha256.txt"); + let sha2 = reqwest::get(sha2_url).await?.text().await?; + let sha2 = sha2.trim(); + + let path = destination_dir.join(filename); + download_file_and_verify( + &self.log, + &path, + &artifact_url, + ChecksumAlgorithm::Sha2, + &sha2, + ) + .await?; + set_permissions(&path, 0o755).await?; + + Ok(()) + } + + async fn download_transceiver_control(&self) -> Result<()> { + let destination_dir = self.output_dir.join("transceiver-control"); + let download_dir = self.output_dir.join("downloads"); + tokio::fs::create_dir_all(&download_dir).await?; + + let [commit, sha2] = get_values_from_file( + ["COMMIT", "CIDL_SHA256_ILLUMOS"], + &self.versions_dir.join("transceiver_control_version"), + ) + .await?; + + let repo = "oxidecomputer/transceiver-control"; + let base_url = format!("{BUILDOMAT_URL}/{repo}/bins/{commit}"); + + let filename_gz = "xcvradm.gz"; + let filename = "xcvradm"; + let gzip_path = download_dir.join(filename_gz); + download_file_and_verify( + &self.log, + &gzip_path, + &format!("{base_url}/{filename_gz}"), + ChecksumAlgorithm::Sha2, + &sha2, + ) + .await?; + + let download_bin_dir = download_dir.join("root/opt/oxide/bin"); + tokio::fs::create_dir_all(&download_bin_dir).await?; + let path = download_bin_dir.join(filename); + unpack_gzip(&self.log, &gzip_path, &path).await?; + set_permissions(&path, 0o755).await?; + + let _ = tokio::fs::remove_dir_all(&destination_dir).await; + tokio::fs::create_dir_all(&destination_dir).await?; + copy_dir_all( + &download_dir.join("root"), + &destination_dir.join("root"), + )?; + + match os_name()? 
{ + Os::Illumos => (), + _ => { + let binary_dir = destination_dir.join("opt/oxide/bin"); + tokio::fs::create_dir_all(&binary_dir).await?; + + let path = binary_dir.join(filename); + warn!(self.log, "Unsupported OS for transceiver-control - Creating stub"; "path" => %path); + tokio::fs::write(&path, "echo 'unsupported os' && exit 1") + .await?; + set_permissions(&path, 0o755).await?; + } + } + + Ok(()) + } +} diff --git a/dev-tools/xtask/src/main.rs b/dev-tools/xtask/src/main.rs index 9f1131e758..22e5a22632 100644 --- a/dev-tools/xtask/src/main.rs +++ b/dev-tools/xtask/src/main.rs @@ -12,6 +12,7 @@ use clap::{Parser, Subcommand}; mod check_workspace_deps; mod clippy; +mod download; #[cfg_attr(not(target_os = "illumos"), allow(dead_code))] mod external; @@ -38,10 +39,13 @@ enum Cmds { CheckWorkspaceDeps, /// Run configured clippy checks Clippy(clippy::ClippyArgs), + /// Download binaries, OpenAPI specs, and other out-of-repo utilities. + Download(download::DownloadArgs), #[cfg(target_os = "illumos")] /// Build a TUF repo Releng(external::External), + /// Verify we are not leaking library bindings outside of intended /// crates #[cfg(target_os = "illumos")] @@ -61,12 +65,13 @@ enum Cmds { VirtualHardware, } -fn main() -> Result<()> { +#[tokio::main] +async fn main() -> Result<()> { let args = Args::parse(); match args.cmd { Cmds::Clippy(args) => clippy::run_cmd(args), Cmds::CheckWorkspaceDeps => check_workspace_deps::run_cmd(), - + Cmds::Download(args) => download::run_cmd(args).await, #[cfg(target_os = "illumos")] Cmds::Releng(external) => { external.cargo_args(["--release"]).exec("omicron-releng") From 6dee6ee4bfb44d79e21500c0d8957f9ec115b742 Mon Sep 17 00:00:00 2001 From: Levon Tarver <11586085+internet-diglett@users.noreply.github.com> Date: Fri, 31 May 2024 17:38:20 -0500 Subject: [PATCH 06/16] filter vmm table for active instances (#5845) We needed to add a filter to the view that prevents v2p mappings for inactive instances from showing up in the v2p_mapping 
view. --- nexus/db-model/src/schema_versions.rs | 3 +- schema/crdb/dbinit.sql | 3 +- .../up01.sql | 1 + .../up02.sql | 43 +++++++++++++++++++ 4 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 schema/crdb/filter-v2p-mapping-by-instance-state/up01.sql create mode 100644 schema/crdb/filter-v2p-mapping-by-instance-state/up02.sql diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index b417570a6c..ebc9d0173a 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(67, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(68, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(68, "filter-v2p-mapping-by-instance-state"), KnownVersion::new(67, "add-instance-updater-lock"), KnownVersion::new(66, "blueprint-crdb-preserve-downgrade"), KnownVersion::new(65, "region-replacement"), diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index d254c00138..cf6bc2bf53 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3841,6 +3841,7 @@ WITH VmV2pMappings AS ( JOIN omicron.public.sled s ON vmm.sled_id = s.id WHERE n.time_deleted IS NULL AND n.kind = 'instance' + AND (vmm.state = 'running' OR vmm.state = 'starting') AND s.sled_policy = 'in_service' AND s.sled_state = 'active' ), @@ -4019,7 +4020,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '67.0.0', NULL) + (TRUE, NOW(), NOW(), '68.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/filter-v2p-mapping-by-instance-state/up01.sql b/schema/crdb/filter-v2p-mapping-by-instance-state/up01.sql new file mode 100644 index 0000000000..aebe0119f5 --- /dev/null +++ b/schema/crdb/filter-v2p-mapping-by-instance-state/up01.sql @@ -0,0 +1 @@ +DROP VIEW IF EXISTS omicron.public.v2p_mapping_view; diff --git a/schema/crdb/filter-v2p-mapping-by-instance-state/up02.sql b/schema/crdb/filter-v2p-mapping-by-instance-state/up02.sql new file mode 100644 index 0000000000..c92ac4ae43 --- /dev/null +++ b/schema/crdb/filter-v2p-mapping-by-instance-state/up02.sql @@ -0,0 +1,43 @@ +-- view for v2p mapping rpw +CREATE VIEW IF NOT EXISTS omicron.public.v2p_mapping_view +AS +WITH VmV2pMappings AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.vmm vmm ON n.parent_id = vmm.instance_id + JOIN omicron.public.sled s ON vmm.sled_id = s.id + 
WHERE n.time_deleted IS NULL + AND n.kind = 'instance' + AND (vmm.state = 'running' OR vmm.state = 'starting') + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +), +ProbeV2pMapping AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.probe p ON n.parent_id = p.id + JOIN omicron.public.sled s ON p.sled = s.id + WHERE n.time_deleted IS NULL + AND n.kind = 'probe' + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +) +SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM VmV2pMappings +UNION +SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM ProbeV2pMapping; From 152f61c175ce94eaf4e942ac7bef243850d458dd Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Sat, 1 Jun 2024 00:12:46 +0100 Subject: [PATCH 07/16] Clear XDE underlay when destroying virtual hardware (#5602) OPTE now prevents itself from being unloaded if its underlay state is set. Currently, underlay setup is performed only once, and it seems to be the case that XDE can be unloaded in some scenarios (e.g., `a4x2` setup). However, a consequence is that removing the driver requires an extra operation to explicitly clear the underlay state. This PR adds this operation to the `cargo xtask virtual-hardware destroy` command. Closes #5314. 
--- .github/buildomat/jobs/a4x2-deploy.sh | 2 +- .github/buildomat/jobs/deploy.sh | 3 ++- Cargo.lock | 32 ++++++++----------------- Cargo.toml | 8 +++---- dev-tools/xtask/src/virtual_hardware.rs | 12 +++++++++- package-manifest.toml | 16 ++++++------- tools/maghemite_ddm_openapi_version | 2 +- tools/maghemite_mg_openapi_version | 2 +- tools/maghemite_mgd_checksums | 4 ++-- tools/opte_version | 2 +- workspace-hack/Cargo.toml | 4 ++-- 11 files changed, 43 insertions(+), 44 deletions(-) diff --git a/.github/buildomat/jobs/a4x2-deploy.sh b/.github/buildomat/jobs/a4x2-deploy.sh index c8eb998b35..53153beafb 100755 --- a/.github/buildomat/jobs/a4x2-deploy.sh +++ b/.github/buildomat/jobs/a4x2-deploy.sh @@ -2,7 +2,7 @@ #: #: name = "a4x2-deploy" #: variety = "basic" -#: target = "lab-2.0-opte-0.29" +#: target = "lab-2.0-opte-0.31" #: output_rules = [ #: "/out/falcon/*.log", #: "/out/falcon/*.err", diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index 31733f0dc0..a2aac86aec 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -2,7 +2,7 @@ #: #: name = "helios / deploy" #: variety = "basic" -#: target = "lab-2.0-opte-0.29" +#: target = "lab-2.0-opte-0.31" #: output_rules = [ #: "%/var/svc/log/oxide-sled-agent:default.log*", #: "%/zone/oxz_*/root/var/svc/log/oxide-*.log*", @@ -50,6 +50,7 @@ _exit_trap() { dump-state pfexec /opt/oxide/opte/bin/opteadm list-ports pfexec /opt/oxide/opte/bin/opteadm dump-v2b + pfexec /opt/oxide/opte/bin/opteadm dump-v2p z_swadm link ls z_swadm addr list z_swadm route list diff --git a/Cargo.lock b/Cargo.lock index 15ebba75ae..9072aff98c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -152,9 +152,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.83" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25bdb32cbbdce2b519a9cd7df3a678443100e265d5e25ca763b7572a5104f5f3" +checksum = 
"b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" dependencies = [ "backtrace", ] @@ -1591,7 +1591,7 @@ dependencies = [ [[package]] name = "ddm-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=025389ff39d594bf2b815377e2c1dc4dd23b1f96#025389ff39d594bf2b815377e2c1dc4dd23b1f96" +source = "git+https://github.com/oxidecomputer/maghemite?rev=5630887d0373857f77cb264f84aa19bdec720ce3#5630887d0373857f77cb264f84aa19bdec720ce3" dependencies = [ "percent-encoding", "progenitor", @@ -1729,17 +1729,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "derror-macro" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732" -dependencies = [ - "darling", - "proc-macro2", - "quote", - "syn 2.0.64", -] - [[package]] name = "dhcproto" version = "0.11.0" @@ -3481,7 +3470,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732" +source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2" [[package]] name = "illumos-utils" @@ -3895,7 +3884,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732" +source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2" dependencies = [ "quote", "syn 2.0.64", @@ -4307,7 +4296,7 @@ dependencies = [ [[package]] name = "mg-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=025389ff39d594bf2b815377e2c1dc4dd23b1f96#025389ff39d594bf2b815377e2c1dc4dd23b1f96" +source = 
"git+https://github.com/oxidecomputer/maghemite?rev=5630887d0373857f77cb264f84aa19bdec720ce3#5630887d0373857f77cb264f84aa19bdec720ce3" dependencies = [ "anyhow", "chrono", @@ -6060,10 +6049,9 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732" +source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2" dependencies = [ "cfg-if", - "derror-macro", "dyn-clone", "illumos-sys-hdrs", "kstat-macro", @@ -6078,7 +6066,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732" +source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2" dependencies = [ "illumos-sys-hdrs", "ipnetwork", @@ -6090,7 +6078,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732" +source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2" dependencies = [ "libc", "libnet 0.1.0 (git+https://github.com/oxidecomputer/netadm-sys)", @@ -6164,7 +6152,7 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=194a8d1d6443f78d59702a25849607dba33db732#194a8d1d6443f78d59702a25849607dba33db732" +source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2" dependencies = [ "cfg-if", "illumos-sys-hdrs", diff --git a/Cargo.toml b/Cargo.toml index fc8811e9b5..5c0433a662 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -314,8 +314,8 @@ macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" mockall = "0.12" newtype_derive = "0.1.6" -mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "025389ff39d594bf2b815377e2c1dc4dd23b1f96" } -ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "025389ff39d594bf2b815377e2c1dc4dd23b1f96" } +mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "5630887d0373857f77cb264f84aa19bdec720ce3" } +ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "5630887d0373857f77cb264f84aa19bdec720ce3" } multimap = "0.10.0" nexus-client = { path = "clients/nexus-client" } nexus-config = { path = "nexus-config" } @@ -351,14 +351,14 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.11.0" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "194a8d1d6443f78d59702a25849607dba33db732", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "d6177ca84f23e60a661461bb4cece475689502d2", features = [ "api", "std" ] } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0" # must match samael's crate! 
openssl = "0.10" openssl-sys = "0.9" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "194a8d1d6443f78d59702a25849607dba33db732" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "d6177ca84f23e60a661461bb4cece475689502d2" } oso = "0.27" owo-colors = "4.0.0" oximeter = { path = "oximeter/oximeter" } diff --git a/dev-tools/xtask/src/virtual_hardware.rs b/dev-tools/xtask/src/virtual_hardware.rs index d013ff6505..0ec9f91492 100644 --- a/dev-tools/xtask/src/virtual_hardware.rs +++ b/dev-tools/xtask/src/virtual_hardware.rs @@ -104,6 +104,7 @@ const IPADM: &'static str = "/usr/sbin/ipadm"; const MODINFO: &'static str = "/usr/sbin/modinfo"; const MODUNLOAD: &'static str = "/usr/sbin/modunload"; const NETSTAT: &'static str = "/usr/bin/netstat"; +const OPTEADM: &'static str = "/opt/oxide/opte/bin/opteadm"; const PFEXEC: &'static str = "/usr/bin/pfexec"; const PING: &'static str = "/usr/sbin/ping"; const SWAP: &'static str = "/usr/sbin/swap"; @@ -247,8 +248,17 @@ fn unload_xde_driver() -> Result<()> { println!("xde driver already unloaded"); return Ok(()); }; - println!("unloading xde driver"); + println!("unloading xde driver:\na) clearing underlay..."); + let mut cmd = Command::new(PFEXEC); + cmd.args([OPTEADM, "clear-xde-underlay"]); + if let Err(e) = execute(cmd) { + // This is explicitly non-fatal: the underlay is only set when + // sled-agent is running. We still need to be able to tear + // down the driver if we immediately call create->destroy. + println!("\tFailed or already unset: {e}"); + } + println!("b) unloading module..."); let mut cmd = Command::new(PFEXEC); cmd.arg(MODUNLOAD); cmd.arg("-i"); diff --git a/package-manifest.toml b/package-manifest.toml index bffd5be7dc..9d372cd4df 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -548,10 +548,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. 
Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "23b0cf439f9f62b9a4933e55cc72bcaddc9596cd" +source.commit = "5630887d0373857f77cb264f84aa19bdec720ce3" # The SHA256 digest is automatically posted to: -# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image/<commit>/maghemite.sha256.txt -source.sha256 = "1ea0e73e149a68bf91b5ce2e0db2a8a1af50dcdbbf381b672aa9ac7e36a3a181" +# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image/<commit>/mg-ddm-gz.sha256.txt +source.sha256 = "28965f303a64f49cf5b83322babe1e0ceb4cfe33fb2df8c8d452d8c3ec02d933" output.type = "tarball" [package.mg-ddm] @@ -564,10 +564,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "23b0cf439f9f62b9a4933e55cc72bcaddc9596cd" +source.commit = "5630887d0373857f77cb264f84aa19bdec720ce3" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image/<commit>/mg-ddm.sha256.txt -source.sha256 = "3334b0a9d5956e3117a6b493b9a5a31220391fab1ecbfb3a4bd8e94d7030771a" +source.sha256 = "6fa53be6fc5ad6273e0ca5e969c882ea40c473722415b060dfea420e962d4f8e" output.type = "zone" output.intermediate_only = true @@ -579,10 +579,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "23b0cf439f9f62b9a4933e55cc72bcaddc9596cd" +source.commit = "5630887d0373857f77cb264f84aa19bdec720ce3" # The SHA256 digest is automatically posted to: -# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image/<commit>/mg-ddm.sha256.txt -source.sha256 = "e0907de39ca9f8ab45d40d361a1dbeed4bd8e9b157f8d3d8fe0a4bc259d933bd" +# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image/<commit>/mgd.sha256.txt +source.sha256 = "6ae4bc3b332e91706c1c6633a7fc218aac65b7feff5643ee2dbbe79b841e0df3" output.type = "zone" output.intermediate_only = true diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index c39c9690bb..9a93e6b556 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="025389ff39d594bf2b815377e2c1dc4dd23b1f96" +COMMIT="5630887d0373857f77cb264f84aa19bdec720ce3" SHA2="004e873e4120aa26460271368485266b75b7f964e5ed4dbee8fb5db4519470d7" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 73095bd42d..22918c581a 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="23b0cf439f9f62b9a4933e55cc72bcaddc9596cd" +COMMIT="5630887d0373857f77cb264f84aa19bdec720ce3" SHA2="fdb33ee7425923560534672264008ef8948d227afce948ab704de092ad72157c" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index eeb873a424..d2ad05383d 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="e0907de39ca9f8ab45d40d361a1dbeed4bd8e9b157f8d3d8fe0a4bc259d933bd" -MGD_LINUX_SHA256="903413ddaab89594ed7518cb8f2f27793e96cd17ed2d6b3fe11657ec4375cb19" +CIDL_SHA256="6ae4bc3b332e91706c1c6633a7fc218aac65b7feff5643ee2dbbe79b841e0df3" +MGD_LINUX_SHA256="7930008cf8ce535a8b31043fc3edde0e825bd54d75f73234929bd0037ecc3a41" diff --git a/tools/opte_version b/tools/opte_version index 2de18d2d9b..6126a52eb4 100644 --- 
a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.29.250 +0.31.259 diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index ee4dcccb70..f82fe1c833 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -19,7 +19,7 @@ workspace = true [dependencies] ahash = { version = "0.8.11" } aho-corasick = { version = "1.1.3" } -anyhow = { version = "1.0.83", features = ["backtrace"] } +anyhow = { version = "1.0.86", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } @@ -124,7 +124,7 @@ zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } [build-dependencies] ahash = { version = "0.8.11" } aho-corasick = { version = "1.1.3" } -anyhow = { version = "1.0.83", features = ["backtrace"] } +anyhow = { version = "1.0.86", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } From 8df03b3934dc3c894a0484f333b15bbfd088262c Mon Sep 17 00:00:00 2001 From: David Crespo Date: Fri, 31 May 2024 19:49:19 -0500 Subject: [PATCH 08/16] Fix IP pool silos pagination bug (#5847) Closes #5837 - [x] Write test reproducing the bug - [x] Fix the bug --- nexus/db-queries/src/db/datastore/ip_pool.rs | 2 +- nexus/tests/integration_tests/ip_pools.rs | 48 ++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs index 30ea2e89b4..08db5ef38c 100644 --- a/nexus/db-queries/src/db/datastore/ip_pool.rs +++ b/nexus/db-queries/src/db/datastore/ip_pool.rs @@ -426,7 +426,7 @@ impl DataStore { paginated( ip_pool_resource::table, - ip_pool_resource::ip_pool_id, + ip_pool_resource::resource_id, pagparams, ) .inner_join(ip_pool::table) diff --git a/nexus/tests/integration_tests/ip_pools.rs 
b/nexus/tests/integration_tests/ip_pools.rs index 38cfd25844..e3ddc98029 100644 --- a/nexus/tests/integration_tests/ip_pools.rs +++ b/nexus/tests/integration_tests/ip_pools.rs @@ -724,6 +724,54 @@ async fn test_ip_pool_pagination(cptestctx: &ControlPlaneTestContext) { assert_eq!(get_names(next_page.items), &pool_names[5..8]); } +#[nexus_test] +async fn test_ip_pool_silos_pagination(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + + // one pool, and there should be no linked silos + create_pool(client, "p0").await; + let silos_p0 = silos_for_pool(client, "p0").await; + assert_eq!(silos_p0.items.len(), 0); + + // create and link some silos. we need to use discoverable silos because + // non-discoverable silos, while linkable, are filtered out of the list of + // linked silos for a pool + let mut silo_ids = vec![]; + for i in 1..=8 { + let name = format!("silo-{}", i); + let silo = + create_silo(&client, &name, true, SiloIdentityMode::SamlJit).await; + silo_ids.push(silo.id()); + link_ip_pool(client, "p0", &silo.id(), false).await; + } + + // we paginate by ID, so these should be in order to match + silo_ids.sort(); + + let base_url = "/v1/system/ip-pools/p0/silos"; + let first_five_url = format!("{}?limit=5", base_url); + let first_five = + objects_list_page_authz::<IpPoolSiloLink>(client, &first_five_url) + .await; + assert!(first_five.next_page.is_some()); + assert_eq!( + first_five.items.iter().map(|s| s.silo_id).collect::<Vec<_>>(), + &silo_ids[0..5] + ); + + let next_page_url = format!( + "{}?limit=5&page_token={}", + base_url, + first_five.next_page.unwrap() + ); + let next_page = + objects_list_page_authz::<IpPoolSiloLink>(client, &next_page_url).await; + assert_eq!( + next_page.items.iter().map(|s| s.silo_id).collect::<Vec<_>>(), + &silo_ids[5..8] + ); +} + /// helper to make tests less ugly fn get_names(pools: Vec<IpPool>) -> Vec<String> { pools.iter().map(|p| p.identity.name.to_string()).collect() From 450f906e8f54e7b6999de8256a74730d7475f6e3 Mon Sep 17 00:00:00 2001 From: Sean Klein 
Date: Fri, 31 May 2024 18:53:35 -0700 Subject: [PATCH 09/16] [nexus] Split authn/authz and db-fixed-data into new crates (#5849) As a part of the ongoing effort to split Nexus into smaller pieces, this PR splits out two new crates: - `nexus-auth` takes the contents of `nexus/db-queries/src/auth{n,z}`, as well as `nexus/db-queries/src/context.rs`, and separates this logic into a new bespoke crate. Although this crate **does** have a dependency on the datastore itself, it only actually invokes a single method, and can be abstracted via a new trait, defined in `nexus/auth/storage`. - `nexus-db-fixed-data` takes the contents of `nexus/db-queries/src/db/fixed-data` and separates this logic into a new crate. --- Cargo.lock | 67 ++++- Cargo.toml | 6 + nexus/Cargo.toml | 1 + nexus/auth/Cargo.toml | 48 ++++ nexus/auth/build.rs | 10 + .../src/authn/external/cookies.rs | 2 + .../src/authn/external/mod.rs | 1 + .../src/authn/external/session_cookie.rs | 1 + .../src/authn/external/spoof.rs | 1 + .../src/authn/external/token.rs | 0 nexus/{db-queries => auth}/src/authn/mod.rs | 40 +-- nexus/{db-queries => auth}/src/authn/saga.rs | 0 nexus/{db-queries => auth}/src/authn/silos.rs | 61 +---- nexus/{db-queries => auth}/src/authz/actor.rs | 0 .../src/authz/api_resources.rs | 240 ++++++------------ .../{db-queries => auth}/src/authz/context.rs | 108 ++++---- nexus/{db-queries => auth}/src/authz/mod.rs | 7 +- .../src/authz/omicron.polar | 0 .../src/authz/oso_generic.rs | 25 +- nexus/{db-queries => auth}/src/authz/roles.rs | 11 +- nexus/{db-queries => auth}/src/context.rs | 65 +++-- nexus/auth/src/lib.rs | 11 + nexus/auth/src/storage.rs | 27 ++ nexus/db-fixed-data/Cargo.toml | 25 ++ nexus/db-fixed-data/build.rs | 10 + .../src}/allow_list.rs | 0 .../mod.rs => db-fixed-data/src/lib.rs} | 0 .../src}/project.rs | 10 +- .../src}/role_assignment.rs | 4 +- .../src}/role_builtin.rs | 2 +- .../fixed_data => db-fixed-data/src}/silo.rs | 10 +- .../src}/silo_user.rs | 43 ++-- 
.../src}/user_builtin.rs | 0 .../fixed_data => db-fixed-data/src}/vpc.rs | 8 +- .../src}/vpc_firewall_rule.rs | 0 .../src}/vpc_subnet.rs | 2 +- nexus/db-queries/Cargo.toml | 14 +- .../db-queries/src/db/datastore/allow_list.rs | 2 +- nexus/db-queries/src/db/datastore/auth.rs | 81 ++++++ .../src/db/datastore/cockroachdb_settings.rs | 6 +- .../src/db/datastore/identity_provider.rs | 48 ++++ nexus/db-queries/src/db/datastore/instance.rs | 3 +- nexus/db-queries/src/db/datastore/mod.rs | 18 +- .../src/db/datastore/network_interface.rs | 2 +- nexus/db-queries/src/db/datastore/project.rs | 4 +- .../src/db/datastore/pub_test_utils.rs | 8 +- nexus/db-queries/src/db/datastore/rack.rs | 8 +- nexus/db-queries/src/db/datastore/role.rs | 65 +---- nexus/db-queries/src/db/datastore/silo.rs | 2 +- .../db-queries/src/db/datastore/silo_user.rs | 4 +- .../virtual_provisioning_collection.rs | 7 +- nexus/db-queries/src/db/datastore/vpc.rs | 20 +- nexus/db-queries/src/db/lookup.rs | 6 +- nexus/db-queries/src/db/mod.rs | 2 +- .../virtual_provisioning_collection_update.rs | 2 +- nexus/db-queries/src/db/saga_recovery.rs | 10 +- nexus/db-queries/src/lib.rs | 12 +- .../src/{authz => }/policy_test/coverage.rs | 5 +- .../src/{authz => }/policy_test/mod.rs | 18 +- .../policy_test/resource_builder.rs | 96 ++++--- .../src/{authz => }/policy_test/resources.rs | 6 +- nexus/db-queries/tests/output/authz-roles.out | 2 +- nexus/src/app/mod.rs | 18 +- nexus/src/app/test_interfaces.rs | 12 +- nexus/src/external_api/console_api.rs | 21 +- nexus/src/populate.rs | 4 +- nexus/tests/integration_tests/saml.rs | 25 +- nexus/tests/integration_tests/silos.rs | 27 +- workspace-hack/Cargo.toml | 1 + 69 files changed, 800 insertions(+), 605 deletions(-) create mode 100644 nexus/auth/Cargo.toml create mode 100644 nexus/auth/build.rs rename nexus/{db-queries => auth}/src/authn/external/cookies.rs (98%) rename nexus/{db-queries => auth}/src/authn/external/mod.rs (99%) rename nexus/{db-queries => 
auth}/src/authn/external/session_cookie.rs (99%) rename nexus/{db-queries => auth}/src/authn/external/spoof.rs (99%) rename nexus/{db-queries => auth}/src/authn/external/token.rs (100%) rename nexus/{db-queries => auth}/src/authn/mod.rs (94%) rename nexus/{db-queries => auth}/src/authn/saga.rs (100%) rename nexus/{db-queries => auth}/src/authn/silos.rs (86%) rename nexus/{db-queries => auth}/src/authz/actor.rs (100%) rename nexus/{db-queries => auth}/src/authz/api_resources.rs (83%) rename nexus/{db-queries => auth}/src/authz/context.rs (80%) rename nexus/{db-queries => auth}/src/authz/mod.rs (98%) rename nexus/{db-queries => auth}/src/authz/omicron.polar (100%) rename nexus/{db-queries => auth}/src/authz/oso_generic.rs (96%) rename nexus/{db-queries => auth}/src/authz/roles.rs (96%) rename nexus/{db-queries => auth}/src/context.rs (92%) create mode 100644 nexus/auth/src/lib.rs create mode 100644 nexus/auth/src/storage.rs create mode 100644 nexus/db-fixed-data/Cargo.toml create mode 100644 nexus/db-fixed-data/build.rs rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/allow_list.rs (100%) rename nexus/{db-queries/src/db/fixed_data/mod.rs => db-fixed-data/src/lib.rs} (100%) rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/project.rs (79%) rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/role_assignment.rs (97%) rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/role_builtin.rs (99%) rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/silo.rs (91%) rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/silo_user.rs (66%) rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/user_builtin.rs (100%) rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/vpc.rs (91%) rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/vpc_firewall_rule.rs (100%) rename nexus/{db-queries/src/db/fixed_data => db-fixed-data/src}/vpc_subnet.rs (98%) create mode 100644 
nexus/db-queries/src/db/datastore/auth.rs rename nexus/db-queries/src/{authz => }/policy_test/coverage.rs (97%) rename nexus/db-queries/src/{authz => }/policy_test/mod.rs (97%) rename nexus/db-queries/src/{authz => }/policy_test/resource_builder.rs (74%) rename nexus/db-queries/src/{authz => }/policy_test/resources.rs (99%) diff --git a/Cargo.lock b/Cargo.lock index 9072aff98c..4f4fa019c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4473,6 +4473,44 @@ dependencies = [ "rustc_version 0.1.7", ] +[[package]] +name = "nexus-auth" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-trait", + "authz-macros", + "base64 0.22.1", + "chrono", + "cookie 0.18.1", + "dropshot", + "futures", + "headers", + "http 0.2.12", + "hyper 0.14.28", + "newtype_derive", + "nexus-db-fixed-data", + "nexus-db-model", + "nexus-types", + "omicron-common", + "omicron-rpaths", + "omicron-test-utils", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "once_cell", + "openssl", + "oso", + "pq-sys", + "samael", + "serde", + "serde_urlencoded", + "slog", + "strum", + "thiserror", + "tokio", + "uuid", +] + [[package]] name = "nexus-client" version = "0.1.0" @@ -4515,6 +4553,21 @@ dependencies = [ "uuid", ] +[[package]] +name = "nexus-db-fixed-data" +version = "0.1.0" +dependencies = [ + "nexus-db-model", + "nexus-types", + "omicron-common", + "omicron-rpaths", + "omicron-workspace-hack", + "once_cell", + "pq-sys", + "strum", + "uuid", +] + [[package]] name = "nexus-db-model" version = "0.1.0" @@ -4568,14 +4621,11 @@ dependencies = [ "assert_matches", "async-bb8-diesel", "async-trait", - "authz-macros", - "base64 0.22.1", "bb8", "camino", "camino-tempfile", "chrono", "const_format", - "cookie 0.18.1", "db-macros", "diesel", "diesel-dtrace", @@ -4583,17 +4633,15 @@ dependencies = [ "expectorate", "futures", "gateway-client", - "headers", - "http 0.2.12", - "hyper 0.14.28", "hyper-rustls 0.26.0", "illumos-utils", "internal-dns", "ipnetwork", "itertools 0.12.1", "macaddr", - "newtype_derive", + 
"nexus-auth", "nexus-config", + "nexus-db-fixed-data", "nexus-db-model", "nexus-inventory", "nexus-reconfigurator-planning", @@ -4608,7 +4656,6 @@ dependencies = [ "omicron-workspace-hack", "once_cell", "openapiv3", - "openssl", "oso", "oximeter", "oxnet", @@ -4623,12 +4670,10 @@ dependencies = [ "ref-cast", "regex", "rustls 0.22.4", - "samael", "schemars", "semver 1.0.23", "serde", "serde_json", - "serde_urlencoded", "serde_with", "sled-agent-client", "slog", @@ -5437,6 +5482,7 @@ dependencies = [ "itertools 0.12.1", "macaddr", "mg-admin-client", + "nexus-auth", "nexus-client", "nexus-config", "nexus-db-model", @@ -5921,6 +5967,7 @@ dependencies = [ "trust-dns-proto", "unicode-bidi", "unicode-normalization", + "unicode-xid", "usdt 0.5.0", "usdt-impl 0.5.0", "uuid", diff --git a/Cargo.toml b/Cargo.toml index 5c0433a662..4eb76f5859 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,8 @@ members = [ "nexus", "nexus-config", "nexus/authz-macros", + "nexus/auth", + "nexus/db-fixed-data", "nexus/db-macros", "nexus/db-model", "nexus/db-queries", @@ -123,9 +125,11 @@ default-members = [ "nexus", "nexus-config", "nexus/authz-macros", + "nexus/auth", "nexus/macros-common", "nexus/metrics-producer-gc", "nexus/networking", + "nexus/db-fixed-data", "nexus/db-macros", "nexus/db-model", "nexus/db-queries", @@ -317,8 +321,10 @@ newtype_derive = "0.1.6" mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "5630887d0373857f77cb264f84aa19bdec720ce3" } ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "5630887d0373857f77cb264f84aa19bdec720ce3" } multimap = "0.10.0" +nexus-auth = { path = "nexus/auth" } nexus-client = { path = "clients/nexus-client" } nexus-config = { path = "nexus-config" } +nexus-db-fixed-data = { path = "nexus/db-fixed-data" } nexus-db-model = { path = "nexus/db-model" } nexus-db-queries = { path = "nexus/db-queries" } nexus-defaults = { path = "nexus/defaults" } diff --git a/nexus/Cargo.toml 
b/nexus/Cargo.toml index 0b0bd097bc..58a1e824cb 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -86,6 +86,7 @@ tough.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true +nexus-auth.workspace = true nexus-defaults.workspace = true nexus-db-model.workspace = true nexus-db-queries.workspace = true diff --git a/nexus/auth/Cargo.toml b/nexus/auth/Cargo.toml new file mode 100644 index 0000000000..1a926f1789 --- /dev/null +++ b/nexus/auth/Cargo.toml @@ -0,0 +1,48 @@ +[package] +name = "nexus-auth" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[build-dependencies] +omicron-rpaths.workspace = true + +[dependencies] +anyhow.workspace = true +async-trait.workspace = true +base64.workspace = true +chrono.workspace = true +cookie.workspace = true +dropshot.workspace = true +futures.workspace = true +headers.workspace = true +http.workspace = true +hyper.workspace = true +newtype_derive.workspace = true +# See omicron-rpaths for more about the "pq-sys" dependency. +pq-sys = "*" +once_cell.workspace = true +openssl.workspace = true +oso.workspace = true +samael.workspace = true +serde.workspace = true +serde_urlencoded.workspace = true +slog.workspace = true +strum.workspace = true +thiserror.workspace = true +tokio = { workspace = true, features = ["full"] } +uuid.workspace = true + +authz-macros.workspace = true +nexus-db-fixed-data.workspace = true +nexus-db-model.workspace = true +nexus-types.workspace = true +omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +omicron-workspace-hack.workspace = true + +[dev-dependencies] +omicron-test-utils.workspace = true diff --git a/nexus/auth/build.rs b/nexus/auth/build.rs new file mode 100644 index 0000000000..1ba9acd41c --- /dev/null +++ b/nexus/auth/build.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// See omicron-rpaths for documentation. +// NOTE: This file MUST be kept in sync with the other build.rs files in this +// repository. +fn main() { + omicron_rpaths::configure_default_omicron_rpaths(); +} diff --git a/nexus/db-queries/src/authn/external/cookies.rs b/nexus/auth/src/authn/external/cookies.rs similarity index 98% rename from nexus/db-queries/src/authn/external/cookies.rs rename to nexus/auth/src/authn/external/cookies.rs index e3ad2e3264..35e697475b 100644 --- a/nexus/db-queries/src/authn/external/cookies.rs +++ b/nexus/auth/src/authn/external/cookies.rs @@ -9,6 +9,8 @@ use dropshot::{ ApiEndpointBodyContentType, ExtensionMode, ExtractorMetadata, HttpError, RequestContext, ServerContext, SharedExtractor, }; +use newtype_derive::NewtypeDeref; +use newtype_derive::NewtypeFrom; pub fn parse_cookies( headers: &http::HeaderMap, diff --git a/nexus/db-queries/src/authn/external/mod.rs b/nexus/auth/src/authn/external/mod.rs similarity index 99% rename from nexus/db-queries/src/authn/external/mod.rs rename to nexus/auth/src/authn/external/mod.rs index 623544d38c..ccb7218285 100644 --- a/nexus/db-queries/src/authn/external/mod.rs +++ b/nexus/auth/src/authn/external/mod.rs @@ -9,6 +9,7 @@ use super::SiloAuthnPolicy; use crate::authn; use async_trait::async_trait; use authn::Reason; +use slog::trace; use std::borrow::Borrow; use uuid::Uuid; diff --git a/nexus/db-queries/src/authn/external/session_cookie.rs b/nexus/auth/src/authn/external/session_cookie.rs similarity index 99% rename from nexus/db-queries/src/authn/external/session_cookie.rs rename to nexus/auth/src/authn/external/session_cookie.rs index 74faafef9b..7811bf2826 100644 --- a/nexus/db-queries/src/authn/external/session_cookie.rs +++ b/nexus/auth/src/authn/external/session_cookie.rs @@ -13,6 +13,7 @@ use async_trait::async_trait; use chrono::{DateTime, Duration, Utc}; use 
dropshot::HttpError; use http::HeaderValue; +use slog::debug; use uuid::Uuid; // many parts of the implementation will reference this OWASP guide diff --git a/nexus/db-queries/src/authn/external/spoof.rs b/nexus/auth/src/authn/external/spoof.rs similarity index 99% rename from nexus/db-queries/src/authn/external/spoof.rs rename to nexus/auth/src/authn/external/spoof.rs index 9b5ed94bde..326d529431 100644 --- a/nexus/db-queries/src/authn/external/spoof.rs +++ b/nexus/auth/src/authn/external/spoof.rs @@ -17,6 +17,7 @@ use async_trait::async_trait; use headers::authorization::{Authorization, Bearer}; use headers::HeaderMapExt; use once_cell::sync::Lazy; +use slog::debug; use uuid::Uuid; // This scheme is intended for demos, development, and testing until we have a diff --git a/nexus/db-queries/src/authn/external/token.rs b/nexus/auth/src/authn/external/token.rs similarity index 100% rename from nexus/db-queries/src/authn/external/token.rs rename to nexus/auth/src/authn/external/token.rs diff --git a/nexus/db-queries/src/authn/mod.rs b/nexus/auth/src/authn/mod.rs similarity index 94% rename from nexus/db-queries/src/authn/mod.rs rename to nexus/auth/src/authn/mod.rs index 305c359820..08b27b9773 100644 --- a/nexus/db-queries/src/authn/mod.rs +++ b/nexus/auth/src/authn/mod.rs @@ -28,22 +28,21 @@ pub mod external; pub mod saga; pub mod silos; -pub use crate::db::fixed_data::silo_user::USER_TEST_PRIVILEGED; -pub use crate::db::fixed_data::silo_user::USER_TEST_UNPRIVILEGED; -pub use crate::db::fixed_data::user_builtin::USER_DB_INIT; -pub use crate::db::fixed_data::user_builtin::USER_EXTERNAL_AUTHN; -pub use crate::db::fixed_data::user_builtin::USER_INTERNAL_API; -pub use crate::db::fixed_data::user_builtin::USER_INTERNAL_READ; -pub use crate::db::fixed_data::user_builtin::USER_SAGA_RECOVERY; -pub use crate::db::fixed_data::user_builtin::USER_SERVICE_BALANCER; -use crate::db::model::ConsoleSession; +pub use nexus_db_fixed_data::silo_user::USER_TEST_PRIVILEGED; +pub use 
nexus_db_fixed_data::silo_user::USER_TEST_UNPRIVILEGED; +pub use nexus_db_fixed_data::user_builtin::USER_DB_INIT; +pub use nexus_db_fixed_data::user_builtin::USER_EXTERNAL_AUTHN; +pub use nexus_db_fixed_data::user_builtin::USER_INTERNAL_API; +pub use nexus_db_fixed_data::user_builtin::USER_INTERNAL_READ; +pub use nexus_db_fixed_data::user_builtin::USER_SAGA_RECOVERY; +pub use nexus_db_fixed_data::user_builtin::USER_SERVICE_BALANCER; use crate::authz; -use crate::db; -use crate::db::fixed_data::silo::DEFAULT_SILO; -use crate::db::identity::Asset; +use newtype_derive::NewtypeDisplay; +use nexus_db_fixed_data::silo::DEFAULT_SILO; use nexus_types::external_api::shared::FleetRole; use nexus_types::external_api::shared::SiloRole; +use nexus_types::identity::Asset; use omicron_common::api::external::LookupType; use serde::Deserialize; use serde::Serialize; @@ -254,7 +253,6 @@ pub struct SiloAuthnPolicy { } impl SiloAuthnPolicy { - #[cfg(test)] pub fn new( mapped_fleet_roles: BTreeMap>, ) -> SiloAuthnPolicy { @@ -290,8 +288,8 @@ mod test { use super::USER_SERVICE_BALANCER; use super::USER_TEST_PRIVILEGED; use super::USER_TEST_UNPRIVILEGED; - use crate::db::fixed_data::user_builtin::USER_EXTERNAL_AUTHN; - use crate::db::identity::Asset; + use nexus_db_fixed_data::user_builtin::USER_EXTERNAL_AUTHN; + use nexus_types::identity::Asset; #[test] fn test_internal_users() { @@ -386,11 +384,13 @@ impl Actor { } } -impl From<&Actor> for db::model::IdentityType { - fn from(actor: &Actor) -> db::model::IdentityType { +impl From<&Actor> for nexus_db_model::IdentityType { + fn from(actor: &Actor) -> nexus_db_model::IdentityType { match actor { - Actor::UserBuiltin { .. } => db::model::IdentityType::UserBuiltin, - Actor::SiloUser { .. } => db::model::IdentityType::SiloUser, + Actor::UserBuiltin { .. } => { + nexus_db_model::IdentityType::UserBuiltin + } + Actor::SiloUser { .. 
} => nexus_db_model::IdentityType::SiloUser, } } } @@ -421,7 +421,7 @@ impl std::fmt::Debug for Actor { /// A console session with the silo id of the authenticated user #[derive(Clone, Debug)] pub struct ConsoleSessionWithSiloId { - pub console_session: ConsoleSession, + pub console_session: nexus_db_model::ConsoleSession, pub silo_id: Uuid, } diff --git a/nexus/db-queries/src/authn/saga.rs b/nexus/auth/src/authn/saga.rs similarity index 100% rename from nexus/db-queries/src/authn/saga.rs rename to nexus/auth/src/authn/saga.rs diff --git a/nexus/db-queries/src/authn/silos.rs b/nexus/auth/src/authn/silos.rs similarity index 86% rename from nexus/db-queries/src/authn/silos.rs rename to nexus/auth/src/authn/silos.rs index fc5068fc3c..40b6346fa0 100644 --- a/nexus/db-queries/src/authn/silos.rs +++ b/nexus/auth/src/authn/silos.rs @@ -4,12 +4,6 @@ //! Silo related authentication types and functions -use crate::authz; -use crate::context::OpContext; -use crate::db::lookup::LookupPath; -use crate::db::{model, DataStore}; -use omicron_common::api::external::LookupResult; - use anyhow::{anyhow, Result}; use base64::Engine; use dropshot::HttpError; @@ -36,10 +30,10 @@ pub struct SamlIdentityProvider { pub group_attribute_name: Option, } -impl TryFrom for SamlIdentityProvider { +impl TryFrom for SamlIdentityProvider { type Error = anyhow::Error; fn try_from( - model: model::SamlIdentityProvider, + model: nexus_db_model::SamlIdentityProvider, ) -> Result { let provider = SamlIdentityProvider { idp_metadata_document_string: model.idp_metadata_document_string, @@ -68,57 +62,6 @@ pub enum IdentityProviderType { Saml(SamlIdentityProvider), } -impl IdentityProviderType { - /// First, look up the provider type, then look in for the specific - /// provider details. 
- pub async fn lookup( - datastore: &DataStore, - opctx: &OpContext, - silo_name: &model::Name, - provider_name: &model::Name, - ) -> LookupResult<(authz::Silo, model::Silo, Self)> { - let (authz_silo, db_silo) = LookupPath::new(opctx, datastore) - .silo_name(silo_name) - .fetch() - .await?; - - let (.., identity_provider) = LookupPath::new(opctx, datastore) - .silo_name(silo_name) - .identity_provider_name(provider_name) - .fetch() - .await?; - - match identity_provider.provider_type { - model::IdentityProviderType::Saml => { - let (.., saml_identity_provider) = - LookupPath::new(opctx, datastore) - .silo_name(silo_name) - .saml_identity_provider_name(provider_name) - .fetch() - .await?; - - let saml_identity_provider = IdentityProviderType::Saml( - saml_identity_provider.try_into() - .map_err(|e: anyhow::Error| - // If an error is encountered converting from the - // model to the authn type here, this is a server - // error: it was validated before it went into the - // DB. - omicron_common::api::external::Error::internal_error( - &format!( - "saml_identity_provider.try_into() failed! {}", - &e.to_string() - ) - ) - )? 
- ); - - Ok((authz_silo, db_silo, saml_identity_provider)) - } - } - } -} - impl SamlIdentityProvider { pub fn sign_in_url(&self, relay_state: Option) -> Result { let idp_metadata: EntityDescriptor = diff --git a/nexus/db-queries/src/authz/actor.rs b/nexus/auth/src/authz/actor.rs similarity index 100% rename from nexus/db-queries/src/authz/actor.rs rename to nexus/auth/src/authz/actor.rs diff --git a/nexus/db-queries/src/authz/api_resources.rs b/nexus/auth/src/authz/api_resources.rs similarity index 83% rename from nexus/db-queries/src/authz/api_resources.rs rename to nexus/auth/src/authz/api_resources.rs index 69b883a8cf..98a24b68b5 100644 --- a/nexus/db-queries/src/authz/api_resources.rs +++ b/nexus/auth/src/authz/api_resources.rs @@ -34,13 +34,11 @@ use super::Action; use super::{actor::AuthenticatedActor, Authz}; use crate::authn; use crate::context::OpContext; -use crate::db; -use crate::db::fixed_data::FLEET_ID; -use crate::db::model::{ArtifactId, SemverVersion}; -use crate::db::DataStore; use authz_macros::authz_resource; use futures::future::BoxFuture; use futures::FutureExt; +use nexus_db_fixed_data::FLEET_ID; +use nexus_db_model::{ArtifactId, SemverVersion}; use nexus_types::external_api::shared::{FleetRole, ProjectRole, SiloRole}; use omicron_common::api::external::{Error, LookupType, ResourceType}; use once_cell::sync::Lazy; @@ -103,27 +101,21 @@ pub trait ApiResourceWithRoles: ApiResource { pub trait ApiResourceWithRolesType: ApiResourceWithRoles { type AllowedRoles: serde::Serialize + serde::de::DeserializeOwned - + db::model::DatabaseString + + nexus_db_model::DatabaseString + Clone; } -impl AuthorizedResource for T { - fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>( - &'a self, - opctx: &'b OpContext, - datastore: &'c DataStore, - authn: &'d authn::Context, - roleset: &'e mut RoleSet, - ) -> BoxFuture<'f, Result<(), Error>> - where - 'a: 'f, - 'b: 'f, - 'c: 'f, - 'd: 'f, - 'e: 'f, - { - load_roles_for_resource_tree(self, opctx, datastore, authn, roleset) - 
.boxed() +impl AuthorizedResource for T +where + T: ApiResource + oso::PolarClass + Clone, +{ + fn load_roles<'fut>( + &'fut self, + opctx: &'fut OpContext, + authn: &'fut authn::Context, + roleset: &'fut mut RoleSet, + ) -> BoxFuture<'fut, Result<(), Error>> { + load_roles_for_resource_tree(self, opctx, authn, roleset).boxed() } fn on_unauthorized( @@ -263,26 +255,17 @@ impl oso::PolarClass for BlueprintConfig { } impl AuthorizedResource for BlueprintConfig { - fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>( - &'a self, - opctx: &'b OpContext, - datastore: &'c DataStore, - authn: &'d authn::Context, - roleset: &'e mut RoleSet, - ) -> futures::future::BoxFuture<'f, Result<(), Error>> - where - 'a: 'f, - 'b: 'f, - 'c: 'f, - 'd: 'f, - 'e: 'f, - { + fn load_roles<'fut>( + &'fut self, + opctx: &'fut OpContext, + authn: &'fut authn::Context, + roleset: &'fut mut RoleSet, + ) -> futures::future::BoxFuture<'fut, Result<(), Error>> { // There are no roles on the BlueprintConfig, only permissions. But we // still need to load the Fleet-related roles to verify that the actor // has the "admin" role on the Fleet (possibly conferred from a Silo // role). 
- load_roles_for_resource_tree(&FLEET, opctx, datastore, authn, roleset) - .boxed() + load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed() } fn on_unauthorized( @@ -323,22 +306,13 @@ impl oso::PolarClass for ConsoleSessionList { } impl AuthorizedResource for ConsoleSessionList { - fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>( - &'a self, - opctx: &'b OpContext, - datastore: &'c DataStore, - authn: &'d authn::Context, - roleset: &'e mut RoleSet, - ) -> futures::future::BoxFuture<'f, Result<(), Error>> - where - 'a: 'f, - 'b: 'f, - 'c: 'f, - 'd: 'f, - 'e: 'f, - { - load_roles_for_resource_tree(&FLEET, opctx, datastore, authn, roleset) - .boxed() + fn load_roles<'fut>( + &'fut self, + opctx: &'fut OpContext, + authn: &'fut authn::Context, + roleset: &'fut mut RoleSet, + ) -> futures::future::BoxFuture<'fut, Result<(), Error>> { + load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed() } fn on_unauthorized( @@ -379,22 +353,13 @@ impl oso::PolarClass for DnsConfig { } impl AuthorizedResource for DnsConfig { - fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>( - &'a self, - opctx: &'b OpContext, - datastore: &'c DataStore, - authn: &'d authn::Context, - roleset: &'e mut RoleSet, - ) -> futures::future::BoxFuture<'f, Result<(), Error>> - where - 'a: 'f, - 'b: 'f, - 'c: 'f, - 'd: 'f, - 'e: 'f, - { - load_roles_for_resource_tree(&FLEET, opctx, datastore, authn, roleset) - .boxed() + fn load_roles<'fut>( + &'fut self, + opctx: &'fut OpContext, + authn: &'fut authn::Context, + roleset: &'fut mut RoleSet, + ) -> futures::future::BoxFuture<'fut, Result<(), Error>> { + load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed() } fn on_unauthorized( @@ -435,25 +400,16 @@ impl oso::PolarClass for IpPoolList { } impl AuthorizedResource for IpPoolList { - fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>( - &'a self, - opctx: &'b OpContext, - datastore: &'c DataStore, - authn: &'d authn::Context, - roleset: &'e mut RoleSet, - ) -> futures::future::BoxFuture<'f, Result<(), 
Error>> - where - 'a: 'f, - 'b: 'f, - 'c: 'f, - 'd: 'f, - 'e: 'f, - { + fn load_roles<'fut>( + &'fut self, + opctx: &'fut OpContext, + authn: &'fut authn::Context, + roleset: &'fut mut RoleSet, + ) -> futures::future::BoxFuture<'fut, Result<(), Error>> { // There are no roles on the IpPoolList, only permissions. But we still // need to load the Fleet-related roles to verify that the actor has the // "admin" role on the Fleet (possibly conferred from a Silo role). - load_roles_for_resource_tree(&FLEET, opctx, datastore, authn, roleset) - .boxed() + load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed() } fn on_unauthorized( @@ -486,25 +442,16 @@ impl oso::PolarClass for DeviceAuthRequestList { } impl AuthorizedResource for DeviceAuthRequestList { - fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>( - &'a self, - opctx: &'b OpContext, - datastore: &'c DataStore, - authn: &'d authn::Context, - roleset: &'e mut RoleSet, - ) -> futures::future::BoxFuture<'f, Result<(), Error>> - where - 'a: 'f, - 'b: 'f, - 'c: 'f, - 'd: 'f, - 'e: 'f, - { + fn load_roles<'fut>( + &'fut self, + opctx: &'fut OpContext, + authn: &'fut authn::Context, + roleset: &'fut mut RoleSet, + ) -> futures::future::BoxFuture<'fut, Result<(), Error>> { // There are no roles on the DeviceAuthRequestList, only permissions. But we // still need to load the Fleet-related roles to verify that the actor has the // "admin" role on the Fleet. 
- load_roles_for_resource_tree(&FLEET, opctx, datastore, authn, roleset) - .boxed() + load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed() } fn on_unauthorized( @@ -544,22 +491,13 @@ impl oso::PolarClass for Inventory { } impl AuthorizedResource for Inventory { - fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>( - &'a self, - opctx: &'b OpContext, - datastore: &'c DataStore, - authn: &'d authn::Context, - roleset: &'e mut RoleSet, - ) -> futures::future::BoxFuture<'f, Result<(), Error>> - where - 'a: 'f, - 'b: 'f, - 'c: 'f, - 'd: 'f, - 'e: 'f, - { - load_roles_for_resource_tree(&FLEET, opctx, datastore, authn, roleset) - .boxed() + fn load_roles<'fut>( + &'fut self, + opctx: &'fut OpContext, + authn: &'fut authn::Context, + roleset: &'fut mut RoleSet, + ) -> futures::future::BoxFuture<'fut, Result<(), Error>> { + load_roles_for_resource_tree(&FLEET, opctx, authn, roleset).boxed() } fn on_unauthorized( @@ -603,23 +541,15 @@ impl oso::PolarClass for SiloCertificateList { } impl AuthorizedResource for SiloCertificateList { - fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>( - &'a self, - opctx: &'b OpContext, - datastore: &'c DataStore, - authn: &'d authn::Context, - roleset: &'e mut RoleSet, - ) -> futures::future::BoxFuture<'f, Result<(), Error>> - where - 'a: 'f, - 'b: 'f, - 'c: 'f, - 'd: 'f, - 'e: 'f, - { + fn load_roles<'fut>( + &'fut self, + opctx: &'fut OpContext, + authn: &'fut authn::Context, + roleset: &'fut mut RoleSet, + ) -> futures::future::BoxFuture<'fut, Result<(), Error>> { // There are no roles on this resource, but we still need to load the // Silo-related roles. 
- self.silo().load_roles(opctx, datastore, authn, roleset) + self.silo().load_roles(opctx, authn, roleset) } fn on_unauthorized( @@ -663,23 +593,15 @@ impl oso::PolarClass for SiloIdentityProviderList { } impl AuthorizedResource for SiloIdentityProviderList { - fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>( - &'a self, - opctx: &'b OpContext, - datastore: &'c DataStore, - authn: &'d authn::Context, - roleset: &'e mut RoleSet, - ) -> futures::future::BoxFuture<'f, Result<(), Error>> - where - 'a: 'f, - 'b: 'f, - 'c: 'f, - 'd: 'f, - 'e: 'f, - { + fn load_roles<'fut>( + &'fut self, + opctx: &'fut OpContext, + authn: &'fut authn::Context, + roleset: &'fut mut RoleSet, + ) -> futures::future::BoxFuture<'fut, Result<(), Error>> { // There are no roles on this resource, but we still need to load the // Silo-related roles. - self.silo().load_roles(opctx, datastore, authn, roleset) + self.silo().load_roles(opctx, authn, roleset) } fn on_unauthorized( @@ -720,23 +642,15 @@ impl oso::PolarClass for SiloUserList { } impl AuthorizedResource for SiloUserList { - fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>( - &'a self, - opctx: &'b OpContext, - datastore: &'c DataStore, - authn: &'d authn::Context, - roleset: &'e mut RoleSet, - ) -> futures::future::BoxFuture<'f, Result<(), Error>> - where - 'a: 'f, - 'b: 'f, - 'c: 'f, - 'd: 'f, - 'e: 'f, - { + fn load_roles<'fut>( + &'fut self, + opctx: &'fut OpContext, + authn: &'fut authn::Context, + roleset: &'fut mut RoleSet, + ) -> futures::future::BoxFuture<'fut, Result<(), Error>> { // There are no roles on this resource, but we still need to load the // Silo-related roles. 
- self.silo().load_roles(opctx, datastore, authn, roleset) + self.silo().load_roles(opctx, authn, roleset) } fn on_unauthorized( diff --git a/nexus/db-queries/src/authz/context.rs b/nexus/auth/src/authz/context.rs similarity index 80% rename from nexus/db-queries/src/authz/context.rs rename to nexus/auth/src/authz/context.rs index 0d6f2a73ac..bd375321e3 100644 --- a/nexus/db-queries/src/authz/context.rs +++ b/nexus/auth/src/authz/context.rs @@ -10,12 +10,13 @@ use crate::authn; use crate::authz::oso_generic; use crate::authz::Action; use crate::context::OpContext; -use crate::db::DataStore; +use crate::storage::Storage; use futures::future::BoxFuture; use omicron_common::api::external::Error; use omicron_common::bail_unless; use oso::Oso; use oso::OsoError; +use slog::debug; use std::collections::BTreeSet; use std::sync::Arc; @@ -51,7 +52,6 @@ impl Authz { self.oso.is_allowed(actor.clone(), action, resource.clone()) } - #[cfg(test)] pub fn into_class_names(self) -> BTreeSet { self.class_names } @@ -66,18 +66,22 @@ impl Authz { pub struct Context { authn: Arc, authz: Arc, - datastore: Arc, + datastore: Arc, } impl Context { pub fn new( authn: Arc, authz: Arc, - datastore: Arc, + datastore: Arc, ) -> Context { Context { authn, authz, datastore } } + pub(crate) fn datastore(&self) -> &Arc { + &self.datastore + } + /// Check whether the actor performing this request is authorized for /// `action` on `resource`. pub async fn authorize( @@ -111,9 +115,7 @@ impl Context { ); let mut roles = RoleSet::new(); - resource - .load_roles(opctx, &self.datastore, &self.authn, &mut roles) - .await?; + resource.load_roles(opctx, &self.authn, &mut roles).await?; debug!(opctx.log, "roles"; "roles" => ?roles); let actor = AnyActor::new(&self.authn, roles); let is_authn = self.authn.actor().is_some(); @@ -162,19 +164,12 @@ pub trait AuthorizedResource: oso::ToPolar + Send + Sync + 'static { /// That's how this works for most resources. 
There are other kinds of /// resources (like the Database itself) that aren't stored in the database /// and for which a different mechanism might be used. - fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>( - &'a self, - opctx: &'b OpContext, - datastore: &'c DataStore, - authn: &'d authn::Context, - roleset: &'e mut RoleSet, - ) -> BoxFuture<'f, Result<(), Error>> - where - 'a: 'f, - 'b: 'f, - 'c: 'f, - 'd: 'f, - 'e: 'f; + fn load_roles<'fut>( + &'fut self, + opctx: &'fut OpContext, + authn: &'fut authn::Context, + roleset: &'fut mut RoleSet, + ) -> BoxFuture<'fut, Result<(), Error>>; /// Invoked on authz failure to determine the final authz result /// @@ -196,17 +191,45 @@ pub trait AuthorizedResource: oso::ToPolar + Send + Sync + 'static { mod test { use crate::authn; use crate::authz::Action; + use crate::authz::AnyActor; use crate::authz::Authz; use crate::authz::Context; - use crate::db::DataStore; - use nexus_test_utils::db::test_setup_database; + use crate::authz::RoleSet; + use crate::context::OpContext; + use nexus_db_model::IdentityType; + use nexus_db_model::RoleAssignment; + use omicron_common::api::external::Error; + use omicron_common::api::external::ResourceType; use omicron_test_utils::dev; use std::sync::Arc; + use uuid::Uuid; + + struct FakeStorage {} + + impl FakeStorage { + fn new() -> Arc { + Arc::new(Self {}) + } + } + + #[async_trait::async_trait] + impl crate::storage::Storage for FakeStorage { + async fn role_asgn_list_for( + &self, + _opctx: &OpContext, + _identity_type: IdentityType, + _identity_id: Uuid, + _resource_type: ResourceType, + _resource_id: Uuid, + ) -> Result, Error> { + unimplemented!("This test is not expected to access the database"); + } + } fn authz_context_for_actor( log: &slog::Logger, authn: authn::Context, - datastore: Arc, + datastore: Arc, ) -> Context { let authz = Authz::new(log); Context::new(Arc::new(authn), Arc::new(authz), datastore) @@ -215,34 +238,26 @@ mod test { #[tokio::test] async fn test_unregistered_resource() 
{ let logctx = dev::test_setup_log("test_unregistered_resource"); - let mut db = test_setup_database(&logctx.log).await; - let (opctx, datastore) = - crate::db::datastore::test_utils::datastore_test(&logctx, &db) - .await; + let datastore = FakeStorage::new(); + let opctx = OpContext::for_background( + logctx.log.new(o!()), + Arc::new(Authz::new(&logctx.log)), + authn::Context::internal_db_init(), + Arc::clone(&datastore) as Arc, + ); // Define a resource that we "forget" to register with Oso. - use super::AuthorizedResource; - use crate::authz::actor::AnyActor; - use crate::authz::roles::RoleSet; - use crate::context::OpContext; - use omicron_common::api::external::Error; + use crate::authz::AuthorizedResource; use oso::PolarClass; #[derive(Clone, PolarClass)] struct UnregisteredResource; impl AuthorizedResource for UnregisteredResource { - fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>( - &'a self, - _: &'b OpContext, - _: &'c DataStore, - _: &'d authn::Context, - _: &'e mut RoleSet, - ) -> futures::future::BoxFuture<'f, Result<(), Error>> - where - 'a: 'f, - 'b: 'f, - 'c: 'f, - 'd: 'f, - 'e: 'f, + fn load_roles<'fut>( + &'fut self, + _: &'fut OpContext, + _: &'fut authn::Context, + _: &'fut mut RoleSet, + ) -> futures::future::BoxFuture<'fut, Result<(), Error>> { // authorize() shouldn't get far enough to call this. 
unimplemented!(); @@ -270,7 +285,7 @@ mod test { let authz_privileged = authz_context_for_actor( &logctx.log, authn::Context::privileged_test_user(), - Arc::clone(&datastore), + Arc::clone(&datastore) as Arc, ); let error = authz_privileged .authorize(&opctx, Action::Read, unregistered_resource) @@ -281,7 +296,6 @@ mod test { }) if internal_message == "attempted authz check \ on unregistered resource: \"UnregisteredResource\"")); - db.cleanup().await.unwrap(); logctx.cleanup_successful(); } } diff --git a/nexus/db-queries/src/authz/mod.rs b/nexus/auth/src/authz/mod.rs similarity index 98% rename from nexus/db-queries/src/authz/mod.rs rename to nexus/auth/src/authz/mod.rs index 6b7dab7208..1c666d2296 100644 --- a/nexus/db-queries/src/authz/mod.rs +++ b/nexus/auth/src/authz/mod.rs @@ -168,6 +168,8 @@ //! allowed. Otherwise, it's not. mod actor; +pub use actor::AnyActor; +pub use actor::AuthenticatedActor; mod api_resources; pub use api_resources::*; @@ -179,9 +181,8 @@ pub use context::Context; mod oso_generic; pub use oso_generic::Action; +pub use oso_generic::Database; pub use oso_generic::DATABASE; mod roles; - -#[cfg(test)] -mod policy_test; +pub use roles::RoleSet; diff --git a/nexus/db-queries/src/authz/omicron.polar b/nexus/auth/src/authz/omicron.polar similarity index 100% rename from nexus/db-queries/src/authz/omicron.polar rename to nexus/auth/src/authz/omicron.polar diff --git a/nexus/db-queries/src/authz/oso_generic.rs b/nexus/auth/src/authz/oso_generic.rs similarity index 96% rename from nexus/db-queries/src/authz/oso_generic.rs rename to nexus/auth/src/authz/oso_generic.rs index dd646a1c98..383a06e985 100644 --- a/nexus/db-queries/src/authz/oso_generic.rs +++ b/nexus/auth/src/authz/oso_generic.rs @@ -12,7 +12,6 @@ use super::roles::RoleSet; use super::Authz; use crate::authn; use crate::context::OpContext; -use crate::db::DataStore; use anyhow::ensure; use anyhow::Context; use futures::future::BoxFuture; @@ -20,6 +19,7 @@ use futures::FutureExt; use 
omicron_common::api::external::Error; use oso::Oso; use oso::PolarClass; +use slog::info; use std::collections::BTreeSet; use std::fmt; @@ -172,8 +172,7 @@ pub fn make_omicron_oso(log: &slog::Logger) -> Result { /// /// There's currently just one enum of Actions for all of Omicron. We expect /// most objects to support mostly the same set of actions. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -#[cfg_attr(test, derive(strum::EnumIter))] +#[derive(Clone, Copy, Debug, Eq, PartialEq, strum::EnumIter)] pub enum Action { Query, // only used for `Database` Read, @@ -267,20 +266,12 @@ impl oso::PolarClass for Database { } impl AuthorizedResource for Database { - fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>( - &'a self, - _: &'b OpContext, - _: &'c DataStore, - _: &'d authn::Context, - _: &'e mut RoleSet, - ) -> BoxFuture<'f, Result<(), Error>> - where - 'a: 'f, - 'b: 'f, - 'c: 'f, - 'd: 'f, - 'e: 'f, - { + fn load_roles<'fut>( + &'fut self, + _: &'fut OpContext, + _: &'fut authn::Context, + _: &'fut mut RoleSet, + ) -> BoxFuture<'fut, Result<(), Error>> { // We don't use (database) roles to grant access to the database. The // role assignment is hardcoded for all authenticated users. See the // "has_role" Polar method above. 
diff --git a/nexus/db-queries/src/authz/roles.rs b/nexus/auth/src/authz/roles.rs similarity index 96% rename from nexus/db-queries/src/authz/roles.rs rename to nexus/auth/src/authz/roles.rs index 11b3d482d1..0716e05bc7 100644 --- a/nexus/db-queries/src/authz/roles.rs +++ b/nexus/auth/src/authz/roles.rs @@ -37,9 +37,9 @@ use super::api_resources::ApiResource; use crate::authn; use crate::context::OpContext; -use crate::db::DataStore; use omicron_common::api::external::Error; use omicron_common::api::external::ResourceType; +use slog::trace; use std::collections::BTreeSet; use uuid::Uuid; @@ -87,7 +87,6 @@ impl RoleSet { pub async fn load_roles_for_resource_tree( resource: &R, opctx: &OpContext, - datastore: &DataStore, authn: &authn::Context, roleset: &mut RoleSet, ) -> Result<(), Error> @@ -100,7 +99,6 @@ where let resource_id = with_roles.resource_id(); load_directly_attached_roles( opctx, - datastore, authn, resource_type, resource_id, @@ -115,7 +113,6 @@ where { load_directly_attached_roles( opctx, - datastore, authn, resource_type, resource_id, @@ -135,7 +132,7 @@ where // it's clearer to just call this "parent" than // "related_resources_whose_roles_might_grant_access_to_this".) 
if let Some(parent) = resource.parent() { - parent.load_roles(opctx, datastore, authn, roleset).await?; + parent.load_roles(opctx, authn, roleset).await?; } Ok(()) @@ -143,7 +140,6 @@ where async fn load_directly_attached_roles( opctx: &OpContext, - datastore: &DataStore, authn: &authn::Context, resource_type: ResourceType, resource_id: Uuid, @@ -159,7 +155,8 @@ async fn load_directly_attached_roles( "resource_id" => resource_id.to_string(), ); - let roles = datastore + let roles = opctx + .datastore() .role_asgn_list_for( opctx, actor.into(), diff --git a/nexus/db-queries/src/context.rs b/nexus/auth/src/context.rs similarity index 92% rename from nexus/db-queries/src/context.rs rename to nexus/auth/src/context.rs index dfd1fe4322..0aac0900c5 100644 --- a/nexus/db-queries/src/context.rs +++ b/nexus/auth/src/context.rs @@ -8,9 +8,12 @@ use super::authz; use crate::authn::external::session_cookie::Session; use crate::authn::ConsoleSessionWithSiloId; use crate::authz::AuthorizedResource; -use crate::db::DataStore; +use crate::storage::Storage; use chrono::{DateTime, Utc}; use omicron_common::api::external::Error; +use slog::debug; +use slog::o; +use slog::trace; use std::collections::BTreeMap; use std::fmt::Debug; use std::sync::Arc; @@ -111,6 +114,10 @@ impl OpContext { }) } + pub(crate) fn datastore(&self) -> &Arc { + self.authz.datastore() + } + fn log_and_metadata_for_authn( log: &slog::Logger, authn: &authn::Context, @@ -135,8 +142,8 @@ impl OpContext { (log, metadata) } - pub fn load_request_metadata( - rqctx: &dropshot::RequestContext, + pub fn load_request_metadata( + rqctx: &dropshot::RequestContext, metadata: &mut BTreeMap, ) { let request = &rqctx.request; @@ -151,7 +158,7 @@ impl OpContext { log: slog::Logger, authz: Arc, authn: authn::Context, - datastore: Arc, + datastore: Arc, ) -> OpContext { let created_instant = Instant::now(); let created_walltime = SystemTime::now(); @@ -180,7 +187,7 @@ impl OpContext { // outside public interfaces. 
pub fn for_tests( log: slog::Logger, - datastore: Arc, + datastore: Arc, ) -> OpContext { let created_instant = Instant::now(); let created_walltime = SystemTime::now(); @@ -207,7 +214,7 @@ impl OpContext { /// functionally the same as one that you already have, but where you want /// to provide extra debugging information (in the form of key-value pairs) /// in both the OpContext itself and its logger. - pub fn child(&self, new_metadata: BTreeMap) -> OpContext { + pub fn child(&self, new_metadata: BTreeMap) -> Self { let created_instant = Instant::now(); let created_walltime = SystemTime::now(); let mut metadata = self.metadata.clone(); @@ -346,19 +353,42 @@ mod test { use crate::authn; use crate::authz; use authz::Action; - use nexus_test_utils::db::test_setup_database; + use nexus_db_model::IdentityType; + use nexus_db_model::RoleAssignment; use omicron_common::api::external::Error; + use omicron_common::api::external::ResourceType; use omicron_test_utils::dev; use std::collections::BTreeMap; use std::sync::Arc; + use uuid::Uuid; + + struct FakeStorage {} + + impl FakeStorage { + fn new() -> Arc { + Arc::new(Self {}) + } + } + + #[async_trait::async_trait] + impl crate::storage::Storage for FakeStorage { + async fn role_asgn_list_for( + &self, + _opctx: &OpContext, + _identity_type: IdentityType, + _identity_id: Uuid, + _resource_type: ResourceType, + _resource_id: Uuid, + ) -> Result, Error> { + unimplemented!("This test is not expected to access the database"); + } + } #[tokio::test] async fn test_background_context() { let logctx = dev::test_setup_log("test_background_context"); - let mut db = test_setup_database(&logctx.log).await; - let (_, datastore) = - crate::db::datastore::test_utils::datastore_test(&logctx, &db) - .await; + + let datastore = FakeStorage::new(); let opctx = OpContext::for_background( logctx.log.new(o!()), Arc::new(authz::Authz::new(&logctx.log)), @@ -381,17 +411,13 @@ mod test { .await .expect_err("expected authorization error"); 
assert!(matches!(error, Error::Unauthenticated { .. })); - db.cleanup().await.unwrap(); logctx.cleanup_successful(); } #[tokio::test] async fn test_test_context() { let logctx = dev::test_setup_log("test_background_context"); - let mut db = test_setup_database(&logctx.log).await; - let (_, datastore) = - crate::db::datastore::test_utils::datastore_test(&logctx, &db) - .await; + let datastore = FakeStorage::new(); let opctx = OpContext::for_tests(logctx.log.new(o!()), datastore); // Like in test_background_context(), this is essentially a test of the @@ -403,17 +429,13 @@ mod test { .authorize(Action::Query, &authz::DATABASE) .await .expect("expected authorization to succeed"); - db.cleanup().await.unwrap(); logctx.cleanup_successful(); } #[tokio::test] async fn test_child_context() { let logctx = dev::test_setup_log("test_child_context"); - let mut db = test_setup_database(&logctx.log).await; - let (_, datastore) = - crate::db::datastore::test_utils::datastore_test(&logctx, &db) - .await; + let datastore = FakeStorage::new(); let opctx = OpContext::for_background( logctx.log.new(o!()), Arc::new(authz::Authz::new(&logctx.log)), @@ -451,7 +473,6 @@ mod test { assert_eq!(grandchild_opctx.metadata["one"], "seven"); assert_eq!(grandchild_opctx.metadata["five"], "six"); - db.cleanup().await.unwrap(); logctx.cleanup_successful(); } } diff --git a/nexus/auth/src/lib.rs b/nexus/auth/src/lib.rs new file mode 100644 index 0000000000..0f0b9064b2 --- /dev/null +++ b/nexus/auth/src/lib.rs @@ -0,0 +1,11 @@ +pub mod authn; +pub mod authz; +pub mod context; +pub mod storage; + +#[macro_use] +extern crate newtype_derive; + +#[allow(unused_imports)] +#[macro_use] +extern crate slog; diff --git a/nexus/auth/src/storage.rs b/nexus/auth/src/storage.rs new file mode 100644 index 0000000000..c1d2fcedd8 --- /dev/null +++ b/nexus/auth/src/storage.rs @@ -0,0 +1,27 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Describes the dependency from the auth system on the datastore. +//! +//! Auth and storage are intertwined, but by isolating the interface from +//! auth on the database, we can avoid a circular dependency. + +use crate::context::OpContext; +use nexus_db_model::IdentityType; +use nexus_db_model::RoleAssignment; +use omicron_common::api::external::Error; +use omicron_common::api::external::ResourceType; +use uuid::Uuid; + +#[async_trait::async_trait] +pub trait Storage: Send + Sync { + async fn role_asgn_list_for( + &self, + opctx: &OpContext, + identity_type: IdentityType, + identity_id: Uuid, + resource_type: ResourceType, + resource_id: Uuid, + ) -> Result, Error>; +} diff --git a/nexus/db-fixed-data/Cargo.toml b/nexus/db-fixed-data/Cargo.toml new file mode 100644 index 0000000000..486df15686 --- /dev/null +++ b/nexus/db-fixed-data/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "nexus-db-fixed-data" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" +description = "Hard-coded database data, including defaults and built-ins" + +[lints] +workspace = true + +[build-dependencies] +omicron-rpaths.workspace = true + +[dependencies] +once_cell.workspace = true +# See omicron-rpaths for more about the "pq-sys" dependency. +pq-sys = "*" +strum.workspace = true +uuid.workspace = true + +nexus-db-model.workspace = true +nexus-types.workspace = true +omicron-common.workspace = true +omicron-workspace-hack.workspace = true + diff --git a/nexus/db-fixed-data/build.rs b/nexus/db-fixed-data/build.rs new file mode 100644 index 0000000000..1ba9acd41c --- /dev/null +++ b/nexus/db-fixed-data/build.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +// See omicron-rpaths for documentation. +// NOTE: This file MUST be kept in sync with the other build.rs files in this +// repository. +fn main() { + omicron_rpaths::configure_default_omicron_rpaths(); +} diff --git a/nexus/db-queries/src/db/fixed_data/allow_list.rs b/nexus/db-fixed-data/src/allow_list.rs similarity index 100% rename from nexus/db-queries/src/db/fixed_data/allow_list.rs rename to nexus/db-fixed-data/src/allow_list.rs diff --git a/nexus/db-queries/src/db/fixed_data/mod.rs b/nexus/db-fixed-data/src/lib.rs similarity index 100% rename from nexus/db-queries/src/db/fixed_data/mod.rs rename to nexus/db-fixed-data/src/lib.rs diff --git a/nexus/db-queries/src/db/fixed_data/project.rs b/nexus/db-fixed-data/src/project.rs similarity index 79% rename from nexus/db-queries/src/db/fixed_data/project.rs rename to nexus/db-fixed-data/src/project.rs index e240900e0c..6b9f005916 100644 --- a/nexus/db-queries/src/db/fixed_data/project.rs +++ b/nexus/db-fixed-data/src/project.rs @@ -2,12 +2,14 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::db; -use crate::db::datastore::SERVICES_DB_NAME; +use nexus_db_model as model; use nexus_types::external_api::params; use omicron_common::api::external::IdentityMetadataCreateParams; use once_cell::sync::Lazy; +/// The name of the built-in Project and VPC for Oxide services. +pub const SERVICES_DB_NAME: &str = "oxide-services"; + /// UUID of built-in project for internal services on the rack. pub static SERVICES_PROJECT_ID: Lazy = Lazy::new(|| { "001de000-4401-4000-8000-000000000000" @@ -16,8 +18,8 @@ pub static SERVICES_PROJECT_ID: Lazy = Lazy::new(|| { }); /// Built-in Project for internal services on the rack. 
-pub static SERVICES_PROJECT: Lazy = Lazy::new(|| { - db::model::Project::new_with_id( +pub static SERVICES_PROJECT: Lazy = Lazy::new(|| { + model::Project::new_with_id( *SERVICES_PROJECT_ID, *super::silo::INTERNAL_SILO_ID, params::ProjectCreate { diff --git a/nexus/db-queries/src/db/fixed_data/role_assignment.rs b/nexus/db-fixed-data/src/role_assignment.rs similarity index 97% rename from nexus/db-queries/src/db/fixed_data/role_assignment.rs rename to nexus/db-fixed-data/src/role_assignment.rs index d6c95d47b6..25b26786f8 100644 --- a/nexus/db-queries/src/db/fixed_data/role_assignment.rs +++ b/nexus/db-fixed-data/src/role_assignment.rs @@ -6,8 +6,8 @@ use super::role_builtin; use super::user_builtin; use super::FLEET_ID; -use crate::db::model::IdentityType; -use crate::db::model::RoleAssignment; +use nexus_db_model::IdentityType; +use nexus_db_model::RoleAssignment; use once_cell::sync::Lazy; pub static BUILTIN_ROLE_ASSIGNMENTS: Lazy> = diff --git a/nexus/db-queries/src/db/fixed_data/role_builtin.rs b/nexus/db-fixed-data/src/role_builtin.rs similarity index 99% rename from nexus/db-queries/src/db/fixed_data/role_builtin.rs rename to nexus/db-fixed-data/src/role_builtin.rs index f58077fc3f..c617874e98 100644 --- a/nexus/db-queries/src/db/fixed_data/role_builtin.rs +++ b/nexus/db-fixed-data/src/role_builtin.rs @@ -83,7 +83,7 @@ pub static BUILTIN_ROLES: Lazy> = Lazy::new(|| { #[cfg(test)] mod test { use super::BUILTIN_ROLES; - use crate::db::model::DatabaseString; + use nexus_db_model::DatabaseString; use nexus_types::external_api::shared::{FleetRole, ProjectRole, SiloRole}; use omicron_common::api::external::ResourceType; use strum::IntoEnumIterator; diff --git a/nexus/db-queries/src/db/fixed_data/silo.rs b/nexus/db-fixed-data/src/silo.rs similarity index 91% rename from nexus/db-queries/src/db/fixed_data/silo.rs rename to nexus/db-fixed-data/src/silo.rs index dc5f19fc2f..ebc6776923 100644 --- a/nexus/db-queries/src/db/fixed_data/silo.rs +++ 
b/nexus/db-fixed-data/src/silo.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::db; +use nexus_db_model as model; use nexus_types::external_api::{params, shared}; use omicron_common::api::external::IdentityMetadataCreateParams; use once_cell::sync::Lazy; @@ -17,8 +17,8 @@ pub static DEFAULT_SILO_ID: Lazy = Lazy::new(|| { /// /// This was historically used for demos and the unit tests. The plan is to /// remove it per omicron#2305. -pub static DEFAULT_SILO: Lazy = Lazy::new(|| { - db::model::Silo::new_with_id( +pub static DEFAULT_SILO: Lazy = Lazy::new(|| { + model::Silo::new_with_id( *DEFAULT_SILO_ID, params::SiloCreate { identity: IdentityMetadataCreateParams { @@ -47,8 +47,8 @@ pub static INTERNAL_SILO_ID: Lazy = Lazy::new(|| { /// Built-in Silo to house internal resources. It contains no users and /// can't be logged into. -pub static INTERNAL_SILO: Lazy = Lazy::new(|| { - db::model::Silo::new_with_id( +pub static INTERNAL_SILO: Lazy = Lazy::new(|| { + model::Silo::new_with_id( *INTERNAL_SILO_ID, params::SiloCreate { identity: IdentityMetadataCreateParams { diff --git a/nexus/db-queries/src/db/fixed_data/silo_user.rs b/nexus/db-fixed-data/src/silo_user.rs similarity index 66% rename from nexus/db-queries/src/db/fixed_data/silo_user.rs rename to nexus/db-fixed-data/src/silo_user.rs index eb49093152..defaa9bd52 100644 --- a/nexus/db-queries/src/db/fixed_data/silo_user.rs +++ b/nexus/db-fixed-data/src/silo_user.rs @@ -4,8 +4,8 @@ //! 
Built-in Silo Users use super::role_builtin; -use crate::db; -use crate::db::identity::Asset; +use nexus_db_model as model; +use nexus_types::identity::Asset; use once_cell::sync::Lazy; /// Test user that's granted all privileges, used for automated testing and @@ -13,9 +13,9 @@ use once_cell::sync::Lazy; // TODO-security Once we have a way to bootstrap the initial Silo with the // initial privileged user, this user should be created in the test suite, // not automatically at Nexus startup. See omicron#2305. -pub static USER_TEST_PRIVILEGED: Lazy = Lazy::new(|| { - db::model::SiloUser::new( - *db::fixed_data::silo::DEFAULT_SILO_ID, +pub static USER_TEST_PRIVILEGED: Lazy = Lazy::new(|| { + model::SiloUser::new( + *crate::silo::DEFAULT_SILO_ID, // "4007" looks a bit like "root". "001de000-05e4-4000-8000-000000004007".parse().unwrap(), "privileged".into(), @@ -23,23 +23,23 @@ pub static USER_TEST_PRIVILEGED: Lazy = Lazy::new(|| { }); /// Role assignments needed for the privileged user -pub static ROLE_ASSIGNMENTS_PRIVILEGED: Lazy> = +pub static ROLE_ASSIGNMENTS_PRIVILEGED: Lazy> = Lazy::new(|| { vec![ // The "test-privileged" user gets the "admin" role on the sole // Fleet as well as the default Silo. 
- db::model::RoleAssignment::new( - db::model::IdentityType::SiloUser, + model::RoleAssignment::new( + model::IdentityType::SiloUser, USER_TEST_PRIVILEGED.id(), role_builtin::FLEET_ADMIN.resource_type, - *db::fixed_data::FLEET_ID, + *crate::FLEET_ID, role_builtin::FLEET_ADMIN.role_name, ), - db::model::RoleAssignment::new( - db::model::IdentityType::SiloUser, + model::RoleAssignment::new( + model::IdentityType::SiloUser, USER_TEST_PRIVILEGED.id(), role_builtin::SILO_ADMIN.resource_type, - *db::fixed_data::silo::DEFAULT_SILO_ID, + *crate::silo::DEFAULT_SILO_ID, role_builtin::SILO_ADMIN.role_name, ), ] @@ -49,22 +49,21 @@ pub static ROLE_ASSIGNMENTS_PRIVILEGED: Lazy> = // TODO-security Once we have a way to bootstrap the initial Silo with the // initial privileged user, this user should be created in the test suite, // not automatically at Nexus startup. See omicron#2305. -pub static USER_TEST_UNPRIVILEGED: Lazy = - Lazy::new(|| { - db::model::SiloUser::new( - *db::fixed_data::silo::DEFAULT_SILO_ID, - // 60001 is the decimal uid for "nobody" on Helios. - "001de000-05e4-4000-8000-000000060001".parse().unwrap(), - "unprivileged".into(), - ) - }); +pub static USER_TEST_UNPRIVILEGED: Lazy = Lazy::new(|| { + model::SiloUser::new( + *crate::silo::DEFAULT_SILO_ID, + // 60001 is the decimal uid for "nobody" on Helios. 
+ "001de000-05e4-4000-8000-000000060001".parse().unwrap(), + "unprivileged".into(), + ) +}); #[cfg(test)] mod test { use super::super::assert_valid_uuid; use super::USER_TEST_PRIVILEGED; use super::USER_TEST_UNPRIVILEGED; - use crate::db::identity::Asset; + use nexus_types::identity::Asset; #[test] fn test_silo_user_ids_are_valid() { diff --git a/nexus/db-queries/src/db/fixed_data/user_builtin.rs b/nexus/db-fixed-data/src/user_builtin.rs similarity index 100% rename from nexus/db-queries/src/db/fixed_data/user_builtin.rs rename to nexus/db-fixed-data/src/user_builtin.rs diff --git a/nexus/db-queries/src/db/fixed_data/vpc.rs b/nexus/db-fixed-data/src/vpc.rs similarity index 91% rename from nexus/db-queries/src/db/fixed_data/vpc.rs rename to nexus/db-fixed-data/src/vpc.rs index c71b655ddc..25628a83b5 100644 --- a/nexus/db-queries/src/db/fixed_data/vpc.rs +++ b/nexus/db-fixed-data/src/vpc.rs @@ -2,8 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::db; -use crate::db::datastore::SERVICES_DB_NAME; +use crate::project::SERVICES_DB_NAME; +use nexus_db_model as model; use nexus_types::external_api::params; use omicron_common::address::SERVICE_VPC_IPV6_PREFIX; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -31,8 +31,8 @@ pub static SERVICES_VPC_DEFAULT_ROUTE_ID: Lazy = Lazy::new(|| { }); /// Built-in VPC for internal services on the rack. 
-pub static SERVICES_VPC: Lazy = Lazy::new(|| { - db::model::IncompleteVpc::new( +pub static SERVICES_VPC: Lazy = Lazy::new(|| { + model::IncompleteVpc::new( *SERVICES_VPC_ID, *super::project::SERVICES_PROJECT_ID, *SERVICES_VPC_ROUTER_ID, diff --git a/nexus/db-queries/src/db/fixed_data/vpc_firewall_rule.rs b/nexus/db-fixed-data/src/vpc_firewall_rule.rs similarity index 100% rename from nexus/db-queries/src/db/fixed_data/vpc_firewall_rule.rs rename to nexus/db-fixed-data/src/vpc_firewall_rule.rs diff --git a/nexus/db-queries/src/db/fixed_data/vpc_subnet.rs b/nexus/db-fixed-data/src/vpc_subnet.rs similarity index 98% rename from nexus/db-queries/src/db/fixed_data/vpc_subnet.rs rename to nexus/db-fixed-data/src/vpc_subnet.rs index c42d4121c9..622799b000 100644 --- a/nexus/db-queries/src/db/fixed_data/vpc_subnet.rs +++ b/nexus/db-fixed-data/src/vpc_subnet.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use crate::db::model::VpcSubnet; +use nexus_db_model::VpcSubnet; use omicron_common::address::{ DNS_OPTE_IPV4_SUBNET, DNS_OPTE_IPV6_SUBNET, NEXUS_OPTE_IPV4_SUBNET, NEXUS_OPTE_IPV6_SUBNET, NTP_OPTE_IPV4_SUBNET, NTP_OPTE_IPV6_SUBNET, diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 135f2fcdf7..cb7061f4ce 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -14,37 +14,27 @@ omicron-rpaths.workspace = true anyhow.workspace = true async-bb8-diesel.workspace = true async-trait.workspace = true -base64.workspace = true bb8.workspace = true camino.workspace = true chrono.workspace = true const_format.workspace = true -cookie.workspace = true diesel.workspace = true diesel-dtrace.workspace = true dropshot.workspace = true futures.workspace = true -headers.workspace = true -http.workspace = true -hyper.workspace = true ipnetwork.workspace = true macaddr.workspace = true -newtype_derive.workspace = true once_cell.workspace = true -openssl.workspace = true -oso.workspace = true oxnet.workspace = true paste.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. 
pq-sys = "*" rand.workspace = true ref-cast.workspace = true -samael.workspace = true schemars.workspace = true semver.workspace = true serde.workspace = true serde_json.workspace = true -serde_urlencoded.workspace = true serde_with.workspace = true sled-agent-client.workspace = true slog.workspace = true @@ -58,9 +48,10 @@ tokio = { workspace = true, features = ["full"] } uuid.workspace = true usdt.workspace = true -authz-macros.workspace = true db-macros.workspace = true +nexus-auth.workspace = true nexus-config.workspace = true +nexus-db-fixed-data.workspace = true nexus-db-model.workspace = true nexus-types.workspace = true omicron-common.workspace = true @@ -91,6 +82,7 @@ nexus-test-utils.workspace = true omicron-sled-agent.workspace = true omicron-test-utils.workspace = true openapiv3.workspace = true +oso.workspace = true pem.workspace = true petgraph.workspace = true predicates.workspace = true diff --git a/nexus/db-queries/src/db/datastore/allow_list.rs b/nexus/db-queries/src/db/datastore/allow_list.rs index 111ccad08f..7c1643451f 100644 --- a/nexus/db-queries/src/db/datastore/allow_list.rs +++ b/nexus/db-queries/src/db/datastore/allow_list.rs @@ -8,12 +8,12 @@ use crate::authz; use crate::context::OpContext; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::fixed_data::allow_list::USER_FACING_SERVICES_ALLOW_LIST_ID; use crate::db::DbConnection; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::ExpressionMethods; use diesel::QueryDsl; use diesel::SelectableHelper; +use nexus_db_fixed_data::allow_list::USER_FACING_SERVICES_ALLOW_LIST_ID; use nexus_db_model::schema::allow_list; use nexus_db_model::AllowList; use omicron_common::api::external::AllowedSourceIps; diff --git a/nexus/db-queries/src/db/datastore/auth.rs b/nexus/db-queries/src/db/datastore/auth.rs new file mode 100644 index 0000000000..3b1d1d18e3 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/auth.rs @@ -0,0 +1,81 @@ +// This Source Code 
Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Implements the [Storage] interface for [nexus_auth] integration. + +use crate::db; +use crate::db::error::public_error_from_diesel; +use crate::db::error::ErrorHandler; + +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::prelude::*; +use nexus_auth::context::OpContext; +use nexus_auth::storage::Storage; +use nexus_db_model::IdentityType; +use nexus_db_model::RoleAssignment; +use omicron_common::api::external::Error; +use omicron_common::api::external::ResourceType; +use uuid::Uuid; + +#[async_trait::async_trait] +impl Storage for super::DataStore { + /// Return the built-in roles that the given built-in user has for the given + /// resource + async fn role_asgn_list_for( + &self, + opctx: &OpContext, + identity_type: IdentityType, + identity_id: Uuid, + resource_type: ResourceType, + resource_id: Uuid, + ) -> Result, Error> { + use db::schema::role_assignment::dsl as role_dsl; + use db::schema::silo_group_membership::dsl as group_dsl; + + // There is no resource-specific authorization check because all + // authenticated users need to be able to list their own roles -- + // otherwise we can't do any authorization checks. + // TODO-security rethink this -- how do we know the user is looking up + // their own roles? Maybe this should use an internal authz context. + + // TODO-scalability TODO-security This needs to be paginated. It's not + // exposed via an external API right now but someone could still put us + // into some hurt by assigning loads of roles to someone and having that + // person attempt to access anything. 
+ + let direct_roles_query = role_dsl::role_assignment + .filter(role_dsl::identity_type.eq(identity_type.clone())) + .filter(role_dsl::identity_id.eq(identity_id)) + .filter(role_dsl::resource_type.eq(resource_type.to_string())) + .filter(role_dsl::resource_id.eq(resource_id)) + .select(RoleAssignment::as_select()); + + let roles_from_groups_query = role_dsl::role_assignment + .filter(role_dsl::identity_type.eq(IdentityType::SiloGroup)) + .filter( + role_dsl::identity_id.eq_any( + group_dsl::silo_group_membership + .filter(group_dsl::silo_user_id.eq(identity_id)) + .select(group_dsl::silo_group_id), + ), + ) + .filter(role_dsl::resource_type.eq(resource_type.to_string())) + .filter(role_dsl::resource_id.eq(resource_id)) + .select(RoleAssignment::as_select()); + + let conn = self.pool_connection_authorized(opctx).await?; + if identity_type == IdentityType::SiloUser { + direct_roles_query + .union(roles_from_groups_query) + .load_async::(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } else { + direct_roles_query + .load_async::(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + } +} diff --git a/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs index 177cf673e7..e7a975fa69 100644 --- a/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs +++ b/nexus/db-queries/src/db/datastore/cockroachdb_settings.rs @@ -147,8 +147,10 @@ mod test { let (_, datastore) = crate::db::datastore::test_utils::datastore_test(&logctx, &db) .await; - let opctx = - OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore)); + let opctx = OpContext::for_tests( + logctx.log.new(o!()), + Arc::clone(&datastore) as Arc, + ); let settings = datastore.cockroachdb_settings(&opctx).await.unwrap(); // With a fresh cluster, this is the expected state diff --git a/nexus/db-queries/src/db/datastore/identity_provider.rs 
b/nexus/db-queries/src/db/datastore/identity_provider.rs index cee577acd6..e7ab9bde16 100644 --- a/nexus/db-queries/src/db/datastore/identity_provider.rs +++ b/nexus/db-queries/src/db/datastore/identity_provider.rs @@ -11,18 +11,66 @@ use crate::db; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::identity::Resource; +use crate::db::lookup::LookupPath; +use crate::db::model; use crate::db::model::IdentityProvider; use crate::db::model::Name; use crate::db::pagination::paginated; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; +use nexus_auth::authn::silos::IdentityProviderType; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::LookupResult; use omicron_common::api::external::ResourceType; use ref_cast::RefCast; impl DataStore { + pub async fn identity_provider_lookup( + &self, + opctx: &OpContext, + silo_name: &model::Name, + provider_name: &model::Name, + ) -> LookupResult<(authz::Silo, model::Silo, IdentityProviderType)> { + let (authz_silo, db_silo) = + LookupPath::new(opctx, self).silo_name(silo_name).fetch().await?; + + let (.., identity_provider) = LookupPath::new(opctx, self) + .silo_name(silo_name) + .identity_provider_name(provider_name) + .fetch() + .await?; + + match identity_provider.provider_type { + model::IdentityProviderType::Saml => { + let (.., saml_identity_provider) = LookupPath::new(opctx, self) + .silo_name(silo_name) + .saml_identity_provider_name(provider_name) + .fetch() + .await?; + + let saml_identity_provider = IdentityProviderType::Saml( + saml_identity_provider.try_into() + .map_err(|e: anyhow::Error| + // If an error is encountered converting from the + // model to the authn type here, this is a server + // error: it was validated before it went into the + // DB. 
+ omicron_common::api::external::Error::internal_error( + &format!( + "saml_identity_provider.try_into() failed! {}", + &e.to_string() + ) + ) + )? + ); + + Ok((authz_silo, db_silo, saml_identity_provider)) + } + } + } + pub async fn identity_provider_list( &self, opctx: &OpContext, diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index 60fd5c9dc3..3b655e5bb9 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -783,7 +783,6 @@ impl DataStore { mod tests { use super::*; use crate::db::datastore::test_utils::datastore_test; - use crate::db::fixed_data; use crate::db::lookup::LookupPath; use nexus_db_model::Project; use nexus_test_utils::db::test_setup_database; @@ -796,7 +795,7 @@ mod tests { datastore: &DataStore, opctx: &OpContext, ) -> authz::Instance { - let silo_id = *fixed_data::silo::DEFAULT_SILO_ID; + let silo_id = *nexus_db_fixed_data::silo::DEFAULT_SILO_ID; let project_id = Uuid::new_v4(); let instance_id = Uuid::new_v4(); diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index b90f81affb..9ec3575860 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -49,6 +49,7 @@ use uuid::Uuid; mod address_lot; mod allow_list; +mod auth; mod bfd; mod bgp; mod bootstore; @@ -130,9 +131,6 @@ pub const REGION_REDUNDANCY_THRESHOLD: usize = 3; /// The name of the built-in IP pool for Oxide services. pub const SERVICE_IP_POOL_NAME: &str = "oxide-service-pool"; -/// The name of the built-in Project and VPC for Oxide services. 
-pub const SERVICES_DB_NAME: &str = "oxide-services"; - /// "limit" to be used in SQL queries that paginate through large result sets /// /// This value is chosen to be small enough to avoid any queries being too @@ -385,8 +383,6 @@ mod test { IneligibleSledKind, IneligibleSleds, }; use crate::db::explain::ExplainableAsync; - use crate::db::fixed_data::silo::DEFAULT_SILO; - use crate::db::fixed_data::silo::DEFAULT_SILO_ID; use crate::db::identity::Asset; use crate::db::lookup::LookupPath; use crate::db::model::{ @@ -400,6 +396,8 @@ mod test { use futures::stream; use futures::StreamExt; use nexus_config::RegionAllocationStrategy; + use nexus_db_fixed_data::silo::DEFAULT_SILO; + use nexus_db_fixed_data::silo::DEFAULT_SILO_ID; use nexus_db_model::IpAttachState; use nexus_db_model::{to_db_typed_uuid, Generation}; use nexus_test_utils::db::test_setup_database; @@ -485,7 +483,7 @@ mod test { logctx.log.new(o!("component" => "TestExternalAuthn")), Arc::new(authz::Authz::new(&logctx.log)), authn::Context::external_authn(), - Arc::clone(&datastore), + Arc::clone(&datastore) as Arc, ); let token = "a_token".to_string(); @@ -587,7 +585,7 @@ mod test { *DEFAULT_SILO_ID, SiloAuthnPolicy::try_from(&*DEFAULT_SILO).unwrap(), ), - Arc::clone(&datastore), + Arc::clone(&datastore) as Arc, ); let delete = datastore .session_hard_delete(&silo_user_opctx, &authz_session) @@ -1624,8 +1622,10 @@ mod test { let pool = Arc::new(db::Pool::new(&logctx.log, &cfg)); let datastore = Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap()); - let opctx = - OpContext::for_tests(logctx.log.new(o!()), datastore.clone()); + let opctx = OpContext::for_tests( + logctx.log.new(o!()), + Arc::clone(&datastore) as Arc, + ); let rack_id = Uuid::new_v4(); let addr1 = "[fd00:1de::1]:12345".parse().unwrap(); diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index af3f832e35..3ea2945b2f 100644 --- 
a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -854,8 +854,8 @@ impl DataStore { mod tests { use super::*; use crate::db::datastore::test_utils::datastore_test; - use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; + use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; use nexus_test_utils::db::test_setup_database; use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; use omicron_test_utils::dev; diff --git a/nexus/db-queries/src/db/datastore/project.rs b/nexus/db-queries/src/db/datastore/project.rs index 08647b421e..42ccca4ed6 100644 --- a/nexus/db-queries/src/db/datastore/project.rs +++ b/nexus/db-queries/src/db/datastore/project.rs @@ -13,8 +13,6 @@ use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::fixed_data::project::SERVICES_PROJECT; -use crate::db::fixed_data::silo::INTERNAL_SILO_ID; use crate::db::identity::Resource; use crate::db::model::CollectionTypeProvisioned; use crate::db::model::Name; @@ -27,6 +25,8 @@ use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_db_fixed_data::project::SERVICES_PROJECT; +use nexus_db_fixed_data::silo::INTERNAL_SILO_ID; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; diff --git a/nexus/db-queries/src/db/datastore/pub_test_utils.rs b/nexus/db-queries/src/db/datastore/pub_test_utils.rs index 5259a03656..93a172bd15 100644 --- a/nexus/db-queries/src/db/datastore/pub_test_utils.rs +++ b/nexus/db-queries/src/db/datastore/pub_test_utils.rs @@ -39,7 +39,7 @@ pub async fn datastore_test( logctx.log.new(o!()), Arc::new(authz::Authz::new(&logctx.log)), 
authn::Context::internal_db_init(), - Arc::clone(&datastore), + Arc::clone(&datastore) as Arc, ); // TODO: Can we just call "Populate" instead of doing this? @@ -59,8 +59,10 @@ pub async fn datastore_test( // Create an OpContext with the credentials of "test-privileged" for general // testing. - let opctx = - OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore)); + let opctx = OpContext::for_tests( + logctx.log.new(o!()), + Arc::clone(&datastore) as Arc, + ); (opctx, datastore) } diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index d836185d87..4af6bf7263 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -16,10 +16,6 @@ use crate::db::error::public_error_from_diesel; use crate::db::error::retryable; use crate::db::error::ErrorHandler; use crate::db::error::MaybeRetryable::*; -use crate::db::fixed_data::silo::INTERNAL_SILO_ID; -use crate::db::fixed_data::vpc_subnet::DNS_VPC_SUBNET; -use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; -use crate::db::fixed_data::vpc_subnet::NTP_VPC_SUBNET; use crate::db::identity::Asset; use crate::db::lookup::LookupPath; use crate::db::model::Dataset; @@ -37,6 +33,10 @@ use diesel::prelude::*; use diesel::result::Error as DieselError; use diesel::upsert::excluded; use ipnetwork::IpNetwork; +use nexus_db_fixed_data::silo::INTERNAL_SILO_ID; +use nexus_db_fixed_data::vpc_subnet::DNS_VPC_SUBNET; +use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; +use nexus_db_fixed_data::vpc_subnet::NTP_VPC_SUBNET; use nexus_db_model::IncompleteNetworkInterface; use nexus_db_model::InitialDnsGroup; use nexus_db_model::PasswordHashString; diff --git a/nexus/db-queries/src/db/datastore/role.rs b/nexus/db-queries/src/db/datastore/role.rs index 3a57ffc44c..b91597ad1d 100644 --- a/nexus/db-queries/src/db/datastore/role.rs +++ b/nexus/db-queries/src/db/datastore/role.rs @@ -14,8 +14,6 @@ use 
crate::db::datastore::RunnableQueryNoReturn; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::error::TransactionError; -use crate::db::fixed_data::role_assignment::BUILTIN_ROLE_ASSIGNMENTS; -use crate::db::fixed_data::role_builtin::BUILTIN_ROLES; use crate::db::model::DatabaseString; use crate::db::model::IdentityType; use crate::db::model::RoleAssignment; @@ -25,13 +23,13 @@ use crate::db::pool::DbConnection; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; +use nexus_db_fixed_data::role_assignment::BUILTIN_ROLE_ASSIGNMENTS; +use nexus_db_fixed_data::role_builtin::BUILTIN_ROLES; use nexus_types::external_api::shared; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; -use omicron_common::api::external::ResourceType; use omicron_common::bail_unless; -use uuid::Uuid; impl DataStore { /// List built-in roles @@ -117,65 +115,6 @@ impl DataStore { Ok(()) } - /// Return the built-in roles that the given built-in user has for the given - /// resource - pub async fn role_asgn_list_for( - &self, - opctx: &OpContext, - identity_type: IdentityType, - identity_id: Uuid, - resource_type: ResourceType, - resource_id: Uuid, - ) -> Result, Error> { - use db::schema::role_assignment::dsl as role_dsl; - use db::schema::silo_group_membership::dsl as group_dsl; - - // There is no resource-specific authorization check because all - // authenticated users need to be able to list their own roles -- - // otherwise we can't do any authorization checks. - // TODO-security rethink this -- how do we know the user is looking up - // their own roles? Maybe this should use an internal authz context. - - // TODO-scalability TODO-security This needs to be paginated. 
It's not - // exposed via an external API right now but someone could still put us - // into some hurt by assigning loads of roles to someone and having that - // person attempt to access anything. - - let direct_roles_query = role_dsl::role_assignment - .filter(role_dsl::identity_type.eq(identity_type.clone())) - .filter(role_dsl::identity_id.eq(identity_id)) - .filter(role_dsl::resource_type.eq(resource_type.to_string())) - .filter(role_dsl::resource_id.eq(resource_id)) - .select(RoleAssignment::as_select()); - - let roles_from_groups_query = role_dsl::role_assignment - .filter(role_dsl::identity_type.eq(IdentityType::SiloGroup)) - .filter( - role_dsl::identity_id.eq_any( - group_dsl::silo_group_membership - .filter(group_dsl::silo_user_id.eq(identity_id)) - .select(group_dsl::silo_group_id), - ), - ) - .filter(role_dsl::resource_type.eq(resource_type.to_string())) - .filter(role_dsl::resource_id.eq(resource_id)) - .select(RoleAssignment::as_select()); - - let conn = self.pool_connection_authorized(opctx).await?; - if identity_type == IdentityType::SiloUser { - direct_roles_query - .union(roles_from_groups_query) - .load_async::(&*conn) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } else { - direct_roles_query - .load_async::(&*conn) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } - } - /// Fetches all of the externally-visible role assignments for the specified /// resource /// diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index 0fd858b900..2b7afa3270 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -15,7 +15,6 @@ use crate::db::error::public_error_from_diesel; use crate::db::error::retryable; use crate::db::error::ErrorHandler; use crate::db::error::TransactionError; -use crate::db::fixed_data::silo::{DEFAULT_SILO, INTERNAL_SILO}; use crate::db::identity::Resource; use 
crate::db::model::CollectionTypeProvisioned; use crate::db::model::IpPoolResourceType; @@ -29,6 +28,7 @@ use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_db_fixed_data::silo::{DEFAULT_SILO, INTERNAL_SILO}; use nexus_db_model::Certificate; use nexus_db_model::ServiceKind; use nexus_db_model::SiloQuotas; diff --git a/nexus/db-queries/src/db/datastore/silo_user.rs b/nexus/db-queries/src/db/datastore/silo_user.rs index 59cb19a609..2825e2a310 100644 --- a/nexus/db-queries/src/db/datastore/silo_user.rs +++ b/nexus/db-queries/src/db/datastore/silo_user.rs @@ -429,7 +429,9 @@ impl DataStore { use db::schema::role_assignment::dsl; debug!(opctx.log, "attempting to create silo user role assignments"); let count = diesel::insert_into(dsl::role_assignment) - .values(&*db::fixed_data::silo_user::ROLE_ASSIGNMENTS_PRIVILEGED) + .values( + &*nexus_db_fixed_data::silo_user::ROLE_ASSIGNMENTS_PRIVILEGED, + ) .on_conflict(( dsl::identity_type, dsl::identity_id, diff --git a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs index 3630231b63..9738f05ff6 100644 --- a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs +++ b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs @@ -312,7 +312,7 @@ impl DataStore { &self, opctx: &OpContext, ) -> Result<(), Error> { - let id = *db::fixed_data::FLEET_ID; + let id = *nexus_db_fixed_data::FLEET_ID; self.virtual_provisioning_collection_create( opctx, db::model::VirtualProvisioningCollection::new( @@ -331,7 +331,6 @@ mod test { use super::*; use crate::db::datastore::test_utils::datastore_test; - use crate::db::fixed_data; use crate::db::lookup::LookupPath; use nexus_db_model::Instance; use nexus_db_model::Project; @@ -384,8 +383,8 @@ mod test { datastore: &DataStore, opctx: &OpContext, ) -> TestData { - let fleet_id = *fixed_data::FLEET_ID; - 
let silo_id = *fixed_data::silo::DEFAULT_SILO_ID; + let fleet_id = *nexus_db_fixed_data::FLEET_ID; + let silo_id = *nexus_db_fixed_data::silo::DEFAULT_SILO_ID; let project_id = Uuid::new_v4(); let (authz_project, _project) = datastore diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 98af47f0e2..5322e20dbf 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -12,7 +12,6 @@ use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::fixed_data::vpc::SERVICES_VPC_ID; use crate::db::identity::Resource; use crate::db::model::ApplyBlueprintZoneFilterExt; use crate::db::model::ApplySledFilterExt; @@ -45,6 +44,7 @@ use diesel::prelude::*; use diesel::result::DatabaseErrorKind; use diesel::result::Error as DieselError; use ipnetwork::IpNetwork; +use nexus_db_fixed_data::vpc::SERVICES_VPC_ID; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::SledFilter; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -72,9 +72,9 @@ impl DataStore { &self, opctx: &OpContext, ) -> Result<(), Error> { - use crate::db::fixed_data::project::SERVICES_PROJECT_ID; - use crate::db::fixed_data::vpc::SERVICES_VPC; - use crate::db::fixed_data::vpc::SERVICES_VPC_DEFAULT_ROUTE_ID; + use nexus_db_fixed_data::project::SERVICES_PROJECT_ID; + use nexus_db_fixed_data::vpc::SERVICES_VPC; + use nexus_db_fixed_data::vpc::SERVICES_VPC_DEFAULT_ROUTE_ID; opctx.authorize(authz::Action::Modify, &authz::DATABASE).await?; @@ -175,8 +175,8 @@ impl DataStore { &self, opctx: &OpContext, ) -> Result<(), Error> { - use db::fixed_data::vpc_firewall_rule::DNS_VPC_FW_RULE; - use db::fixed_data::vpc_firewall_rule::NEXUS_VPC_FW_RULE; + use nexus_db_fixed_data::vpc_firewall_rule::DNS_VPC_FW_RULE; + use 
nexus_db_fixed_data::vpc_firewall_rule::NEXUS_VPC_FW_RULE; debug!(opctx.log, "attempting to create built-in VPC firewall rules"); @@ -229,9 +229,9 @@ impl DataStore { &self, opctx: &OpContext, ) -> Result<(), Error> { - use crate::db::fixed_data::vpc_subnet::DNS_VPC_SUBNET; - use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; - use crate::db::fixed_data::vpc_subnet::NTP_VPC_SUBNET; + use nexus_db_fixed_data::vpc_subnet::DNS_VPC_SUBNET; + use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; + use nexus_db_fixed_data::vpc_subnet::NTP_VPC_SUBNET; debug!(opctx.log, "attempting to create built-in VPC Subnets"); @@ -1230,9 +1230,9 @@ mod tests { use crate::db::datastore::test::sled_system_hardware_for_test; use crate::db::datastore::test_utils::datastore_test; use crate::db::datastore::test_utils::IneligibleSleds; - use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; use crate::db::model::Project; use crate::db::queries::vpc::MAX_VNI_SEARCH_RANGE_SIZE; + use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; use nexus_db_model::IncompleteNetworkInterface; use nexus_db_model::SledUpdate; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; diff --git a/nexus/db-queries/src/db/lookup.rs b/nexus/db-queries/src/db/lookup.rs index 487a68b517..0999694c54 100644 --- a/nexus/db-queries/src/db/lookup.rs +++ b/nexus/db-queries/src/db/lookup.rs @@ -924,8 +924,10 @@ mod test { let (_, datastore) = crate::db::datastore::test_utils::datastore_test(&logctx, &db) .await; - let opctx = - OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore)); + let opctx = OpContext::for_tests( + logctx.log.new(o!()), + Arc::clone(&datastore) as Arc, + ); let project_name: Name = Name("my-project".parse().unwrap()); let instance_name: Name = Name("my-instance".parse().unwrap()); diff --git a/nexus/db-queries/src/db/mod.rs b/nexus/db-queries/src/db/mod.rs index 7ce6890a4d..7bd1bbec61 100644 --- a/nexus/db-queries/src/db/mod.rs +++ 
b/nexus/db-queries/src/db/mod.rs @@ -17,7 +17,6 @@ mod cte_utils; pub mod datastore; pub(crate) mod error; mod explain; -pub mod fixed_data; pub mod lookup; mod on_conflict_ext; // Public for doctests. @@ -42,6 +41,7 @@ pub use pool_connection::DISALLOW_FULL_TABLE_SCAN_SQL; #[cfg(test)] mod test_utils; +pub use nexus_db_fixed_data as fixed_data; pub use nexus_db_model as model; use nexus_db_model::saga_types; pub use nexus_db_model::schema; diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs index 895fee2092..b3c1a569b0 100644 --- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs +++ b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs @@ -122,7 +122,7 @@ WITH UNION (SELECT ").param().sql(" AS id) ),") .bind::(project_id) - .bind::(*crate::db::fixed_data::FLEET_ID) + .bind::(*nexus_db_fixed_data::FLEET_ID) .sql(" quotas AS ( diff --git a/nexus/db-queries/src/db/saga_recovery.rs b/nexus/db-queries/src/db/saga_recovery.rs index 55cda03c3c..25f8ff788d 100644 --- a/nexus/db-queries/src/db/saga_recovery.rs +++ b/nexus/db-queries/src/db/saga_recovery.rs @@ -447,7 +447,10 @@ mod test { let (storage, sec_client, uctx) = create_storage_sec_and_context(&log, db_datastore.clone(), sec_id); let sec_log = log.new(o!("component" => "SEC")); - let opctx = OpContext::for_tests(log, Arc::clone(&db_datastore)); + let opctx = OpContext::for_tests( + log, + Arc::clone(&db_datastore) as Arc, + ); // Create and start a saga. // @@ -520,7 +523,10 @@ mod test { let (storage, sec_client, uctx) = create_storage_sec_and_context(&log, db_datastore.clone(), sec_id); let sec_log = log.new(o!("component" => "SEC")); - let opctx = OpContext::for_tests(log, Arc::clone(&db_datastore)); + let opctx = OpContext::for_tests( + log, + Arc::clone(&db_datastore) as Arc, + ); // Create and start a saga, which we expect to complete successfully. 
let saga_id = SagaId(Uuid::new_v4()); diff --git a/nexus/db-queries/src/lib.rs b/nexus/db-queries/src/lib.rs index 60177990e8..003310f920 100644 --- a/nexus/db-queries/src/lib.rs +++ b/nexus/db-queries/src/lib.rs @@ -4,17 +4,19 @@ //! Facilities for working with the Omicron database -pub mod authn; -pub mod authz; -pub mod context; +pub use nexus_auth::authn; +pub use nexus_auth::authz; +pub use nexus_auth::context; + pub mod db; pub mod provisioning; pub mod transaction_retry; +#[cfg(test)] +mod policy_test; + #[macro_use] extern crate slog; -#[macro_use] -extern crate newtype_derive; #[cfg(test)] #[macro_use] extern crate diesel; diff --git a/nexus/db-queries/src/authz/policy_test/coverage.rs b/nexus/db-queries/src/policy_test/coverage.rs similarity index 97% rename from nexus/db-queries/src/authz/policy_test/coverage.rs rename to nexus/db-queries/src/policy_test/coverage.rs index 021c9ef119..08235332ff 100644 --- a/nexus/db-queries/src/authz/policy_test/coverage.rs +++ b/nexus/db-queries/src/policy_test/coverage.rs @@ -2,8 +2,9 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use crate::authz; -use crate::authz::AuthorizedResource; +use nexus_auth::authz; +use nexus_auth::authz::AuthorizedResource; +use slog::{debug, error, o, warn}; use std::collections::BTreeSet; /// Helper for identifying authz resources not covered by the IAM role policy diff --git a/nexus/db-queries/src/authz/policy_test/mod.rs b/nexus/db-queries/src/policy_test/mod.rs similarity index 97% rename from nexus/db-queries/src/authz/policy_test/mod.rs rename to nexus/db-queries/src/policy_test/mod.rs index b6961bcc30..395a480c47 100644 --- a/nexus/db-queries/src/authz/policy_test/mod.rs +++ b/nexus/db-queries/src/policy_test/mod.rs @@ -14,14 +14,14 @@ mod coverage; mod resource_builder; mod resources; -use crate::authn; -use crate::authn::SiloAuthnPolicy; -use crate::authz; -use crate::context::OpContext; use crate::db; -use authn::USER_TEST_PRIVILEGED; use coverage::Coverage; use futures::StreamExt; +use nexus_auth::authn; +use nexus_auth::authn::SiloAuthnPolicy; +use nexus_auth::authn::USER_TEST_PRIVILEGED; +use nexus_auth::authz; +use nexus_auth::context::OpContext; use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::shared; use nexus_types::external_api::shared::FleetRole; @@ -33,6 +33,7 @@ use omicron_test_utils::dev; use resource_builder::DynAuthorizedResource; use resource_builder::ResourceBuilder; use resource_builder::ResourceSet; +use slog::{o, trace}; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::io::Cursor; @@ -117,7 +118,7 @@ async fn test_iam_roles_behavior() { main_silo_id, SiloAuthnPolicy::default(), ), - Arc::clone(&datastore), + Arc::clone(&datastore) as Arc, ); Arc::new((username.clone(), opctx)) @@ -140,7 +141,7 @@ async fn test_iam_roles_behavior() { user_log, Arc::clone(&authz), authn::Context::internal_unauthenticated(), - Arc::clone(&datastore), + Arc::clone(&datastore) as Arc, ), ))); @@ -439,7 +440,8 @@ async fn test_conferred_roles() { main_silo_id, policy.clone(), ), - 
Arc::clone(&datastore), + Arc::clone(&datastore) + as Arc, ); Arc::new((username.clone(), opctx)) }) diff --git a/nexus/db-queries/src/authz/policy_test/resource_builder.rs b/nexus/db-queries/src/policy_test/resource_builder.rs similarity index 74% rename from nexus/db-queries/src/authz/policy_test/resource_builder.rs rename to nexus/db-queries/src/policy_test/resource_builder.rs index 59cb283a95..3d09b2ab2d 100644 --- a/nexus/db-queries/src/authz/policy_test/resource_builder.rs +++ b/nexus/db-queries/src/policy_test/resource_builder.rs @@ -6,14 +6,14 @@ //! IAM policy test use super::coverage::Coverage; -use crate::authz; -use crate::authz::ApiResourceWithRolesType; -use crate::authz::AuthorizedResource; -use crate::context::OpContext; use crate::db; use authz::ApiResource; use futures::future::BoxFuture; use futures::FutureExt; +use nexus_auth::authz; +use nexus_auth::authz::ApiResourceWithRolesType; +use nexus_auth::authz::AuthorizedResource; +use nexus_auth::context::OpContext; use nexus_db_model::DatabaseString; use nexus_types::external_api::shared; use omicron_common::api::external::Error; @@ -192,40 +192,40 @@ pub trait DynAuthorizedResource: AuthorizedResource + std::fmt::Debug { fn resource_name(&self) -> String; } -impl DynAuthorizedResource for T -where - T: ApiResource + AuthorizedResource + oso::PolarClass + Clone, -{ - fn do_authorize<'a, 'b>( - &'a self, - opctx: &'b OpContext, - action: authz::Action, - ) -> BoxFuture<'a, Result<(), Error>> - where - 'b: 'a, - { - opctx.authorize(action, self).boxed() - } - - fn resource_name(&self) -> String { - let my_ident = match self.lookup_type() { - LookupType::ByName(name) => format!("{:?}", name), - LookupType::ById(id) => format!("id {:?}", id.to_string()), - LookupType::ByCompositeId(id) => format!("id {:?}", id), - LookupType::ByOther(_) => { - unimplemented!() +macro_rules! 
impl_dyn_authorized_resource_for_global { + ($t:ty) => { + impl DynAuthorizedResource for $t { + fn resource_name(&self) -> String { + String::from(stringify!($t)) } - }; - format!("{:?} {}", self.resource_type(), my_ident) - } + fn do_authorize<'a, 'b>( + &'a self, + opctx: &'b OpContext, + action: authz::Action, + ) -> BoxFuture<'a, Result<(), Error>> + where + 'b: 'a, + { + opctx.authorize(action, self).boxed() + } + } + }; } -macro_rules! impl_dyn_authorized_resource_for_global { +macro_rules! impl_dyn_authorized_resource_for_resource { ($t:ty) => { impl DynAuthorizedResource for $t { fn resource_name(&self) -> String { - String::from(stringify!($t)) + let my_ident = match self.lookup_type() { + LookupType::ByName(name) => format!("{:?}", name), + LookupType::ById(id) => format!("id {:?}", id.to_string()), + LookupType::ByCompositeId(id) => format!("id {:?}", id), + LookupType::ByOther(_) => { + unimplemented!() + } + }; + format!("{:?} {}", self.resource_type(), my_ident) } fn do_authorize<'a, 'b>( @@ -242,7 +242,39 @@ macro_rules! 
impl_dyn_authorized_resource_for_global { }; } -impl_dyn_authorized_resource_for_global!(authz::oso_generic::Database); +impl_dyn_authorized_resource_for_resource!(authz::AddressLot); +impl_dyn_authorized_resource_for_resource!(authz::Blueprint); +impl_dyn_authorized_resource_for_resource!(authz::Certificate); +impl_dyn_authorized_resource_for_resource!(authz::DeviceAccessToken); +impl_dyn_authorized_resource_for_resource!(authz::DeviceAuthRequest); +impl_dyn_authorized_resource_for_resource!(authz::Disk); +impl_dyn_authorized_resource_for_resource!(authz::Fleet); +impl_dyn_authorized_resource_for_resource!(authz::FloatingIp); +impl_dyn_authorized_resource_for_resource!(authz::IdentityProvider); +impl_dyn_authorized_resource_for_resource!(authz::Image); +impl_dyn_authorized_resource_for_resource!(authz::Instance); +impl_dyn_authorized_resource_for_resource!(authz::InstanceNetworkInterface); +impl_dyn_authorized_resource_for_resource!(authz::LoopbackAddress); +impl_dyn_authorized_resource_for_resource!(authz::Rack); +impl_dyn_authorized_resource_for_resource!(authz::PhysicalDisk); +impl_dyn_authorized_resource_for_resource!(authz::Project); +impl_dyn_authorized_resource_for_resource!(authz::ProjectImage); +impl_dyn_authorized_resource_for_resource!(authz::SamlIdentityProvider); +impl_dyn_authorized_resource_for_resource!(authz::Service); +impl_dyn_authorized_resource_for_resource!(authz::Silo); +impl_dyn_authorized_resource_for_resource!(authz::SiloGroup); +impl_dyn_authorized_resource_for_resource!(authz::SiloImage); +impl_dyn_authorized_resource_for_resource!(authz::SiloUser); +impl_dyn_authorized_resource_for_resource!(authz::Sled); +impl_dyn_authorized_resource_for_resource!(authz::Snapshot); +impl_dyn_authorized_resource_for_resource!(authz::SshKey); +impl_dyn_authorized_resource_for_resource!(authz::TufArtifact); +impl_dyn_authorized_resource_for_resource!(authz::TufRepo); +impl_dyn_authorized_resource_for_resource!(authz::Vpc); 
+impl_dyn_authorized_resource_for_resource!(authz::VpcSubnet); +impl_dyn_authorized_resource_for_resource!(authz::Zpool); + +impl_dyn_authorized_resource_for_global!(authz::Database); impl_dyn_authorized_resource_for_global!(authz::BlueprintConfig); impl_dyn_authorized_resource_for_global!(authz::ConsoleSessionList); impl_dyn_authorized_resource_for_global!(authz::DeviceAuthRequestList); diff --git a/nexus/db-queries/src/authz/policy_test/resources.rs b/nexus/db-queries/src/policy_test/resources.rs similarity index 99% rename from nexus/db-queries/src/authz/policy_test/resources.rs rename to nexus/db-queries/src/policy_test/resources.rs index bc30e77fac..478fa169ff 100644 --- a/nexus/db-queries/src/authz/policy_test/resources.rs +++ b/nexus/db-queries/src/policy_test/resources.rs @@ -6,8 +6,8 @@ use super::resource_builder::ResourceBuilder; use super::resource_builder::ResourceSet; -use crate::authz; use crate::db::model::ArtifactId; +use nexus_auth::authz; use nexus_db_model::SemverVersion; use omicron_common::api::external::LookupType; use omicron_uuid_kinds::GenericUuid; @@ -367,8 +367,8 @@ pub fn exempted_authz_classes() -> BTreeSet { [ // Non-resources: authz::Action::get_polar_class(), - authz::actor::AnyActor::get_polar_class(), - authz::actor::AuthenticatedActor::get_polar_class(), + authz::AnyActor::get_polar_class(), + authz::AuthenticatedActor::get_polar_class(), // Resources whose behavior should be identical to an existing type // and we don't want to do the test twice for performance reasons: // none yet. 
diff --git a/nexus/db-queries/tests/output/authz-roles.out b/nexus/db-queries/tests/output/authz-roles.out index 0482cdfd2a..41a1ded3b4 100644 --- a/nexus/db-queries/tests/output/authz-roles.out +++ b/nexus/db-queries/tests/output/authz-roles.out @@ -1,4 +1,4 @@ -resource: authz::oso_generic::Database +resource: authz::Database USER Q R LC RP M MP CC D fleet-admin ✔ ✘ ✘ ✘ ✘ ✘ ✘ ✘ diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 263ab24c70..f9bcc2cf80 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -377,7 +377,7 @@ impl Nexus { log.new(o!("component" => "DataLoader")), Arc::clone(&authz), authn::Context::internal_db_init(), - Arc::clone(&db_datastore), + Arc::clone(&db_datastore) as Arc, ); let populate_args = PopulateArgs::new(rack_id); @@ -391,7 +391,7 @@ impl Nexus { log.new(o!("component" => "BackgroundTasks")), Arc::clone(&authz), authn::Context::internal_api(), - Arc::clone(&db_datastore), + Arc::clone(&db_datastore) as Arc, ); let v2p_watcher_channel = tokio::sync::watch::channel(()); @@ -440,13 +440,15 @@ impl Nexus { log.new(o!("component" => "InstanceAllocator")), Arc::clone(&authz), authn::Context::internal_read(), - Arc::clone(&db_datastore), + Arc::clone(&db_datastore) + as Arc, ), opctx_external_authn: OpContext::for_background( log.new(o!("component" => "ExternalAuthn")), Arc::clone(&authz), authn::Context::external_authn(), - Arc::clone(&db_datastore), + Arc::clone(&db_datastore) + as Arc, ), samael_max_issue_delay: std::sync::Mutex::new(None), internal_resolver: resolver, @@ -469,7 +471,7 @@ impl Nexus { log.new(o!("component" => "SagaRecoverer")), Arc::clone(&authz), authn::Context::internal_saga_recovery(), - Arc::clone(&db_datastore), + Arc::clone(&db_datastore) as Arc, ); let saga_logger = nexus.log.new(o!("saga_type" => "recovery")); let recovery_task = db::recover( @@ -701,7 +703,8 @@ impl Nexus { self.log.new(o!("component" => "ServiceBalancer")), Arc::clone(&self.authz), 
authn::Context::internal_service_balancer(), - Arc::clone(&self.db_datastore), + Arc::clone(&self.db_datastore) + as Arc, ) } @@ -711,7 +714,8 @@ impl Nexus { self.log.new(o!("component" => "InternalApi")), Arc::clone(&self.authz), authn::Context::internal_api(), - Arc::clone(&self.db_datastore), + Arc::clone(&self.db_datastore) + as Arc, ) } diff --git a/nexus/src/app/test_interfaces.rs b/nexus/src/app/test_interfaces.rs index 581b9a89bb..9e7bd1582f 100644 --- a/nexus/src/app/test_interfaces.rs +++ b/nexus/src/app/test_interfaces.rs @@ -73,7 +73,8 @@ impl TestInterfaces for super::Nexus { ) -> Result>, Error> { let opctx = OpContext::for_tests( self.log.new(o!()), - Arc::clone(&self.db_datastore), + Arc::clone(&self.db_datastore) + as Arc, ); self.instance_sled_by_id_with_opctx(id, &opctx).await @@ -98,7 +99,8 @@ impl TestInterfaces for super::Nexus { ) -> Result>, Error> { let opctx = OpContext::for_tests( self.log.new(o!()), - Arc::clone(&self.db_datastore), + Arc::clone(&self.db_datastore) + as Arc, ); let (.., db_disk) = LookupPath::new(&opctx, &self.db_datastore) .disk_id(*id) @@ -112,7 +114,8 @@ impl TestInterfaces for super::Nexus { async fn instance_sled_id(&self, id: &Uuid) -> Result, Error> { let opctx = OpContext::for_tests( self.log.new(o!()), - Arc::clone(&self.db_datastore), + Arc::clone(&self.db_datastore) + as Arc, ); self.instance_sled_id_with_opctx(id, &opctx).await @@ -138,7 +141,8 @@ impl TestInterfaces for super::Nexus { async fn set_disk_as_faulted(&self, disk_id: &Uuid) -> Result { let opctx = OpContext::for_tests( self.log.new(o!()), - Arc::clone(&self.db_datastore), + Arc::clone(&self.db_datastore) + as Arc, ); let (.., authz_disk, db_disk) = diff --git a/nexus/src/external_api/console_api.rs b/nexus/src/external_api/console_api.rs index caff195047..fb0a47bbea 100644 --- a/nexus/src/external_api/console_api.rs +++ b/nexus/src/external_api/console_api.rs @@ -270,13 +270,14 @@ pub(crate) async fn login_saml_redirect( // unauthenticated. 
let opctx = nexus.opctx_external_authn(); - let (.., identity_provider) = IdentityProviderType::lookup( - &nexus.datastore(), - &opctx, - &path_params.silo_name, - &path_params.provider_name, - ) - .await?; + let (.., identity_provider) = nexus + .datastore() + .identity_provider_lookup( + &opctx, + &path_params.silo_name, + &path_params.provider_name, + ) + .await?; match identity_provider { IdentityProviderType::Saml(saml_identity_provider) => { @@ -330,9 +331,9 @@ pub(crate) async fn login_saml( // keep specifically for this purpose. let opctx = nexus.opctx_external_authn(); - let (authz_silo, db_silo, identity_provider) = - IdentityProviderType::lookup( - &nexus.datastore(), + let (authz_silo, db_silo, identity_provider) = nexus + .datastore() + .identity_provider_lookup( &opctx, &path_params.silo_name, &path_params.provider_name, diff --git a/nexus/src/populate.rs b/nexus/src/populate.rs index ffe67baeae..724b25162d 100644 --- a/nexus/src/populate.rs +++ b/nexus/src/populate.rs @@ -388,7 +388,7 @@ mod test { logctx.log.clone(), Arc::new(authz::Authz::new(&logctx.log)), authn::Context::internal_db_init(), - Arc::clone(&datastore), + Arc::clone(&datastore) as Arc, ); let log = &logctx.log; @@ -444,7 +444,7 @@ mod test { logctx.log.clone(), Arc::new(authz::Authz::new(&logctx.log)), authn::Context::internal_db_init(), - Arc::clone(&datastore), + Arc::clone(&datastore) as Arc, ); info!(&log, "cleaning up database"); diff --git a/nexus/tests/integration_tests/saml.rs b/nexus/tests/integration_tests/saml.rs index 80816f2ea2..e075f3e4da 100644 --- a/nexus/tests/integration_tests/saml.rs +++ b/nexus/tests/integration_tests/saml.rs @@ -106,20 +106,23 @@ async fn test_create_a_saml_idp(cptestctx: &ControlPlaneTestContext) { .await .unwrap(); - let (.., retrieved_silo_idp_from_nexus) = IdentityProviderType::lookup( - &nexus.datastore(), - &nexus.opctx_external_authn(), - &omicron_common::api::external::Name::try_from(SILO_NAME.to_string()) + let (.., 
retrieved_silo_idp_from_nexus) = nexus + .datastore() + .identity_provider_lookup( + &nexus.opctx_external_authn(), + &omicron_common::api::external::Name::try_from( + SILO_NAME.to_string(), + ) + .unwrap() + .into(), + &omicron_common::api::external::Name::try_from( + "some-totally-real-saml-provider".to_string(), + ) .unwrap() .into(), - &omicron_common::api::external::Name::try_from( - "some-totally-real-saml-provider".to_string(), ) - .unwrap() - .into(), - ) - .await - .unwrap(); + .await + .unwrap(); match retrieved_silo_idp_from_nexus { IdentityProviderType::Saml(_) => { diff --git a/nexus/tests/integration_tests/silos.rs b/nexus/tests/integration_tests/silos.rs index e95b2870ca..2e6c21bb79 100644 --- a/nexus/tests/integration_tests/silos.rs +++ b/nexus/tests/integration_tests/silos.rs @@ -4,9 +4,7 @@ use crate::integration_tests::saml::SAML_IDP_DESCRIPTOR; use dropshot::ResultsPage; -use nexus_db_queries::authn::silos::{ - AuthenticatedSubject, IdentityProviderType, -}; +use nexus_db_queries::authn::silos::AuthenticatedSubject; use nexus_db_queries::authn::{USER_TEST_PRIVILEGED, USER_TEST_UNPRIVILEGED}; use nexus_db_queries::authz::{self}; use nexus_db_queries::context::OpContext; @@ -525,19 +523,22 @@ async fn test_deleting_a_silo_deletes_the_idp( // Expect that the silo is gone let nexus = &cptestctx.server.server_context().nexus; - let response = IdentityProviderType::lookup( - &nexus.datastore(), - &nexus.opctx_external_authn(), - &omicron_common::api::external::Name::try_from(SILO_NAME.to_string()) + let response = nexus + .datastore() + .identity_provider_lookup( + &nexus.opctx_external_authn(), + &omicron_common::api::external::Name::try_from( + SILO_NAME.to_string(), + ) + .unwrap() + .into(), + &omicron_common::api::external::Name::try_from( + "some-totally-real-saml-provider".to_string(), + ) .unwrap() .into(), - &omicron_common::api::external::Name::try_from( - "some-totally-real-saml-provider".to_string(), ) - .unwrap() - .into(), - ) - .await; 
+ .await; assert!(response.is_err()); match response.err().unwrap() { diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index f82fe1c833..7880422c47 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -220,6 +220,7 @@ tracing = { version = "0.1.40", features = ["log"] } trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.15" } unicode-normalization = { version = "0.1.23" } +unicode-xid = { version = "0.2.4" } usdt = { version = "0.5.0" } usdt-impl = { version = "0.5.0", default-features = false, features = ["asm", "des"] } uuid = { version = "1.8.0", features = ["serde", "v4"] } From 5523ed1d1f9f6fbcfc45dd2faa2797fca0c179da Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 4 Jun 2024 15:35:52 -0700 Subject: [PATCH 10/16] [xtask] Redirect old bash ci_download scripts (#5482) As of https://github.com/oxidecomputer/omicron/pull/5481 , we have a rust replacement with `cargo xtask download`. This replaces the bash scripts with versions that print a deprecation warning and call `cargo xtask download`. 
--------- Co-authored-by: David Crespo Co-authored-by: iliana etaoin --- .github/buildomat/jobs/package.sh | 2 +- README.adoc | 2 +- clients/dpd-client/build.rs | 2 +- package-manifest.toml | 10 +- tools/README.adoc | 20 +-- tools/ci_download_clickhouse | 156 +-------------------- tools/ci_download_cockroachdb | 176 +----------------------- tools/ci_download_console | 101 +------------- tools/ci_download_dendrite_openapi | 87 +----------- tools/ci_download_dendrite_stub | 182 +------------------------ tools/ci_download_maghemite_mgd | 171 +---------------------- tools/ci_download_softnpu_machinery | 29 +--- tools/ci_download_thundermuffin | 153 --------------------- tools/ci_download_transceiver_control | 158 +-------------------- tools/install_builder_prerequisites.sh | 32 ++--- tools/install_runner_prerequisites.sh | 2 +- 16 files changed, 51 insertions(+), 1232 deletions(-) delete mode 100755 tools/ci_download_thundermuffin diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index 81ed41a961..7099306a97 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -20,7 +20,7 @@ WORK=/work pfexec mkdir -p $WORK && pfexec chown $USER $WORK ptime -m ./tools/install_builder_prerequisites.sh -yp -ptime -m ./tools/ci_download_softnpu_machinery +ptime -m cargo xtask download softnpu # Build the test target export CARGO_INCREMENTAL=0 diff --git a/README.adoc b/README.adoc index 9db11f0337..f0e3a88343 100644 --- a/README.adoc +++ b/README.adoc @@ -259,7 +259,7 @@ it, which will be used if no configuration file is given or present in the curre The server also accepts command-line flags for overriding the values of the configuration parameters. -The packages downloaded by `ci_download_clickhouse` include a `config.xml` file with them. +The packages downloaded by `cargo xtask download clickhouse` include a `config.xml` file with them. 
You should probably run ClickHouse via the `omicron-dev` tool, but if you decide to run it manually, you can start the server with: diff --git a/clients/dpd-client/build.rs b/clients/dpd-client/build.rs index 536869b4a2..952a7ddee6 100644 --- a/clients/dpd-client/build.rs +++ b/clients/dpd-client/build.rs @@ -43,7 +43,7 @@ fn main() -> Result<()> { format!("../../out/downloads/dpd-{commit}.json") }); if !Path::new(&local_path).exists() { - bail!("{local_path} doesn't exist; rerun `tools/ci_download_dendrite_openapi` (after updating `tools/dendrite_openapi_version` if the dendrite commit in package-manifest.toml has changed)"); + bail!("{local_path} doesn't exist; rerun `cargo xtask download dendrite-openapi` (after updating `tools/dendrite_openapi_version` if the dendrite commit in package-manifest.toml has changed)"); } println!("cargo:rerun-if-changed={local_path}"); local_path diff --git a/package-manifest.toml b/package-manifest.toml index 9d372cd4df..e1dfd1f4d6 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -121,7 +121,7 @@ source.paths = [ ] output.type = "zone" setup_hint = """ -- Run `./tools/ci_download_console` to download the web console assets +- Run `cargo xtask download console` to download the web console assets - Run `pkg install library/postgresql-13` to download Postgres libraries """ output.intermediate_only = true @@ -170,7 +170,7 @@ source.paths = [ ] output.type = "zone" output.intermediate_only = true -setup_hint = "Run `./tools/ci_download_clickhouse` to download the necessary binaries" +setup_hint = "Run `cargo xtask download clickhouse` to download the necessary binaries" [package.clickhouse_keeper] service_name = "clickhouse_keeper" @@ -196,7 +196,7 @@ source.paths = [ ] output.type = "zone" output.intermediate_only = true -setup_hint = "Run `./tools/ci_download_clickhouse` to download the necessary binaries" +setup_hint = "Run `cargo xtask download clickhouse` to download the necessary binaries" [package.cockroachdb] 
service_name = "cockroachdb" @@ -223,7 +223,7 @@ source.paths = [ ] output.type = "zone" output.intermediate_only = true -setup_hint = "Run `./tools/ci_download_cockroachdb` to download the necessary binaries" +setup_hint = "Run `cargo xtask download cockroach` to download the necessary binaries" [package.omicron-cockroach-admin] service_name = "cockroach-admin" @@ -686,7 +686,7 @@ source.paths = [ ] output.type = "zone" output.intermediate_only = true -setup_hint = "Run `./tools/ci_download_transceiver_control` to download the necessary binaries" +setup_hint = "Run `cargo xtask download transceiver-control` to download the necessary binaries" [package.thundermuffin] service_name = "thundermuffin" diff --git a/tools/README.adoc b/tools/README.adoc index d13d2ba754..5da1c96027 100644 --- a/tools/README.adoc +++ b/tools/README.adoc @@ -46,17 +46,10 @@ https://github.com/oxidecomputer/console[console]). There are many scripts here related to each other, but they share some common naming. At a high-level: -- scripts with names of the form `ci_download_*` are intended to install a - single dependency. This typically involves installing a tarfile and unpacking - into a known location (such as `out/` from the root of the source tree). They - may also fetch artifacts from buildomat. Some of these tools will verify the - download binary against a checksum. Despite their name, these - scripts are not exclusively used in CI (they are suggested to be run by - developers in hint messages of other tools). -- `\*\_checksums` contain known checksums against which `ci_download_*` will +- `\*\_checksums` contain known checksums against which `cargo xtask download` will verify a dependency when it is downloaded. - `\*\_version` files (such as `console_version`) specify a version that is used - by the `ci_download_*` scripts. + by the `cargo xtask download` scripts. 
- scripts of the form `install_*.sh` are intended to install specific dependencies (or collections of dependencies) onto a development machine of some kind, for running or deploying Omicron. A current example is @@ -71,4 +64,11 @@ The `update_*.sh` scripts are intended to help update the current supported version of a specific dependency. Some of these operate on Rust dependencies (such as https://github.com/oxidecomputer/propolis[propolis] and https://github.com/oxidecomputer/crucible[crucible]), while others are related -to the binaries downloaded by the `ci_download_*` scripts. +to the binaries downloaded by the `cargo xtask download` scripts. + +== "Virtual Hardware" Management + +Another class of scripts is related to the setup and teardown of "virtual +hardware", which can be useful when deploying omicron with a real sled agent. +Current examples include the `cargo xtask virtual-hardware` task. +See xref:docs/how-to-run.adoc[] for details. diff --git a/tools/ci_download_clickhouse b/tools/ci_download_clickhouse index c5f1cae914..41dfda8510 100755 --- a/tools/ci_download_clickhouse +++ b/tools/ci_download_clickhouse @@ -1,155 +1,5 @@ #!/usr/bin/env bash -# -# ci_download_clickhouse: fetches the appropriate ClickHouse binary tarball -# based on the currently running operating system, unpacks it, and creates a -# copy called "clickhouse", all in the current directory. -# - -set -o pipefail -set -o xtrace -set -o errexit - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename "${BASH_SOURCE[0]}")" - -TARGET_DIR="out" -# Location where intermediate artifacts are downloaded / unpacked. -DOWNLOAD_DIR="$TARGET_DIR/downloads" -# Location where the final clickhouse directory should end up.
-DEST_DIR="./$TARGET_DIR/clickhouse" - -# If you change the version in clickhouse_version, you must also update the -# md5sums in clickhouse_checksums -CIDL_VERSION="$(cat "$SOURCE_DIR/clickhouse_version")" -source "$SOURCE_DIR/clickhouse_checksums" - -# Download from manually-populated S3 bucket for now -CIDL_URL_BASE="https://oxide-clickhouse-build.s3.us-west-2.amazonaws.com" - -function main -{ - # - # Process command-line arguments. We generally don't expect any, but - # we allow callers to specify a value to override OSTYPE, just for - # testing. - # - if [[ $# != 0 ]]; then - CIDL_OS="$1" - shift - else - CIDL_OS="$OSTYPE" - fi - - if [[ $# != 0 ]]; then - echo "unexpected arguments" >&2 - exit 2 - fi - - # Configure this program - configure_os "$CIDL_OS" - CIDL_URL="$CIDL_URL_BASE/$TARBALL_FILENAME" - - # Download the file. - echo "URL: $CIDL_URL" - echo "Local file: $TARBALL_FILE" - - mkdir -p "$DOWNLOAD_DIR" - mkdir -p "$DEST_DIR" - - local DO_DOWNLOAD="true" - if [[ -f "$TARBALL_FILE" ]]; then - # If the file exists with a valid checksum, we can skip downloading. - calculated_md5="$($CIDL_MD5FUNC "$TARBALL_FILE")" || \ - fail "failed to calculate md5sum" - if [[ "$calculated_md5" == "$CIDL_MD5" ]]; then - DO_DOWNLOAD="false" - fi - fi - - if [ "$DO_DOWNLOAD" == "true" ]; then - echo "Downloading..." - do_download_curl "$CIDL_URL" "$TARBALL_FILE" || \ - fail "failed to download file" - - # Verify the md5sum. 
- calculated_md5="$($CIDL_MD5FUNC "$TARBALL_FILE")" || \ - fail "failed to calculate md5sum" - if [[ "$calculated_md5" != "$CIDL_MD5" ]]; then - fail "md5sum mismatch \ - (expected $CIDL_MD5, found $calculated_md5)" - fi - fi - - # Unpack the tarball into a local directory - do_untar "$TARBALL_FILE" "$DEST_DIR" - - # on macOS, we need to take the binary out of quarantine after download - # https://github.com/ClickHouse/clickhouse-docs/blob/08d7a329d/knowledgebase/fix-developer-verification-error-in-macos.md - if [[ $CIDL_OS == darwin* ]]; then - xattr -d com.apple.quarantine "$DEST_DIR/clickhouse" - fi - - # Run the binary as a sanity-check. - "$DEST_DIR/clickhouse" server --version -} - -function fail -{ - echo "$ARG0: $*" >&2 - exit 1 -} - -function configure_os -{ - echo "current directory: $PWD" - echo "configuring based on OS: \"$1\"" - CIDL_DASHREV= - case "$1" in - darwin*) - CIDL_PLATFORM="macos" - CIDL_MD5="$CIDL_MD5_DARWIN" - CIDL_MD5FUNC="do_md5" - ;; - linux-gnu*) - CIDL_PLATFORM="linux" - CIDL_MD5="$CIDL_MD5_LINUX" - CIDL_MD5FUNC="do_md5sum" - ;; - solaris*) - CIDL_PLATFORM="illumos" - CIDL_MD5="$CIDL_MD5_ILLUMOS" - CIDL_MD5FUNC="do_md5sum" - ;; - *) - fail "unsupported OS: $1" - ;; - esac - - TARBALL_DIRNAME="clickhouse-$CIDL_VERSION" - TARBALL_FILENAME="$TARBALL_DIRNAME$CIDL_DASHREV.$CIDL_PLATFORM.tar.gz" - - TARBALL_FILE="$DOWNLOAD_DIR/$TARBALL_FILENAME" -} - -function do_download_curl -{ - curl --silent --show-error --fail --location --output "$2" "$1" -} - -function do_md5 -{ - md5 < "$1" -} - -function do_md5sum -{ - md5sum < "$1" | awk '{print $1}' -} - -function do_untar -{ - mkdir -p "$2" && tar xzf "$1" -C "$2" -} - -main "$@" +echo -e "\e[0;33m$0 is deprecated; running: cargo xtask download clickhouse\e[0m" +cd "$(dirname "${BASH_SOURCE[0]}")/.." 
|| exit 42 +exec cargo xtask download clickhouse diff --git a/tools/ci_download_cockroachdb b/tools/ci_download_cockroachdb index 5755e7e665..6e4e9c8a32 100755 --- a/tools/ci_download_cockroachdb +++ b/tools/ci_download_cockroachdb @@ -1,175 +1,5 @@ #!/usr/bin/env bash -# -# ci_download_cockroachdb: fetches the appropriate CockroachDB binary tarball -# based on the currently running operating system, unpacks it, and creates a -# copy called "cockroach", all in the current directory. -# - -set -o pipefail -set -o xtrace -set -o errexit - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename "${BASH_SOURCE[0]}")" - -# If you change this, you must also update the sha256sums below -CIDL_VERSION="$(cat "$SOURCE_DIR/cockroachdb_version")" -source "$SOURCE_DIR/cockroachdb_checksums" - -TARGET_DIR="out" -# Location where intermediate artifacts are downloaded / unpacked. -DOWNLOAD_DIR="$TARGET_DIR/downloads" -# Location where the final cockroachdb directory should end up. -DEST_DIR="./$TARGET_DIR/cockroachdb" - -# Official (or unofficial) download sites -CIDL_URL_COCKROACH="https://binaries.cockroachdb.com" -CIDL_URL_ILLUMOS="https://illumos.org/downloads" - -function main -{ - # - # Process command-line arguments. We generally don't expect any, but - # we allow callers to specify a value to override OSTYPE, just for - # testing. - # - if [[ $# != 0 ]]; then - CIDL_OS="$1" - shift - else - CIDL_OS="$OSTYPE" - fi - - if [[ $# != 0 ]]; then - echo "unexpected arguments" >&2 - exit 2 - fi - - # Configure this program - configure_os "$CIDL_OS" - CIDL_URL="$CIDL_URL_BASE/$TARBALL_FILENAME" - CIDL_SHA256FUNC="do_sha256sum" - - # Download the file. - echo "URL: $CIDL_URL" - echo "Local file: $TARBALL_FILE" - - mkdir -p "$DOWNLOAD_DIR" - mkdir -p "$DEST_DIR" - - local DO_DOWNLOAD="true" - if [[ -f "$TARBALL_FILE" ]]; then - # If the file exists with a valid checksum, we can skip downloading. 
- calculated_sha256="$($CIDL_SHA256FUNC "$TARBALL_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha256" == "$CIDL_SHA256" ]]; then - DO_DOWNLOAD="false" - fi - fi - - if [ "$DO_DOWNLOAD" == "true" ]; then - echo "Downloading..." - do_download_curl "$CIDL_URL" "$TARBALL_FILE" || \ - fail "failed to download file" - - # Verify the sha256sum. - calculated_sha256="$($CIDL_SHA256FUNC "$TARBALL_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha256" != "$CIDL_SHA256" ]]; then - fail "sha256sum mismatch \ - (expected $CIDL_SHA256, found $calculated_sha256)" - fi - fi - - # Unpack the tarball. - do_untar "$TARBALL_FILE" - - # Copy the "cockroach" binary to the right spot. - $CIDL_ASSEMBLE - - # Run the binary as a sanity-check. - "$DEST_DIR/bin/cockroach" version -} - -function fail -{ - echo "$ARG0: $*" >&2 - exit 1 -} - -function configure_os -{ - echo "current directory: $PWD" - echo "configuring based on OS: \"$1\"" - case "$1" in - darwin*) - CIDL_BUILD="darwin-10.9-amd64" - CIDL_SUFFIX="tgz" - CIDL_SHA256="$CIDL_SHA256_DARWIN" - CIDL_URL_BASE="$CIDL_URL_COCKROACH" - CIDL_ASSEMBLE="do_assemble_official" - ;; - linux-gnu*) - CIDL_BUILD="linux-amd64" - CIDL_SUFFIX="tgz" - CIDL_SHA256="$CIDL_SHA256_LINUX" - CIDL_URL_BASE="$CIDL_URL_COCKROACH" - CIDL_ASSEMBLE="do_assemble_official" - ;; - solaris*) - CIDL_BUILD="illumos" - CIDL_SUFFIX="tar.gz" - CIDL_SHA256="$CIDL_SHA256_ILLUMOS" - CIDL_URL_BASE="$CIDL_URL_ILLUMOS" - CIDL_ASSEMBLE="do_assemble_illumos" - ;; - *) - fail "unsupported OS: $1" - ;; - esac - - TARBALL_DIRNAME="cockroach-$CIDL_VERSION.$CIDL_BUILD" - TARBALL_FILENAME="$TARBALL_DIRNAME.$CIDL_SUFFIX" - - TARBALL_FILE="$DOWNLOAD_DIR/$TARBALL_FILENAME" - TARBALL_DIR="$DOWNLOAD_DIR/$TARBALL_DIRNAME" -} - -function do_download_curl -{ - curl --silent --show-error --fail --location --output "$2" "$1" -} - -function do_sha256sum -{ - sha256sum < "$1" | awk '{print $1}' -} - -function do_untar -{ - tar xzf "$1" -C 
"$DOWNLOAD_DIR" -} - -# -# "Assembling" here is taking unpacked tarball and putting together a directory -# structure that's common for all platforms. This allows consumers (i.e., CI) -# to assume the same directory structure for all platforms. This is -# platform-specific because on illumos, the tarball itself has a different -# structure than the official release tarballs and the `cockroach` binary has -# dynamic library dependencies. -# - -function do_assemble_official -{ - mkdir -p "$DEST_DIR/bin" - cp "$TARBALL_DIR/cockroach" "$DEST_DIR/bin" -} - -function do_assemble_illumos -{ - rm -r "$DEST_DIR" || true - cp -r "$DOWNLOAD_DIR/cockroach-$CIDL_VERSION" "$DEST_DIR" -} - -main "$@" +echo -e "\e[0;33m$0 is deprecated; running: cargo xtask download cockroach\e[0m" +cd "$(dirname "${BASH_SOURCE[0]}")/.." || exit 42 +exec cargo xtask download cockroach diff --git a/tools/ci_download_console b/tools/ci_download_console index b27cebe79f..567168081b 100755 --- a/tools/ci_download_console +++ b/tools/ci_download_console @@ -1,100 +1,5 @@ #!/usr/bin/env bash -# -# ci_download_console: fetches the appropriate Console assets. -# - -set -o pipefail -set -o xtrace -set -o errexit - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename "${BASH_SOURCE[0]}")" - -TARGET_DIR="out" -# Location where intermediate artifacts are downloaded / unpacked. -DOWNLOAD_DIR="$TARGET_DIR/downloads" -# Location where the final console directory should end up. -DEST_DIR="./$TARGET_DIR/console-assets" - -source "$SOURCE_DIR/console_version" - -URL="https://dl.oxide.computer/releases/console/$COMMIT.tar.gz" -TARBALL_FILE="$DOWNLOAD_DIR/console.tar.gz" - -function main -{ - if [[ $# != 0 ]]; then - echo "unexpected arguments" >&2 - exit 2 - fi - - # Download the file. 
- echo "URL: $URL" - echo "Local file: $TARBALL_FILE" - - local DO_DOWNLOAD="true" - if [[ -f "$TARBALL_FILE" ]]; then - calculated_sha2="$(do_sha256sum "$TARBALL_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha2" == "$SHA2" ]]; then - DO_DOWNLOAD="false" - fi - fi - - mkdir -p "$DOWNLOAD_DIR" - - if [ "$DO_DOWNLOAD" == "true" ]; then - echo "Downloading..." - do_download_curl "$URL" "$TARBALL_FILE" || \ - fail "failed to download file" - - # Verify the sha256sum. - calculated_sha2="$(do_sha256sum "$TARBALL_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha2" != "$SHA2" ]]; then - fail "sha256sum mismatch \ - (expected $SHA2, found $calculated_sha2)" - fi - - fi - - # clear out existing console assets - rm -rf "$DEST_DIR" - mkdir -p "$DEST_DIR" - - # Unpack the tarball into a local directory - do_untar "$TARBALL_FILE" "$DEST_DIR" -} - -function fail -{ - echo "$ARG0: $*" >&2 - exit 1 -} - -function do_download_curl -{ - curl --silent --show-error --fail --location --output "$2" "$1" -} - -function do_sha256sum -{ - case "$OSTYPE" in - darwin*) - SHA="shasum -a 256" - ;; - *) - SHA="sha256sum" - ;; - esac - - $SHA < "$1" | awk '{print $1}' -} - -function do_untar -{ - mkdir -p "$2" && tar xzf "$1" -C "$2" -} - -main "$@" +echo -e "\e[0;33m$0 is deprecated; running: cargo xtask download console\e[0m" +cd "$(dirname "${BASH_SOURCE[0]}")/.." || exit 42 +exec cargo xtask download console diff --git a/tools/ci_download_dendrite_openapi b/tools/ci_download_dendrite_openapi index 395bc62056..fc640e9d34 100755 --- a/tools/ci_download_dendrite_openapi +++ b/tools/ci_download_dendrite_openapi @@ -1,86 +1,5 @@ #!/usr/bin/env bash -# -# ci_download_dendrite_openapi: fetches the appropriate dendrite openapi spec. 
-# - -set -o pipefail -set -o xtrace -set -o errexit - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename "${BASH_SOURCE[0]}")" - -TARGET_DIR="out" -# Location where intermediate artifacts are downloaded / unpacked. -DOWNLOAD_DIR="$TARGET_DIR/downloads" - -source "$SOURCE_DIR/dendrite_openapi_version" - -URL="https://buildomat.eng.oxide.computer/public/file/oxidecomputer/dendrite/openapi/$COMMIT/dpd.json" -LOCAL_FILE="$DOWNLOAD_DIR/dpd-$COMMIT.json" - -function main -{ - if [[ $# != 0 ]]; then - echo "unexpected arguments" >&2 - exit 2 - fi - - # Download the file. - echo "URL: $URL" - echo "Local file: $LOCAL_FILE" - - local DO_DOWNLOAD="true" - if [[ -f "$LOCAL_FILE" ]]; then - calculated_sha2="$(do_sha256sum "$LOCAL_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha2" == "$SHA2" ]]; then - DO_DOWNLOAD="false" - fi - fi - - mkdir -p "$DOWNLOAD_DIR" - - if [ "$DO_DOWNLOAD" == "true" ]; then - echo "Downloading..." - do_download_curl "$URL" "$LOCAL_FILE" || \ - fail "failed to download file" - - # Verify the sha256sum. - calculated_sha2="$(do_sha256sum "$LOCAL_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha2" != "$SHA2" ]]; then - fail "sha256sum mismatch \ - (expected $SHA2, found $calculated_sha2)" - fi - - fi -} - -function fail -{ - echo "$ARG0: $*" >&2 - exit 1 -} - -function do_download_curl -{ - curl --silent --show-error --fail --location --output "$2" "$1" -} - -function do_sha256sum -{ - case "$OSTYPE" in - darwin*) - SHA="shasum -a 256" - ;; - *) - SHA="sha256sum" - ;; - esac - - $SHA < "$1" | awk '{print $1}' -} - -main "$@" +echo -e "\e[0;33m$0 is deprecated; running: cargo xtask download dendrite-openapi\e[0m" +cd "$(dirname "${BASH_SOURCE[0]}")/.." 
|| exit 42 +exec cargo xtask download dendrite-openapi diff --git a/tools/ci_download_dendrite_stub b/tools/ci_download_dendrite_stub index d1db31c697..f0d1b2c7cb 100755 --- a/tools/ci_download_dendrite_stub +++ b/tools/ci_download_dendrite_stub @@ -1,181 +1,5 @@ #!/usr/bin/env bash -# -# ci_download_dendrite_stub: fetches the appropriate Dendrite binary tarball -# based on the currently running operating system, unpacks it, and creates a -# copy called "dendrite-stub", all in the current directory. -# - -set -o pipefail -set -o xtrace -set -o errexit - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename "${BASH_SOURCE[0]}")" - -# If you change this, you must also update the sha256sums below -source "$SOURCE_DIR/dendrite_stub_checksums" -source "$SOURCE_DIR/dendrite_openapi_version" - -TARGET_DIR="out" -# Location where intermediate artifacts are downloaded / unpacked. -DOWNLOAD_DIR="$TARGET_DIR/downloads" -# Location where the final dendrite-stub directory should end up. -DEST_DIR="./$TARGET_DIR/dendrite-stub" -BIN_DIR="$DEST_DIR/root/opt/oxide/dendrite/bin" - -ARTIFACT_URL="https://buildomat.eng.oxide.computer/public/file" - -REPO='oxidecomputer/dendrite' -PACKAGE_BASE_URL="$ARTIFACT_URL/$REPO/image/$COMMIT" - -function main -{ - # - # Process command-line arguments. We generally don't expect any, but - # we allow callers to specify a value to override OSTYPE, just for - # testing. - # - if [[ $# != 0 ]]; then - CIDL_OS="$1" - shift - else - CIDL_OS="$OSTYPE" - fi - - if [[ $# != 0 ]]; then - echo "unexpected arguments" >&2 - exit 2 - fi - - # Configure this program - configure_os "$CIDL_OS" - - CIDL_SHA256="$CIDL_SHA256_ILLUMOS" - CIDL_SHA256FUNC="do_sha256sum" - TARBALL_FILENAME="dendrite-stub.tar.gz" - PACKAGE_URL="$PACKAGE_BASE_URL/$TARBALL_FILENAME" - TARBALL_FILE="$DOWNLOAD_DIR/$TARBALL_FILENAME" - - # Download the file. 
- echo "URL: $PACKAGE_URL" - echo "Local file: $TARBALL_FILE" - - mkdir -p "$DOWNLOAD_DIR" - mkdir -p "$DEST_DIR" - - fetch_and_verify - - do_untar "$TARBALL_FILE" - - do_assemble - - $SET_BINARIES -} - -function fail -{ - echo "$ARG0: $*" >&2 - exit 1 -} - -function configure_os -{ - echo "current directory: $PWD" - echo "configuring based on OS: \"$1\"" - case "$1" in - linux-gnu*) - SET_BINARIES="linux_binaries" - ;; - solaris*) - SET_BINARIES="" - ;; - *) - echo "WARNING: binaries for $1 are not published by dendrite" - echo "Network apis will be unavailable" - SET_BINARIES="unsupported_os" - ;; - esac -} - -function do_download_curl -{ - curl --silent --show-error --fail --location --output "$2" "$1" -} - -function do_sha256sum -{ - sha256sum < "$1" | awk '{print $1}' -} - -function do_untar -{ - tar xzf "$1" -C "$DOWNLOAD_DIR" -} - -function do_assemble -{ - rm -r "$DEST_DIR" || true - mkdir "$DEST_DIR" - cp -r "$DOWNLOAD_DIR/root" "$DEST_DIR/root" - # Symbolic links for backwards compatibility with existing setups - ln -s "$PWD"/out/dendrite-stub/root/opt/oxide/dendrite/bin/ "$PWD"/out/dendrite-stub/bin -} - -function fetch_and_verify -{ - local DO_DOWNLOAD="true" - if [[ -f "$TARBALL_FILE" ]]; then - # If the file exists with a valid checksum, we can skip downloading. - calculated_sha256="$($CIDL_SHA256FUNC "$TARBALL_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha256" == "$CIDL_SHA256" ]]; then - DO_DOWNLOAD="false" - fi - fi - - if [ "$DO_DOWNLOAD" == "true" ]; then - echo "Downloading..." - do_download_curl "$PACKAGE_URL" "$TARBALL_FILE" || \ - fail "failed to download file" - - # Verify the sha256sum. 
- calculated_sha256="$($CIDL_SHA256FUNC "$TARBALL_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha256" != "$CIDL_SHA256" ]]; then - fail "sha256sum mismatch \ - (expected $CIDL_SHA256, found $calculated_sha256)" - fi - fi - -} - -function linux_binaries -{ - PACKAGE_BASE_URL="$ARTIFACT_URL/$REPO/linux-bin/$COMMIT" - CIDL_SHA256="$CIDL_SHA256_LINUX_DPD" - CIDL_SHA256FUNC="do_sha256sum" - TARBALL_FILENAME="dpd" - PACKAGE_URL="$PACKAGE_BASE_URL/$TARBALL_FILENAME" - TARBALL_FILE="$DOWNLOAD_DIR/$TARBALL_FILENAME" - fetch_and_verify - chmod +x "$DOWNLOAD_DIR/dpd" - cp "$DOWNLOAD_DIR/dpd" "$BIN_DIR" - - CIDL_SHA256="$CIDL_SHA256_LINUX_SWADM" - TARBALL_FILENAME="swadm" - PACKAGE_URL="$PACKAGE_BASE_URL/$TARBALL_FILENAME" - TARBALL_FILE="$DOWNLOAD_DIR/$TARBALL_FILENAME" - fetch_and_verify - chmod +x "$DOWNLOAD_DIR/swadm" - cp "$DOWNLOAD_DIR/swadm" "$BIN_DIR" -} - -function unsupported_os -{ - mkdir -p "$BIN_DIR" - echo "echo 'unsupported os' && exit 1" >> "$BIN_DIR/dpd" - chmod +x "$BIN_DIR/dpd" -} - -main "$@" +echo -e "\e[0;33m$0 is deprecated; running: cargo xtask download dendrite-stub\e[0m" +cd "$(dirname "${BASH_SOURCE[0]}")/.." || exit 42 +exec cargo xtask download dendrite-stub diff --git a/tools/ci_download_maghemite_mgd b/tools/ci_download_maghemite_mgd index bf6be1d5b1..d94c211c65 100755 --- a/tools/ci_download_maghemite_mgd +++ b/tools/ci_download_maghemite_mgd @@ -1,170 +1,5 @@ #!/usr/bin/env bash -# -# ci_download_maghemite_mgd: fetches the maghemite mgd binary tarball, unpacks -# it, and creates a copy called mgd, all in the current directory -# - -set -o pipefail -set -o xtrace -set -o errexit - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename "${BASH_SOURCE[0]}")" - -source "$SOURCE_DIR/maghemite_mgd_checksums" -source "$SOURCE_DIR/maghemite_mg_openapi_version" - -TARGET_DIR="out" -# Location where intermediate artifacts are downloaded / unpacked. 
-DOWNLOAD_DIR="$TARGET_DIR/downloads" -# Location where the final mgd directory should end up. -DEST_DIR="./$TARGET_DIR/mgd" -BIN_DIR="$DEST_DIR/root/opt/oxide/mgd/bin" - -ARTIFACT_URL="https://buildomat.eng.oxide.computer/public/file" - -REPO='oxidecomputer/maghemite' -PACKAGE_BASE_URL="$ARTIFACT_URL/$REPO/image/$COMMIT" - -function main -{ - rm -rf $DOWNLOAD_DIR/root - - # - # Process command-line arguments. We generally don't expect any, but - # we allow callers to specify a value to override OSTYPE, just for - # testing. - # - if [[ $# != 0 ]]; then - CIDL_OS="$1" - shift - else - CIDL_OS="$OSTYPE" - fi - - if [[ $# != 0 ]]; then - echo "unexpected arguments" >&2 - exit 2 - fi - - # Configure this program - configure_os "$CIDL_OS" - - CIDL_SHA256FUNC="do_sha256sum" - TARBALL_FILENAME="mgd.tar.gz" - PACKAGE_URL="$PACKAGE_BASE_URL/$TARBALL_FILENAME" - TARBALL_FILE="$DOWNLOAD_DIR/$TARBALL_FILENAME" - - # Download the file. - echo "URL: $PACKAGE_URL" - echo "Local file: $TARBALL_FILE" - - mkdir -p "$DOWNLOAD_DIR" - mkdir -p "$DEST_DIR" - - fetch_and_verify - - do_untar "$TARBALL_FILE" - - do_assemble - - $SET_BINARIES -} - -function fail -{ - echo "$ARG0: $@" >&2 - exit 1 -} - -function configure_os -{ - echo "current directory: $PWD" - echo "configuring based on OS: \"$1\"" - case "$1" in - linux-gnu*) - SET_BINARIES="linux_binaries" - ;; - solaris*) - SET_BINARIES="" - ;; - *) - echo "WARNING: binaries for $1 are not published by maghemite" - echo "Dynamic routing apis will be unavailable" - SET_BINARIES="unsupported_os" - ;; - esac -} - -function do_download_curl -{ - curl --silent --show-error --fail --location --output "$2" "$1" -} - -function do_sha256sum -{ - sha256sum < "$1" | awk '{print $1}' -} - -function do_untar -{ - tar xzf "$1" -C "$DOWNLOAD_DIR" -} - -function do_assemble -{ - rm -r "$DEST_DIR" || true - mkdir "$DEST_DIR" - cp -r "$DOWNLOAD_DIR/root" "$DEST_DIR/root" -} - -function fetch_and_verify -{ - local DO_DOWNLOAD="true" - if [[ -f 
"$TARBALL_FILE" ]]; then - # If the file exists with a valid checksum, we can skip downloading. - calculated_sha256="$($CIDL_SHA256FUNC "$TARBALL_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha256" == "$CIDL_SHA256" ]]; then - DO_DOWNLOAD="false" - fi - fi - - if [ "$DO_DOWNLOAD" == "true" ]; then - echo "Downloading..." - do_download_curl "$PACKAGE_URL" "$TARBALL_FILE" || \ - fail "failed to download file" - - # Verify the sha256sum. - calculated_sha256="$($CIDL_SHA256FUNC "$TARBALL_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha256" != "$CIDL_SHA256" ]]; then - fail "sha256sum mismatch \ - (expected $CIDL_SHA256, found $calculated_sha256)" - fi - fi - -} - -function linux_binaries -{ - PACKAGE_BASE_URL="$ARTIFACT_URL/$REPO/linux/$COMMIT" - CIDL_SHA256="$MGD_LINUX_SHA256" - CIDL_SHA256FUNC="do_sha256sum" - TARBALL_FILENAME="mgd" - PACKAGE_URL="$PACKAGE_BASE_URL/$TARBALL_FILENAME" - TARBALL_FILE="$DOWNLOAD_DIR/$TARBALL_FILENAME" - fetch_and_verify - chmod +x "$DOWNLOAD_DIR/mgd" - cp "$DOWNLOAD_DIR/mgd" "$BIN_DIR" -} - -function unsupported_os -{ - mkdir -p "$BIN_DIR" - echo "echo 'unsupported os' && exit 1" >> "$BIN_DIR/dpd" - chmod +x "$BIN_DIR/dpd" -} - -main "$@" +echo -e "\e[0;33m$0 is deprecated; running: cargo xtask download maghemite-mgd\e[0m" +cd "$(dirname "${BASH_SOURCE[0]}")/.." 
|| exit 42 +exec cargo xtask download maghemite-mgd diff --git a/tools/ci_download_softnpu_machinery b/tools/ci_download_softnpu_machinery index 5ceb2121fc..5e1a9e7a4b 100755 --- a/tools/ci_download_softnpu_machinery +++ b/tools/ci_download_softnpu_machinery @@ -1,28 +1,5 @@ #!/usr/bin/env bash -# -# This script fetches the following from CI -# -# - the softnpu ASIC simulator (softnpu) -# - a softnpu admin program (scadm) -# - the sidecar-lite precompiled P4 program -# -set -euo pipefail - -TOOLS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" - -OUT_DIR="out/npuzone" - -# Pinned commit for softnpu ASIC simulator -SOFTNPU_REPO="softnpu" -SOFTNPU_COMMIT="3203c51cf4473d30991b522062ac0df2e045c2f2" - -# This is the softnpu ASIC simulator -echo "fetching npuzone" -mkdir -p $OUT_DIR -"$TOOLS_DIR"/ensure_buildomat_artifact.sh \ - -O $OUT_DIR \ - "npuzone" \ - "$SOFTNPU_REPO" \ - "$SOFTNPU_COMMIT" -chmod +x $OUT_DIR/npuzone +echo -e "\e[0;33m$0 is deprecated; running: cargo xtask download softnpu\e[0m" +cd "$(dirname "${BASH_SOURCE[0]}")/.." || exit 42 +exec cargo xtask download softnpu diff --git a/tools/ci_download_thundermuffin b/tools/ci_download_thundermuffin deleted file mode 100755 index 014d1b30b2..0000000000 --- a/tools/ci_download_thundermuffin +++ /dev/null @@ -1,153 +0,0 @@ -#!/bin/bash - -# -# ci_download_probe_packages: fetches thundermuffin binary tarball package, -# unpacks it, and creates a copy, all in the current directory -# - -set -o pipefail -set -o xtrace -set -o errexit - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename "${BASH_SOURCE[0]}")" - -source "$SOURCE_DIR/thundermuffin_checksums" -source "$SOURCE_DIR/thundermuffin_version" - -TARGET_DIR="out" -# Location where intermediate artifacts are downloaded / unpacked. -DOWNLOAD_DIR="$TARGET_DIR/downloads" -# Location where the final thundermuffin directory should end up. 
-DEST_DIR="./$TARGET_DIR/thundermuffin" -BIN_DIR="$DEST_DIR/root/opt/oxide/thundermuffin/bin" - -ARTIFACT_URL="https://buildomat.eng.oxide.computer/public/file" - -REPO='oxidecomputer/thundermuffin' -PACKAGE_BASE_URL="$ARTIFACT_URL/$REPO/image/$COMMIT" - -function main -{ - rm -rf $DOWNLOAD_DIR/root - - # - # Process command-line arguments. We generally don't expect any, but - # we allow callers to specify a value to override OSTYPE, just for - # testing. - # - if [[ $# != 0 ]]; then - CIDL_OS="$1" - shift - else - CIDL_OS="$OSTYPE" - fi - - if [[ $# != 0 ]]; then - echo "unexpected arguments" >&2 - exit 2 - fi - - # Configure this program - configure_os "$CIDL_OS" - - CIDL_SHA256FUNC="do_sha256sum" - TARBALL_FILENAME="thundermuffin.tar.gz" - PACKAGE_URL="$PACKAGE_BASE_URL/$TARBALL_FILENAME" - TARBALL_FILE="$DOWNLOAD_DIR/$TARBALL_FILENAME" - - # Download the file. - echo "URL: $PACKAGE_URL" - echo "Local file: $TARBALL_FILE" - - mkdir -p "$DOWNLOAD_DIR" - mkdir -p "$DEST_DIR" - - fetch_and_verify - - do_untar "$TARBALL_FILE" - - do_assemble - - $SET_BINARIES -} - -function fail -{ - echo "$ARG0: $@" >&2 - exit 1 -} - -function configure_os -{ - echo "current directory: $PWD" - echo "configuring based on OS: \"$1\"" - case "$1" in - solaris*) - SET_BINARIES="" - ;; - *) - echo "WARNING: binaries for $1 are not published by thundermuffin" - SET_BINARIES="unsupported_os" - ;; - esac -} - -function do_download_curl -{ - curl --silent --show-error --fail --location --output "$2" "$1" -} - -function do_sha256sum -{ - sha256sum < "$1" | awk '{print $1}' -} - -function do_untar -{ - tar xzf "$1" -C "$DOWNLOAD_DIR" -} - -function do_assemble -{ - rm -r "$DEST_DIR" || true - mkdir "$DEST_DIR" - cp -r "$DOWNLOAD_DIR/root" "$DEST_DIR/root" -} - -function fetch_and_verify -{ - local DO_DOWNLOAD="true" - if [[ -f "$TARBALL_FILE" ]]; then - # If the file exists with a valid checksum, we can skip downloading. 
- calculated_sha256="$($CIDL_SHA256FUNC "$TARBALL_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha256" == "$CIDL_SHA256" ]]; then - DO_DOWNLOAD="false" - fi - fi - - if [ "$DO_DOWNLOAD" == "true" ]; then - echo "Downloading..." - do_download_curl "$PACKAGE_URL" "$TARBALL_FILE" || \ - fail "failed to download file" - - # Verify the sha256sum. - calculated_sha256="$($CIDL_SHA256FUNC "$TARBALL_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha256" != "$CIDL_SHA256" ]]; then - fail "sha256sum mismatch \ - (expected $CIDL_SHA256, found $calculated_sha256)" - fi - fi - -} - -function unsupported_os -{ - mkdir -p "$BIN_DIR" - echo "echo 'unsupported os' && exit 1" >> "$BIN_DIR/thundermuffin" - chmod +x "$BIN_DIR/thundermuffin" -} - -main "$@" diff --git a/tools/ci_download_transceiver_control b/tools/ci_download_transceiver_control index cdd2528ef3..52a3981f20 100755 --- a/tools/ci_download_transceiver_control +++ b/tools/ci_download_transceiver_control @@ -1,157 +1,5 @@ #!/usr/bin/env bash -# -# ci_download_transceiver_control: fetches the appropriate transceiver-control -# binary tarball based on the currently running operating system, unpacks it, -# and creates a copy called "transceiver-control", all in the current directory. -# - -set -o pipefail -set -o xtrace -set -o errexit - -SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" -ARG0="$(basename "${BASH_SOURCE[0]}")" - -source "$SOURCE_DIR/transceiver_control_version" - -TARGET_DIR="out" -# Location where intermediate artifacts are downloaded / unpacked. -DOWNLOAD_DIR="$TARGET_DIR/downloads" -# Location where the final directory should end up. 
-DEST_DIR="./$TARGET_DIR/transceiver-control" -BIN_DIR="/opt/oxide/bin" -BINARY="xcvradm" - -ARTIFACT_URL="https://buildomat.eng.oxide.computer/public/file" - -REPO='oxidecomputer/transceiver-control' -PACKAGE_BASE_URL="$ARTIFACT_URL/$REPO/bins/$COMMIT" - -function main -{ - # - # Process command-line arguments. We generally don't expect any, but - # we allow callers to specify a value to override OSTYPE, just for - # testing. - # - if [[ $# != 0 ]]; then - CIDL_OS="$1" - shift - else - CIDL_OS="$OSTYPE" - fi - - if [[ $# != 0 ]]; then - echo "unexpected arguments" >&2 - exit 2 - fi - - # Configure this program - configure_os "$CIDL_OS" - - CIDL_SHA256="$CIDL_SHA256_ILLUMOS" - CIDL_SHA256FUNC="do_sha256sum" - ARCHIVE_FILENAME="xcvradm.gz" - PACKAGE_URL="$PACKAGE_BASE_URL/$ARCHIVE_FILENAME" - ARCHIVE_FILE="$DOWNLOAD_DIR/$ARCHIVE_FILENAME" - - # Download the file. - echo "URL: $PACKAGE_URL" - echo "Local file: $ARCHIVE_FILE" - - mkdir -p "$DOWNLOAD_DIR" - mkdir -p "$DEST_DIR" - - fetch_and_verify - - do_unpack "$ARCHIVE_FILE" - - do_assemble - - $SET_BINARIES -} - -function fail -{ - echo "$ARG0: $*" >&2 - exit 1 -} - -function configure_os -{ - echo "current directory: $PWD" - echo "configuring based on OS: \"$1\"" - case "$1" in - solaris*) - SET_BINARIES="" - ;; - *) - echo "WARNING: binaries for $1 are not published by transceiver-control" - SET_BINARIES="unsupported_os" - ;; - esac -} - -function do_download_curl -{ - curl --silent --show-error --fail --location --output "$2" "$1" -} - -function do_sha256sum -{ - sha256sum < "$1" | awk '{print $1}' -} - -function do_unpack -{ - mkdir -p "$DOWNLOAD_DIR/root/$BIN_DIR" - gzip -dc "$1" > "$DOWNLOAD_DIR/root/$BIN_DIR/$BINARY" - chmod +x "$DOWNLOAD_DIR/root/$BIN_DIR/$BINARY" -} - -function do_assemble -{ - rm -r "$DEST_DIR" || true - mkdir "$DEST_DIR" - cp -r "$DOWNLOAD_DIR/root" "$DEST_DIR/root" -} - -function fetch_and_verify -{ - local DO_DOWNLOAD="true" - if [[ -f "$ARCHIVE_FILE" ]]; then - # If the file exists 
with a valid checksum, we can skip downloading. - calculated_sha256="$($CIDL_SHA256FUNC "$ARCHIVE_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha256" == "$CIDL_SHA256" ]]; then - DO_DOWNLOAD="false" - fi - fi - - if [ "$DO_DOWNLOAD" == "true" ]; then - echo "Downloading..." - do_download_curl "$PACKAGE_URL" "$ARCHIVE_FILE" || \ - fail "failed to download file" - - # Verify the sha256sum. - calculated_sha256="$($CIDL_SHA256FUNC "$ARCHIVE_FILE")" || \ - fail "failed to calculate sha256sum" - if [[ "$calculated_sha256" != "$CIDL_SHA256" ]]; then - fail "sha256sum mismatch \ - (expected $CIDL_SHA256, found $calculated_sha256)" - fi - fi - -} - -function unsupported_os -{ - typeset dir="$DEST_DIR/$BIN_DIR" - - mkdir -p "$dir" - echo "echo 'unsupported os' && exit 1" >> "$dir/$BINARY" - chmod +x "$dir/$BINARY" -} - -main "$@" +echo -e "\e[0;33m$0 is deprecated; running: cargo xtask download transceiver-control\e[0m" +cd "$(dirname "${BASH_SOURCE[0]}")/.." || exit 42 +exec cargo xtask download transceiver-control diff --git a/tools/install_builder_prerequisites.sh b/tools/install_builder_prerequisites.sh index 071ea1b458..ead36ca2a9 100755 --- a/tools/install_builder_prerequisites.sh +++ b/tools/install_builder_prerequisites.sh @@ -191,30 +191,14 @@ retry install_packages # - Packaging: When constructing packages on Helios, these utilities # are packaged into zones which may be launched by the sled agent. -retry ./tools/ci_download_cockroachdb -retry ./tools/ci_download_clickhouse - -# Install static console assets. These are used when packaging Nexus. -retry ./tools/ci_download_console - -# Download the OpenAPI spec for dendrite. This is required to build the -# dpd-client crate. -retry ./tools/ci_download_dendrite_openapi - -# Download dendrite-stub. This is required to run tests without a live -# asic and running dendrite instance -retry ./tools/ci_download_dendrite_stub - -# Download mgd. 
This is required to run tests that invovle dynamic external -# routing -retry ./tools/ci_download_maghemite_mgd - -# Download transceiver-control. This is used as the source for the -# xcvradm binary which is bundled with the switch zone. -retry ./tools/ci_download_transceiver_control - -# Download thundermuffin. This is required to launch network probes. -retry ./tools/ci_download_thundermuffin +retry cargo xtask download \ + cockroach \ + clickhouse \ + console \ + dendrite-openapi \ + dendrite-stub \ + maghemite-mgd \ + transceiver-control # Validate the PATH: expected_in_path=( diff --git a/tools/install_runner_prerequisites.sh b/tools/install_runner_prerequisites.sh index 7cf8722447..c863afcbd4 100755 --- a/tools/install_runner_prerequisites.sh +++ b/tools/install_runner_prerequisites.sh @@ -154,7 +154,7 @@ if [[ "${HOST_OS}" == "SunOS" ]]; then # Grab the SoftNPU machinery (ASIC simulator, scadm, P4 program, etc.) # # "cargo xtask virtual-hardware create" will use those to setup the softnpu zone - retry ./tools/ci_download_softnpu_machinery + retry cargo xtask download softnpu fi echo "All runner prerequisites installed successfully" From fc104d751d3f9a71b33f43b8cc5d803e8cbe7595 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 4 Jun 2024 19:18:52 -0400 Subject: [PATCH 11/16] Bump gateway-messages to expose stage0 information (#5679) This is a dependency for some upcoming work to be able update stage0 (RoT bootloader) This change exposes the stage0 information --- Cargo.lock | 208 +++------------ Cargo.toml | 4 +- clients/gateway-client/src/lib.rs | 1 + dev-tools/omdb/src/bin/omdb/mgs.rs | 91 ++++++- dev-tools/omdb/tests/successes.out | 80 ++++-- .../configs/sp_sim_config.test.toml | 10 +- gateway/src/http_entrypoints.rs | 107 +++++++- gateway/src/http_entrypoints/conversions.rs | 249 ++++++++++++++--- nexus/db-model/src/inventory.rs | 151 +++++++++++ nexus/db-model/src/schema.rs | 7 + nexus/db-model/src/schema_versions.rs | 3 +- 
.../db-queries/src/db/datastore/inventory.rs | 34 +++ nexus/inventory/src/builder.rs | 48 +++- nexus/inventory/src/collector.rs | 2 + nexus/inventory/src/examples.rs | 10 +- .../tests/output/collector_basic.txt | 24 +- .../tests/output/collector_errors.txt | 24 +- .../output/collector_sled_agent_errors.txt | 24 +- nexus/reconfigurator/planning/src/system.rs | 123 ++++++--- nexus/types/src/inventory.rs | 10 + openapi/gateway.json | 183 ++++++++++++- openapi/wicketd.json | 125 ++++++++- .../tests/output/self-stat-schema.json | 4 +- schema/crdb/dbinit.sql | 29 +- schema/crdb/expose-stage0/up01.sql | 17 ++ schema/crdb/expose-stage0/up02.sql | 8 + schema/crdb/expose-stage0/up03.sql | 2 + schema/crdb/expose-stage0/up04.sql | 2 + smf/sp-sim/config.toml | 4 +- sp-sim/examples/config.toml | 5 +- sp-sim/src/config.rs | 7 + sp-sim/src/gimlet.rs | 215 +++++++++++++-- sp-sim/src/lib.rs | 1 + sp-sim/src/sidecar.rs | 174 ++++++++++-- wicket/src/ui/panes/overview.rs | 250 +++++++++++++++++- wicketd/src/inventory.rs | 4 + wicketd/src/mgs/inventory.rs | 67 ++++- workspace-hack/Cargo.toml | 5 +- 38 files changed, 1949 insertions(+), 363 deletions(-) create mode 100644 schema/crdb/expose-stage0/up01.sql create mode 100644 schema/crdb/expose-stage0/up02.sql create mode 100644 schema/crdb/expose-stage0/up03.sql create mode 100644 schema/crdb/expose-stage0/up04.sql diff --git a/Cargo.lock b/Cargo.lock index 4f4fa019c1..8b73743ef3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -358,7 +358,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "serde_tokenstream 0.2.0", + "serde_tokenstream", "syn 2.0.64", ] @@ -1584,7 +1584,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "serde_tokenstream 0.2.0", + "serde_tokenstream", "syn 2.0.64", ] @@ -1776,7 +1776,7 @@ source = "git+https://github.com/oxidecomputer/diesel-dtrace?branch=main#62ef5ca dependencies = [ "diesel", "serde", - "usdt 0.5.0", + "usdt", "uuid", "version_check", ] @@ -1956,16 +1956,6 @@ version = "0.3.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" -[[package]] -name = "dof" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e6b21a1211455e82b1245d6e1b024f30606afbb734c114515d40d0e0b34ce81" -dependencies = [ - "thiserror", - "zerocopy 0.3.2", -] - [[package]] name = "dof" version = "0.3.0" @@ -2052,7 +2042,7 @@ dependencies = [ "tokio", "tokio-rustls 0.25.0", "toml 0.8.13", - "usdt 0.5.0", + "usdt", "uuid", "version_check", "waitgroup", @@ -2066,21 +2056,10 @@ dependencies = [ "proc-macro2", "quote", "serde", - "serde_tokenstream 0.2.0", + "serde_tokenstream", "syn 2.0.64", ] -[[package]] -name = "dtrace-parser" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bed110893a7f9f4ceb072e166354a09f6cb4cc416eec5b5e5e8ee367442d434b" -dependencies = [ - "pest", - "pest_derive", - "thiserror", -] - [[package]] name = "dtrace-parser" version = "0.2.0" @@ -2691,9 +2670,9 @@ dependencies = [ [[package]] name = "gateway-messages" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/management-gateway-service?rev=2739c18e80697aa6bc235c935176d14b4d757ee9#2739c18e80697aa6bc235c935176d14b4d757ee9" +source = "git+https://github.com/oxidecomputer/management-gateway-service?rev=c85a4ca043aaa389df12aac5348d8a3feda28762#c85a4ca043aaa389df12aac5348d8a3feda28762" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.5.0", "hubpack 0.1.2", "serde", "serde_repr", @@ -2707,7 +2686,7 @@ dependencies = [ [[package]] name = "gateway-sp-comms" version = "0.1.1" -source = "git+https://github.com/oxidecomputer/management-gateway-service?rev=2739c18e80697aa6bc235c935176d14b4d757ee9#2739c18e80697aa6bc235c935176d14b4d757ee9" +source = "git+https://github.com/oxidecomputer/management-gateway-service?rev=c85a4ca043aaa389df12aac5348d8a3feda28762#c85a4ca043aaa389df12aac5348d8a3feda28762" dependencies = [ 
"async-trait", "backoff", @@ -2718,18 +2697,19 @@ dependencies = [ "hubpack 0.1.2", "hubtools", "lru-cache", - "nix 0.26.2", + "nix 0.27.1", "once_cell", "paste", "serde", "serde-big-array 0.5.1", "slog", + "slog-error-chain", "socket2 0.5.7", "string_cache", "thiserror", "tlvc 0.3.1 (git+https://github.com/oxidecomputer/tlvc.git?branch=main)", "tokio", - "usdt 0.3.5", + "usdt", "uuid", "version_check", "zip", @@ -4286,9 +4266,9 @@ dependencies = [ [[package]] name = "memoffset" -version = "0.7.1" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" dependencies = [ "autocfg", ] @@ -4686,7 +4666,7 @@ dependencies = [ "term", "thiserror", "tokio", - "usdt 0.5.0", + "usdt", "uuid", ] @@ -4988,15 +4968,14 @@ dependencies = [ [[package]] name = "nix" -version = "0.26.2" -source = "git+https://github.com/jgallagher/nix?branch=r0.26-illumos#c1a3636db0524f194b714cfd117cd9b637b8b10e" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.5.0", "cfg-if", "libc", "memoffset", - "pin-utils", - "static_assertions", ] [[package]] @@ -5805,7 +5784,7 @@ dependencies = [ "tokio-stream", "tokio-util", "toml 0.8.13", - "usdt 0.5.0", + "usdt", "uuid", "zeroize", "zone 0.3.0", @@ -5844,7 +5823,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "usdt 0.5.0", + "usdt", "uuid", "walkdir", ] @@ -5886,7 +5865,7 @@ dependencies = [ "der", "diesel", "digest", - "dof 0.3.0", + "dof", "either", "elliptic-curve", "ff", @@ -5968,8 +5947,8 @@ dependencies = [ "unicode-bidi", "unicode-normalization", "unicode-xid", - "usdt 0.5.0", - "usdt-impl 0.5.0", + "usdt", + "usdt-impl", "uuid", "yasna", "zerocopy 0.7.34", @@ -6331,7 +6310,7 @@ 
dependencies = [ "tempfile", "thiserror", "tokio", - "usdt 0.5.0", + "usdt", "uuid", ] @@ -7166,7 +7145,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "serde_tokenstream 0.2.0", + "serde_tokenstream", "serde_yaml", "syn 2.0.64", ] @@ -7196,7 +7175,7 @@ dependencies = [ "strum", "thiserror", "tokio", - "usdt 0.5.0", + "usdt", "uuid", "viona_api", ] @@ -8533,17 +8512,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_tokenstream" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "797ba1d80299b264f3aac68ab5d12e5825a561749db4df7cd7c8083900c5d4e9" -dependencies = [ - "proc-macro2", - "serde", - "syn 1.0.109", -] - [[package]] name = "serde_tokenstream" version = "0.2.0" @@ -8908,7 +8876,7 @@ dependencies = [ "serde", "serde_json", "slog", - "usdt 0.5.0", + "usdt", "version_check", ] @@ -9437,18 +9405,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" -[[package]] -name = "synstructure" -version = "0.12.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "unicode-xid", -] - [[package]] name = "system-configuration" version = "0.5.1" @@ -10419,7 +10375,7 @@ dependencies = [ "semver 1.0.23", "serde", "serde_json", - "serde_tokenstream 0.2.0", + "serde_tokenstream", "syn 2.0.64", "typify-impl", ] @@ -10601,47 +10557,20 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "usdt" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b4c48f9e522b977bbe938a0d7c4d36633d267ba0155aaa253fb57d0531be0fb" -dependencies = [ - "dtrace-parser 0.1.14", - "serde", - "usdt-attr-macro 0.3.5", - "usdt-impl 0.3.5", - "usdt-macro 0.3.5", -] - [[package]] name = "usdt" version = "0.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "5bf5c47fb471a0bff3d7b17a250817bba8c6cc99b0492abaefe5b3bb99045f02" dependencies = [ - "dof 0.3.0", - "dtrace-parser 0.2.0", + "dof", + "dtrace-parser", "goblin", "memmap", "serde", - "usdt-attr-macro 0.5.0", - "usdt-impl 0.5.0", - "usdt-macro 0.5.0", -] - -[[package]] -name = "usdt-attr-macro" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80e6ae4f982ae74dcbaa8eb17baf36ca0d464a3abc8a7172b3bd74c73e9505d6" -dependencies = [ - "dtrace-parser 0.1.14", - "proc-macro2", - "quote", - "serde_tokenstream 0.1.7", - "syn 1.0.109", - "usdt-impl 0.3.5", + "usdt-attr-macro", + "usdt-impl", + "usdt-macro", ] [[package]] @@ -10650,32 +10579,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "025161fff40db24774e7757f75df74ecc47e93d7e11e0f6cdfc31b40eacfe136" dependencies = [ - "dtrace-parser 0.2.0", + "dtrace-parser", "proc-macro2", "quote", - "serde_tokenstream 0.2.0", + "serde_tokenstream", "syn 2.0.64", - "usdt-impl 0.5.0", -] - -[[package]] -name = "usdt-impl" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f53b4ca0b33aae466dc47b30b98adc4f88454928837af8010b6ed02d18474cb1" -dependencies = [ - "byteorder", - "dof 0.1.5", - "dtrace-parser 0.1.14", - "libc", - "proc-macro2", - "quote", - "serde", - "serde_json", - "syn 1.0.109", - "thiserror", - "thread-id", - "version_check", + "usdt-impl", ] [[package]] @@ -10685,8 +10594,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f925814e5942ebb87af2d9fcf4c3f8665e37903f741eb11f0fa2396c6ef5f7b1" dependencies = [ "byteorder", - "dof 0.3.0", - "dtrace-parser 0.2.0", + "dof", + "dtrace-parser", "libc", "proc-macro2", "quote", @@ -10698,32 +10607,18 @@ dependencies = [ "version_check", ] -[[package]] -name = "usdt-macro" -version = "0.3.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cb093f9653dc91632621c754f9ed4ee25d14e46e0239b6ccaf74a6c0c2788bd" -dependencies = [ - "dtrace-parser 0.1.14", - "proc-macro2", - "quote", - "serde_tokenstream 0.1.7", - "syn 1.0.109", - "usdt-impl 0.3.5", -] - [[package]] name = "usdt-macro" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ddd86f8f3abac0b7c87f59fe82446fc96a3854a413f176dd2797ed686b7af4c" dependencies = [ - "dtrace-parser 0.2.0", + "dtrace-parser", "proc-macro2", "quote", - "serde_tokenstream 0.2.0", + "serde_tokenstream", "syn 2.0.64", - "usdt-impl 0.5.0", + "usdt-impl", ] [[package]] @@ -11487,16 +11382,6 @@ dependencies = [ "time", ] -[[package]] -name = "zerocopy" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da091bab2bd35db397c46f5b81748b56f28f8fda837087fab9b6b07b6d66e3f1" -dependencies = [ - "byteorder", - "zerocopy-derive 0.2.0", -] - [[package]] name = "zerocopy" version = "0.6.6" @@ -11517,17 +11402,6 @@ dependencies = [ "zerocopy-derive 0.7.34", ] -[[package]] -name = "zerocopy-derive" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb" -dependencies = [ - "proc-macro2", - "syn 1.0.109", - "synstructure", -] - [[package]] name = "zerocopy-derive" version = "0.6.6" diff --git a/Cargo.toml b/Cargo.toml index 4eb76f5859..f9b4906779 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -275,8 +275,8 @@ foreign-types = "0.3.2" fs-err = "2.11.0" futures = "0.3.30" gateway-client = { path = "clients/gateway-client" } -gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", default-features = false, features = ["std"] } -gateway-sp-comms = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = 
"2739c18e80697aa6bc235c935176d14b4d757ee9" } +gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "c85a4ca043aaa389df12aac5348d8a3feda28762", default-features = false, features = ["std"] } +gateway-sp-comms = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "c85a4ca043aaa389df12aac5348d8a3feda28762" } gateway-test-utils = { path = "gateway-test-utils" } gethostname = "0.4.3" glob = "0.3.1" diff --git a/clients/gateway-client/src/lib.rs b/clients/gateway-client/src/lib.rs index 6e932577a7..9693c5e62a 100644 --- a/clients/gateway-client/src/lib.rs +++ b/clients/gateway-client/src/lib.rs @@ -53,6 +53,7 @@ progenitor::generate_api!( HostPhase2RecoveryImageId = { derives = [PartialEq, Eq, PartialOrd, Ord] }, ImageVersion = { derives = [PartialEq, Eq, PartialOrd, Ord] }, RotImageDetails = { derives = [PartialEq, Eq, PartialOrd, Ord] }, + RotImageError = { derives = [ PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize] }, RotSlot = { derives = [PartialEq, Eq, PartialOrd, Ord] }, RotState = { derives = [PartialEq, Eq, PartialOrd, Ord] }, SpIdentifier = { derives = [Copy, PartialEq, Hash, Eq] }, diff --git a/dev-tools/omdb/src/bin/omdb/mgs.rs b/dev-tools/omdb/src/bin/omdb/mgs.rs index 7f33d5de15..6b7c8b2641 100644 --- a/dev-tools/omdb/src/bin/omdb/mgs.rs +++ b/dev-tools/omdb/src/bin/omdb/mgs.rs @@ -294,10 +294,16 @@ fn show_sp_states( RotState::CommunicationFailed { message } => { format!("error: {}", message) } - RotState::Enabled { active: RotSlot::A, .. } => { + RotState::V2 { active: RotSlot::A, .. } => { "slot A".to_string() } - RotState::Enabled { active: RotSlot::B, .. } => { + RotState::V2 { active: RotSlot::B, .. } => { + "slot B".to_string() + } + RotState::V3 { active: RotSlot::A, .. } => { + "slot A".to_string() + } + RotState::V3 { active: RotSlot::B, .. 
} => { "slot B".to_string() } }, @@ -332,7 +338,7 @@ async fn show_sp_details( RotState::CommunicationFailed { message } => { println!(" error: {}", message); } - RotState::Enabled { + RotState::V2 { active, pending_persistent_boot_preference, persistent_boot_preference, @@ -382,6 +388,85 @@ async fn show_sp_details( }, ]; + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + println!("{}", textwrap::indent(&table.to_string(), " ")); + println!(""); + } + RotState::V3 { + active, + pending_persistent_boot_preference, + persistent_boot_preference, + slot_a_fwid, + slot_b_fwid, + transient_boot_preference, + stage0_fwid, + stage0next_fwid, + slot_a_error, + slot_b_error, + stage0_error, + stage0next_error, + } => { + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct Row { + name: &'static str, + value: String, + } + + let rows = vec![ + Row { + name: "active slot", + value: format!("slot {:?}", active), + }, + Row { + name: "persistent boot preference", + value: format!("slot {:?}", persistent_boot_preference), + }, + Row { + name: "pending persistent boot preference", + value: pending_persistent_boot_preference + .map(|s| format!("slot {:?}", s)) + .unwrap_or_else(|| "-".to_string()), + }, + Row { + name: "transient boot preference", + value: transient_boot_preference + .map(|s| format!("slot {:?}", s)) + .unwrap_or_else(|| "-".to_string()), + }, + Row { name: "slot A FWID", value: slot_a_fwid.clone() }, + Row { name: "slot B FWID", value: slot_b_fwid.clone() }, + Row { name: "Stage0 FWID", value: stage0_fwid.clone() }, + Row { name: "Stage0Next FWID", value: stage0next_fwid.clone() }, + Row { + name: "Slot A status", + value: (*slot_a_error) + .map(|x| format!("error: {:?}", x)) + .unwrap_or_else(|| "VALID".to_string()), + }, + Row { + name: "Slot B status", + value: (*slot_b_error) + .map(|x| format!("error: {:?}", x)) + .unwrap_or_else(|| 
"VALID".to_string()), + }, + Row { + name: "Stage0 status", + value: (*stage0_error) + .map(|x| format!("error: {:?}", x)) + .unwrap_or_else(|| "VALID".to_string()), + }, + Row { + name: "stage0next status", + value: (*stage0next_error) + .map(|x| format!("error: {:?}", x)) + .unwrap_or_else(|| "VALID".to_string()), + }, + ]; + let table = tabled::Table::new(rows) .with(tabled::settings::Style::empty()) .with(tabled::settings::Padding::new(0, 1, 0, 0)) diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 07ebeb10bf..22d613f838 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -125,13 +125,19 @@ SP DETAILS: type "Sled" slot 0 ROOT OF TRUST - NAME VALUE - active slot slot A - persistent boot preference slot A - pending persistent boot preference - - transient boot preference - - slot A SHA3 256 digest - - slot B SHA3 256 digest - + NAME VALUE + active slot slot A + persistent boot preference slot A + pending persistent boot preference - + transient boot preference - + slot A FWID aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + slot B FWID bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + Stage0 FWID cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + Stage0Next FWID dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd + Slot A status VALID + Slot B status VALID + Stage0 status VALID + stage0next status VALID COMPONENTS @@ -145,13 +151,19 @@ SP DETAILS: type "Sled" slot 1 ROOT OF TRUST - NAME VALUE - active slot slot A - persistent boot preference slot A - pending persistent boot preference - - transient boot preference - - slot A SHA3 256 digest - - slot B SHA3 256 digest - + NAME VALUE + active slot slot A + persistent boot preference slot A + pending persistent boot preference - + transient boot preference - + slot A FWID aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + slot B FWID 
bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + Stage0 FWID cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + Stage0Next FWID dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd + Slot A status VALID + Slot B status VALID + Stage0 status VALID + stage0next status VALID COMPONENTS @@ -164,13 +176,19 @@ SP DETAILS: type "Switch" slot 0 ROOT OF TRUST - NAME VALUE - active slot slot A - persistent boot preference slot A - pending persistent boot preference - - transient boot preference - - slot A SHA3 256 digest - - slot B SHA3 256 digest - + NAME VALUE + active slot slot A + persistent boot preference slot A + pending persistent boot preference - + transient boot preference - + slot A FWID aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + slot B FWID bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + Stage0 FWID cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + Stage0Next FWID dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd + Slot A status VALID + Slot B status VALID + Stage0 status VALID + stage0next status VALID COMPONENTS @@ -184,13 +202,19 @@ SP DETAILS: type "Switch" slot 1 ROOT OF TRUST - NAME VALUE - active slot slot A - persistent boot preference slot A - pending persistent boot preference - - transient boot preference - - slot A SHA3 256 digest - - slot B SHA3 256 digest - + NAME VALUE + active slot slot A + persistent boot preference slot A + pending persistent boot preference - + transient boot preference - + slot A FWID aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + slot B FWID bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb + Stage0 FWID cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + Stage0Next FWID dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd + Slot A status VALID + Slot B status VALID + Stage0 status VALID + stage0next status VALID COMPONENTS: none found diff 
--git a/gateway-test-utils/configs/sp_sim_config.test.toml b/gateway-test-utils/configs/sp_sim_config.test.toml index e1e3c0d057..cc08eec30b 100644 --- a/gateway-test-utils/configs/sp_sim_config.test.toml +++ b/gateway-test-utils/configs/sp_sim_config.test.toml @@ -18,14 +18,14 @@ device_id_cert_seed = "01de00000000000000000000000000000000000000000000000000000 id = "dev-0" device = "fake-tmp-sensor" description = "FAKE temperature sensor 1" -capabilities.bits = 0x2 +capabilities = 0x2 presence = "Present" [[simulated_sps.sidecar.components]] id = "dev-1" device = "fake-tmp-sensor" description = "FAKE temperature sensor 2" -capabilities.bits = 0x2 +capabilities = 0x2 presence = "Failed" [[simulated_sps.sidecar]] @@ -46,7 +46,7 @@ device_id_cert_seed = "01de00000000000000000000000000000000000000000000000000000 id = "sp3-host-cpu" device = "sp3-host-cpu" description = "FAKE host cpu" -capabilities.bits = 0 +capabilities = 0 presence = "Present" serial_console = "[::1]:0" @@ -54,7 +54,7 @@ serial_console = "[::1]:0" id = "dev-0" device = "fake-tmp-sensor" description = "FAKE temperature sensor" -capabilities.bits = 0x2 +capabilities = 0x2 presence = "Failed" [[simulated_sps.gimlet]] @@ -68,7 +68,7 @@ device_id_cert_seed = "01de00000000000000000000000000000000000000000000000000000 id = "sp3-host-cpu" device = "sp3-host-cpu" description = "FAKE host cpu" -capabilities.bits = 0 +capabilities = 0 presence = "Present" serial_console = "[::1]:0" diff --git a/gateway/src/http_entrypoints.rs b/gateway/src/http_entrypoints.rs index 727ba0950d..7e1c8a991e 100644 --- a/gateway/src/http_entrypoints.rs +++ b/gateway/src/http_entrypoints.rs @@ -61,6 +61,34 @@ pub struct SpState { pub rot: RotState, } +#[derive( + Debug, + Clone, + PartialEq, + Eq, + PartialOrd, + Ord, + Deserialize, + Serialize, + JsonSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum RotImageError { + Unchecked, + FirstPageErased, + PartiallyProgrammed, + InvalidLength, + HeaderNotProgrammed, + 
BootloaderTooSmall, + BadMagic, + HeaderImageSize, + UnalignedLength, + UnsupportedType, + ResetVectorNotThumb2, + ResetVector, + Signature, +} + #[derive( Debug, Clone, @@ -74,7 +102,7 @@ pub struct SpState { )] #[serde(tag = "state", rename_all = "snake_case")] pub enum RotState { - Enabled { + V2 { active: RotSlot, persistent_boot_preference: RotSlot, pending_persistent_boot_preference: Option, @@ -85,6 +113,22 @@ pub enum RotState { CommunicationFailed { message: String, }, + V3 { + active: RotSlot, + persistent_boot_preference: RotSlot, + pending_persistent_boot_preference: Option, + transient_boot_preference: Option, + + slot_a_fwid: String, + slot_b_fwid: String, + stage0_fwid: String, + stage0next_fwid: String, + + slot_a_error: Option, + slot_b_error: Option, + stage0_error: Option, + stage0next_error: Option, + }, } #[derive( @@ -184,6 +228,21 @@ pub struct RotCfpa { pub slot: RotCfpaSlot, } +#[derive( + Debug, + Clone, + PartialEq, + Eq, + PartialOrd, + Ord, + Deserialize, + Serialize, + JsonSchema, +)] +pub struct GetRotBootInfoParams { + pub version: u8, +} + #[derive( Debug, Clone, @@ -626,7 +685,12 @@ async fn sp_get( SpCommsError::SpCommunicationFailed { sp: sp_id, err } })?; - Ok(HttpResponseOk(state.into())) + let rot_state = sp + .rot_state(gateway_messages::RotBootInfo::HIGHEST_KNOWN_VERSION) + .await; + + let final_state = SpState::from((state, rot_state)); + Ok(HttpResponseOk(final_state)) } /// Get host startup options for a sled @@ -1044,7 +1108,9 @@ async fn sp_component_reset( let component = component_from_str(&component)?; sp.reset_component_prepare(component) - .and_then(|()| sp.reset_component_trigger(component)) + // We always want to run with the watchdog when resetting as + // disabling the watchdog should be considered a debug only feature + .and_then(|()| sp.reset_component_trigger(component, false)) .await .map_err(|err| SpCommsError::SpCommunicationFailed { sp: sp_id, @@ -1224,6 +1290,40 @@ async fn sp_rot_cfpa_get( 
Ok(HttpResponseOk(RotCfpa { base64_data, slot })) } +/// Read the RoT boot state from a root of trust +/// +/// This endpoint is only valid for the `rot` component. +#[endpoint { + method = GET, + path = "/sp/{type}/{slot}/component/{component}/rot-boot-info", +}] +async fn sp_rot_boot_info( + rqctx: RequestContext>, + path: Path, + params: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + + let PathSpComponent { sp, component } = path.into_inner(); + let GetRotBootInfoParams { version } = params.into_inner(); + let sp_id = sp.into(); + + // Ensure the caller knows they're asking for the RoT + if component_from_str(&component)? != SpComponent::ROT { + return Err(HttpError::for_bad_request( + Some("RequestUnsupportedForComponent".to_string()), + "rot_boot_info only makes sent for a RoT".into(), + )); + } + + let sp = apictx.mgmt_switch.sp(sp_id)?; + let state = sp.rot_state(version).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + + Ok(HttpResponseOk(state.into())) +} + /// List SPs via Ignition /// /// Retreive information for all SPs via the Ignition controller. 
This is lower @@ -1600,6 +1700,7 @@ pub fn api() -> GatewayApiDescription { api.register(sp_component_update_abort)?; api.register(sp_rot_cmpa_get)?; api.register(sp_rot_cfpa_get)?; + api.register(sp_rot_boot_info)?; api.register(sp_host_phase2_progress_get)?; api.register(sp_host_phase2_progress_delete)?; api.register(ignition_list)?; diff --git a/gateway/src/http_entrypoints/conversions.rs b/gateway/src/http_entrypoints/conversions.rs index df3d1c5436..c7fcb29922 100644 --- a/gateway/src/http_entrypoints/conversions.rs +++ b/gateway/src/http_entrypoints/conversions.rs @@ -12,6 +12,7 @@ use super::ImageVersion; use super::InstallinatorImageId; use super::PowerState; use super::RotImageDetails; +use super::RotImageError; use super::RotSlot; use super::RotState; use super::SpComponentInfo; @@ -27,6 +28,7 @@ use super::SpType; use super::SpUpdateStatus; use super::UpdatePreparationProgress; use dropshot::HttpError; +use gateway_messages::RotBootInfo; use gateway_messages::SpComponent; use gateway_messages::StartupOptions; use gateway_messages::UpdateStatus; @@ -133,6 +135,45 @@ impl From for ImageVersion { } } +impl From for RotImageError { + fn from(error: gateway_messages::ImageError) -> Self { + match error { + gateway_messages::ImageError::Unchecked => RotImageError::Unchecked, + gateway_messages::ImageError::FirstPageErased => { + RotImageError::FirstPageErased + } + gateway_messages::ImageError::PartiallyProgrammed => { + RotImageError::PartiallyProgrammed + } + gateway_messages::ImageError::InvalidLength => { + RotImageError::InvalidLength + } + gateway_messages::ImageError::HeaderNotProgrammed => { + RotImageError::HeaderNotProgrammed + } + gateway_messages::ImageError::BootloaderTooSmall => { + RotImageError::BootloaderTooSmall + } + gateway_messages::ImageError::BadMagic => RotImageError::BadMagic, + gateway_messages::ImageError::HeaderImageSize => { + RotImageError::HeaderImageSize + } + gateway_messages::ImageError::UnalignedLength => { + 
RotImageError::UnalignedLength + } + gateway_messages::ImageError::UnsupportedType => { + RotImageError::UnsupportedType + } + gateway_messages::ImageError::ResetVectorNotThumb2 => { + RotImageError::ResetVectorNotThumb2 + } + gateway_messages::ImageError::ResetVector => { + RotImageError::ResetVector + } + gateway_messages::ImageError::Signature => RotImageError::Signature, + } + } +} // We expect serial and model numbers to be ASCII and 0-padded: find the first 0 // byte and convert to a string. If that fails, hexlify the entire slice. fn stringify_byte_string(bytes: &[u8]) -> String { @@ -143,8 +184,30 @@ fn stringify_byte_string(bytes: &[u8]) -> String { .unwrap_or_else(|_err| hex::encode(bytes)) } +impl From<(gateway_messages::SpStateV1, RotState)> for SpState { + fn from(all: (gateway_messages::SpStateV1, RotState)) -> Self { + let (state, rot) = all; + Self { + serial_number: stringify_byte_string(&state.serial_number), + model: stringify_byte_string(&state.model), + revision: state.revision, + hubris_archive_id: hex::encode(state.hubris_archive_id), + base_mac_address: state.base_mac_address, + power_state: PowerState::from(state.power_state), + rot, + } + } +} + impl From for SpState { fn from(state: gateway_messages::SpStateV1) -> Self { + Self::from((state, RotState::from(state.rot))) + } +} + +impl From<(gateway_messages::SpStateV2, RotState)> for SpState { + fn from(all: (gateway_messages::SpStateV2, RotState)) -> Self { + let (state, rot) = all; Self { serial_number: stringify_byte_string(&state.serial_number), model: stringify_byte_string(&state.model), @@ -152,13 +215,20 @@ impl From for SpState { hubris_archive_id: hex::encode(state.hubris_archive_id), base_mac_address: state.base_mac_address, power_state: PowerState::from(state.power_state), - rot: RotState::from(state.rot), + rot, } } } impl From for SpState { fn from(state: gateway_messages::SpStateV2) -> Self { + Self::from((state, RotState::from(state.rot))) + } +} + +impl 
From<(gateway_messages::SpStateV3, RotState)> for SpState { + fn from(all: (gateway_messages::SpStateV3, RotState)) -> Self { + let (state, rot) = all; Self { serial_number: stringify_byte_string(&state.serial_number), model: stringify_byte_string(&state.model), @@ -166,16 +236,55 @@ impl From for SpState { hubris_archive_id: hex::encode(state.hubris_archive_id), base_mac_address: state.base_mac_address, power_state: PowerState::from(state.power_state), - rot: RotState::from(state.rot), + rot, } } } -impl From for SpState { - fn from(value: gateway_sp_comms::VersionedSpState) -> Self { - match value { - gateway_sp_comms::VersionedSpState::V1(s) => Self::from(s), - gateway_sp_comms::VersionedSpState::V2(s) => Self::from(s), +impl + From<( + gateway_sp_comms::VersionedSpState, + Result< + gateway_messages::RotBootInfo, + gateway_sp_comms::error::CommunicationError, + >, + )> for SpState +{ + fn from( + all: ( + gateway_sp_comms::VersionedSpState, + Result< + gateway_messages::RotBootInfo, + gateway_sp_comms::error::CommunicationError, + >, + ), + ) -> Self { + // We need to keep this backwards compatible. If we get an error from reading `rot_state` + // it could be because the RoT/SP isn't updated or because we have failed for some + // other reason. If we're on V1/V2 SP info and we fail, just fall back to using the + // RoT info in that struct since any error will also be communicated there. 
+ match (all.0, all.1) { + (gateway_sp_comms::VersionedSpState::V1(s), Err(_)) => { + Self::from(s) + } + (gateway_sp_comms::VersionedSpState::V1(s), Ok(r)) => { + Self::from((s, RotState::from(r))) + } + (gateway_sp_comms::VersionedSpState::V2(s), Err(_)) => { + Self::from(s) + } + (gateway_sp_comms::VersionedSpState::V2(s), Ok(r)) => { + Self::from((s, RotState::from(r))) + } + (gateway_sp_comms::VersionedSpState::V3(s), Ok(r)) => { + Self::from((s, RotState::from(r))) + } + (gateway_sp_comms::VersionedSpState::V3(s), Err(err)) => { + Self::from(( + s, + RotState::CommunicationFailed { message: err.to_string() }, + )) + } } } } @@ -187,25 +296,7 @@ impl From> result: Result, ) -> Self { match result { - Ok(state) => { - let boot_state = state.rot_updates.boot_state; - Self::Enabled { - active: boot_state.active.into(), - slot_a_sha3_256_digest: boot_state - .slot_a - .map(|details| hex::encode(details.digest)), - slot_b_sha3_256_digest: boot_state - .slot_b - .map(|details| hex::encode(details.digest)), - // RotState(V1) didn't have the following fields, so we make - // it up as best we can. This RoT version is pre-shipping - // and should only exist on (not updated recently) test - // systems. 
- persistent_boot_preference: boot_state.active.into(), - pending_persistent_boot_preference: None, - transient_boot_preference: None, - } - } + Ok(state) => Self::from(state), Err(err) => Self::CommunicationFailed { message: err.to_string() }, } } @@ -221,29 +312,99 @@ impl From> >, ) -> Self { match result { - Ok(state) => Self::Enabled { - active: state.active.into(), - persistent_boot_preference: state - .persistent_boot_preference - .into(), - pending_persistent_boot_preference: state - .pending_persistent_boot_preference - .map(Into::into), - transient_boot_preference: state - .transient_boot_preference - .map(Into::into), - slot_a_sha3_256_digest: state - .slot_a_sha3_256_digest - .map(hex::encode), - slot_b_sha3_256_digest: state - .slot_b_sha3_256_digest - .map(hex::encode), - }, + Ok(state) => Self::from(state), Err(err) => Self::CommunicationFailed { message: err.to_string() }, } } } +impl RotState { + fn fwid_to_string(fwid: gateway_messages::Fwid) -> String { + match fwid { + gateway_messages::Fwid::Sha3_256(digest) => hex::encode(digest), + } + } +} + +impl From for RotState { + fn from(state: gateway_messages::RotStateV3) -> Self { + Self::V3 { + active: state.active.into(), + persistent_boot_preference: state.persistent_boot_preference.into(), + pending_persistent_boot_preference: state + .pending_persistent_boot_preference + .map(Into::into), + transient_boot_preference: state + .transient_boot_preference + .map(Into::into), + slot_a_fwid: Self::fwid_to_string(state.slot_a_fwid), + slot_b_fwid: Self::fwid_to_string(state.slot_b_fwid), + + stage0_fwid: Self::fwid_to_string(state.stage0_fwid), + stage0next_fwid: Self::fwid_to_string(state.stage0next_fwid), + + slot_a_error: state.slot_a_status.err().map(From::from), + slot_b_error: state.slot_b_status.err().map(From::from), + + stage0_error: state.stage0_status.err().map(From::from), + stage0next_error: state.stage0next_status.err().map(From::from), + } + } +} + +impl From for RotState { + fn 
from(state: gateway_messages::RotStateV2) -> Self { + Self::V2 { + active: state.active.into(), + persistent_boot_preference: state.persistent_boot_preference.into(), + pending_persistent_boot_preference: state + .pending_persistent_boot_preference + .map(Into::into), + transient_boot_preference: state + .transient_boot_preference + .map(Into::into), + slot_a_sha3_256_digest: state + .slot_a_sha3_256_digest + .map(hex::encode), + slot_b_sha3_256_digest: state + .slot_b_sha3_256_digest + .map(hex::encode), + } + } +} + +impl From for RotState { + fn from(state: gateway_messages::RotState) -> Self { + let boot_state = state.rot_updates.boot_state; + Self::V2 { + active: boot_state.active.into(), + slot_a_sha3_256_digest: boot_state + .slot_a + .map(|details| hex::encode(details.digest)), + slot_b_sha3_256_digest: boot_state + .slot_b + .map(|details| hex::encode(details.digest)), + // RotState(V1) didn't have the following fields, so we make + // it up as best we can. This RoT version is pre-shipping + // and should only exist on (not updated recently) test + // systems. 
+ persistent_boot_preference: boot_state.active.into(), + pending_persistent_boot_preference: None, + transient_boot_preference: None, + } + } +} + +impl From for RotState { + fn from(value: gateway_messages::RotBootInfo) -> Self { + match value { + RotBootInfo::V1(s) => Self::from(s), + RotBootInfo::V2(s) => Self::from(s), + RotBootInfo::V3(s) => Self::from(s), + } + } +} + impl From for RotSlot { fn from(slot: gateway_messages::RotSlotId) -> Self { match slot { diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 456987f0ce..4abd7fe927 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -124,6 +124,8 @@ impl_enum_type!( SpSlot1 => b"sp_slot_1" RotSlotA => b"rot_slot_A" RotSlotB => b"rot_slot_B" + Stage0 => b"stage0" + Stage0Next => b"stage0next" ); impl From for CabooseWhich { @@ -134,6 +136,10 @@ impl From for CabooseWhich { nexus_inventory::CabooseWhich::SpSlot1 => CabooseWhich::SpSlot1, nexus_inventory::CabooseWhich::RotSlotA => CabooseWhich::RotSlotA, nexus_inventory::CabooseWhich::RotSlotB => CabooseWhich::RotSlotB, + nexus_inventory::CabooseWhich::Stage0 => CabooseWhich::Stage0, + nexus_inventory::CabooseWhich::Stage0Next => { + CabooseWhich::Stage0Next + } } } } @@ -146,6 +152,10 @@ impl From for nexus_types::inventory::CabooseWhich { CabooseWhich::SpSlot1 => nexus_inventory::CabooseWhich::SpSlot1, CabooseWhich::RotSlotA => nexus_inventory::CabooseWhich::RotSlotA, CabooseWhich::RotSlotB => nexus_inventory::CabooseWhich::RotSlotB, + CabooseWhich::Stage0 => nexus_inventory::CabooseWhich::Stage0, + CabooseWhich::Stage0Next => { + nexus_inventory::CabooseWhich::Stage0Next + } } } } @@ -203,6 +213,125 @@ impl From for nexus_types::inventory::RotPageWhich { } } +// See [`nexus_types::inventory::RotImageError`]. 
+impl_enum_type!( + #[derive(SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "rot_image_error", schema = "public"))] + pub struct RotImageErrorEnum; + + #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, PartialEq)] + #[diesel(sql_type = RotImageErrorEnum)] + pub enum RotImageError; + + // Enum values + Unchecked => b"unchecked" + FirstPageErased => b"first_page_erased" + PartiallyProgrammed => b"partially_programmed" + InvalidLength => b"invalid_length" + HeaderNotProgrammed => b"header_not_programmed" + BootloaderTooSmall => b"bootloader_too_small" + BadMagic => b"bad_magic" + HeaderImageSize => b"header_image_size" + UnalignedLength => b"unaligned_length" + UnsupportedType => b"unsupported_type" + ResetVectorNotThumb2 => b"not_thumb2" + ResetVector => b"reset_vector" + Signature => b"signature" + +); + +impl From for RotImageError { + fn from(c: nexus_types::inventory::RotImageError) -> Self { + match c { + nexus_types::inventory::RotImageError::Unchecked => { + RotImageError::Unchecked + } + nexus_types::inventory::RotImageError::FirstPageErased => { + RotImageError::FirstPageErased + } + nexus_types::inventory::RotImageError::PartiallyProgrammed => { + RotImageError::PartiallyProgrammed + } + nexus_types::inventory::RotImageError::InvalidLength => { + RotImageError::InvalidLength + } + nexus_types::inventory::RotImageError::HeaderNotProgrammed => { + RotImageError::HeaderNotProgrammed + } + nexus_types::inventory::RotImageError::BootloaderTooSmall => { + RotImageError::BootloaderTooSmall + } + nexus_types::inventory::RotImageError::BadMagic => { + RotImageError::BadMagic + } + nexus_types::inventory::RotImageError::HeaderImageSize => { + RotImageError::HeaderImageSize + } + nexus_types::inventory::RotImageError::UnalignedLength => { + RotImageError::UnalignedLength + } + nexus_types::inventory::RotImageError::UnsupportedType => { + RotImageError::UnsupportedType + } + nexus_types::inventory::RotImageError::ResetVectorNotThumb2 => { + 
RotImageError::ResetVectorNotThumb2 + } + nexus_types::inventory::RotImageError::ResetVector => { + RotImageError::ResetVector + } + nexus_types::inventory::RotImageError::Signature => { + RotImageError::Signature + } + } + } +} + +impl From for nexus_types::inventory::RotImageError { + fn from(row: RotImageError) -> Self { + match row { + RotImageError::Unchecked => { + nexus_types::inventory::RotImageError::Unchecked + } + RotImageError::FirstPageErased => { + nexus_types::inventory::RotImageError::FirstPageErased + } + RotImageError::PartiallyProgrammed => { + nexus_types::inventory::RotImageError::PartiallyProgrammed + } + RotImageError::InvalidLength => { + nexus_types::inventory::RotImageError::InvalidLength + } + RotImageError::HeaderNotProgrammed => { + nexus_types::inventory::RotImageError::HeaderNotProgrammed + } + RotImageError::BootloaderTooSmall => { + nexus_types::inventory::RotImageError::BootloaderTooSmall + } + RotImageError::BadMagic => { + nexus_types::inventory::RotImageError::BadMagic + } + RotImageError::HeaderImageSize => { + nexus_types::inventory::RotImageError::HeaderImageSize + } + RotImageError::UnalignedLength => { + nexus_types::inventory::RotImageError::UnalignedLength + } + RotImageError::UnsupportedType => { + nexus_types::inventory::RotImageError::UnsupportedType + } + RotImageError::ResetVectorNotThumb2 => { + nexus_types::inventory::RotImageError::ResetVectorNotThumb2 + } + RotImageError::ResetVector => { + nexus_types::inventory::RotImageError::ResetVector + } + RotImageError::Signature => { + nexus_types::inventory::RotImageError::Signature + } + } + } +} + // See [`nexus_types::inventory::SpType`]. 
impl_enum_type!( #[derive(SqlType, Debug, QueryId)] @@ -532,6 +661,13 @@ pub struct InvRootOfTrust { pub slot_boot_pref_persistent_pending: Option, pub slot_a_sha3_256: Option, pub slot_b_sha3_256: Option, + pub stage0_fwid: Option, + pub stage0next_fwid: Option, + + pub slot_a_error: Option, + pub slot_b_error: Option, + pub stage0_error: Option, + pub stage0next_error: Option, } impl From for nexus_types::inventory::RotState { @@ -551,6 +687,21 @@ impl From for nexus_types::inventory::RotState { .map(RotSlot::from), slot_a_sha3_256_digest: row.slot_a_sha3_256, slot_b_sha3_256_digest: row.slot_b_sha3_256, + stage0_digest: row.stage0_fwid, + stage0next_digest: row.stage0next_fwid, + + slot_a_error: row + .slot_a_error + .map(nexus_types::inventory::RotImageError::from), + slot_b_error: row + .slot_b_error + .map(nexus_types::inventory::RotImageError::from), + stage0_error: row + .stage0_error + .map(nexus_types::inventory::RotImageError::from), + stage0next_error: row + .stage0next_error + .map(nexus_types::inventory::RotImageError::from), } } } diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 22ef60483d..8a00ce6e37 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1370,6 +1370,13 @@ table! { slot_boot_pref_persistent_pending -> Nullable, slot_a_sha3_256 -> Nullable, slot_b_sha3_256 -> Nullable, + stage0_fwid -> Nullable, + stage0next_fwid -> Nullable, + + slot_a_error -> Nullable, + slot_b_error -> Nullable, + stage0_error -> Nullable, + stage0next_error -> Nullable, } } diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index ebc9d0173a..4465c3aacf 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. 
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(68, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(69, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(69, "expose-stage0"), KnownVersion::new(68, "filter-v2p-mapping-by-instance-state"), KnownVersion::new(67, "add-instance-updater-lock"), KnownVersion::new(66, "blueprint-crdb-preserve-downgrade"), diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 6faa8ea251..289e443213 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -47,6 +47,8 @@ use nexus_db_model::InvServiceProcessor; use nexus_db_model::InvSledAgent; use nexus_db_model::InvSledOmicronZones; use nexus_db_model::InvZpool; +use nexus_db_model::RotImageError; +use nexus_db_model::RotImageErrorEnum; use nexus_db_model::RotPageWhichEnum; use nexus_db_model::SledRole; use nexus_db_model::SledRoleEnum; @@ -404,6 +406,26 @@ impl DataStore { .clone() .into_sql::>( ), + rot.stage0_digest + .clone() + .into_sql::>( + ), + rot.stage0next_digest + .clone() + .into_sql::>( + ), + rot.slot_a_error + .map(RotImageError::from) + .into_sql::>(), + rot.slot_b_error + .map(RotImageError::from) + .into_sql::>(), + rot.stage0_error + .map(RotImageError::from) + .into_sql::>(), + rot.stage0next_error + .map(RotImageError::from) + .into_sql::>(), )) .filter( baseboard_dsl::part_number @@ -429,6 +451,12 @@ impl DataStore { rot_dsl::slot_boot_pref_transient, rot_dsl::slot_a_sha3_256, rot_dsl::slot_b_sha3_256, + rot_dsl::stage0_fwid, + rot_dsl::stage0next_fwid, + rot_dsl::slot_a_error, + rot_dsl::slot_b_error, + rot_dsl::stage0_error, + rot_dsl::stage0next_error, )) 
.execute_async(&conn) .await?; @@ -447,6 +475,12 @@ impl DataStore { _slot_boot_pref_transient, _slot_a_sha3_256, _slot_b_sha3_256, + _stage0_fwid, + _stage0next_fwid, + _slot_a_error, + _slot_b_error, + _stage0_error, + _stage0next_error, ) = rot_dsl::inv_root_of_trust::all_columns(); } } diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index bfa330669f..65bdae63ce 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -217,7 +217,7 @@ impl CollectionBuilder { }); match sp_state.rot { - gateway_client::types::RotState::Enabled { + gateway_client::types::RotState::V2 { active, pending_persistent_boot_preference, persistent_boot_preference, @@ -236,6 +236,12 @@ impl CollectionBuilder { transient_boot_preference, slot_a_sha3_256_digest, slot_b_sha3_256_digest, + stage0_digest: None, + stage0next_digest: None, + slot_a_error: None, + slot_b_error: None, + stage0_error: None, + stage0next_error: None, } }); } @@ -249,6 +255,40 @@ impl CollectionBuilder { message ))); } + gateway_client::types::RotState::V3 { + active, + pending_persistent_boot_preference, + persistent_boot_preference, + slot_a_fwid, + slot_b_fwid, + stage0_fwid, + stage0next_fwid, + transient_boot_preference, + slot_a_error, + slot_b_error, + stage0_error, + stage0next_error, + } => { + let _ = + self.rots.entry(baseboard.clone()).or_insert_with(|| { + RotState { + time_collected: now, + source: source.to_owned(), + active_slot: active, + persistent_boot_preference, + pending_persistent_boot_preference, + transient_boot_preference, + slot_a_sha3_256_digest: Some(slot_a_fwid), + slot_b_sha3_256_digest: Some(slot_b_fwid), + stage0_digest: Some(stage0_fwid), + stage0next_digest: Some(stage0next_fwid), + slot_a_error, + slot_b_error, + stage0_error, + stage0next_error, + } + }); + } } Some(baseboard) @@ -965,7 +1005,7 @@ mod test { model: String::from("model1"), power_state: PowerState::A0, revision: 0, - rot: RotState::Enabled { + rot: RotState::V2 
{ active: RotSlot::A, pending_persistent_boot_preference: None, persistent_boot_preference: RotSlot::A, @@ -990,7 +1030,7 @@ mod test { model: String::from("model1"), power_state: PowerState::A0, revision: 0, - rot: RotState::Enabled { + rot: RotState::V2 { active: RotSlot::A, pending_persistent_boot_preference: None, persistent_boot_preference: RotSlot::A, @@ -1016,7 +1056,7 @@ mod test { model: String::from("model1"), power_state: PowerState::A0, revision: 1, - rot: RotState::Enabled { + rot: RotState::V2 { active: RotSlot::A, pending_persistent_boot_preference: None, persistent_boot_preference: RotSlot::A, diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index 48761479b0..a64d092e1c 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -174,6 +174,8 @@ impl<'a> Collector<'a> { CabooseWhich::SpSlot1 => ("sp", 1), CabooseWhich::RotSlotA => ("rot", 0), CabooseWhich::RotSlotB => ("rot", 1), + CabooseWhich::Stage0 => ("stage0", 0), + CabooseWhich::Stage0Next => ("stage0", 1), }; let result = client diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index 1a0c70f456..c2e283a640 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -49,7 +49,7 @@ pub fn representative() -> Representative { model: String::from("model1"), power_state: PowerState::A0, revision: 0, - rot: RotState::Enabled { + rot: RotState::V2 { active: RotSlot::A, pending_persistent_boot_preference: None, persistent_boot_preference: RotSlot::A, @@ -74,7 +74,7 @@ pub fn representative() -> Representative { model: String::from("model2"), power_state: PowerState::A2, revision: 1, - rot: RotState::Enabled { + rot: RotState::V2 { active: RotSlot::B, pending_persistent_boot_preference: Some(RotSlot::A), persistent_boot_preference: RotSlot::A, @@ -101,7 +101,7 @@ pub fn representative() -> Representative { model: String::from("model3"), power_state: PowerState::A1, revision: 2, - rot: 
RotState::Enabled { + rot: RotState::V2 { active: RotSlot::B, pending_persistent_boot_preference: None, persistent_boot_preference: RotSlot::A, @@ -128,7 +128,7 @@ pub fn representative() -> Representative { model: String::from("model4"), power_state: PowerState::A2, revision: 3, - rot: RotState::Enabled { + rot: RotState::V2 { active: RotSlot::B, pending_persistent_boot_preference: None, persistent_boot_preference: RotSlot::A, @@ -471,7 +471,7 @@ pub fn sp_state(unique: &str) -> SpState { model: format!("model{}", unique), power_state: PowerState::A2, revision: 0, - rot: RotState::Enabled { + rot: RotState::V2 { active: RotSlot::A, pending_persistent_boot_preference: None, persistent_boot_preference: RotSlot::A, diff --git a/nexus/inventory/tests/output/collector_basic.txt b/nexus/inventory/tests/output/collector_basic.txt index 0fc1c552ab..896cdcddbc 100644 --- a/nexus/inventory/tests/output/collector_basic.txt +++ b/nexus/inventory/tests/output/collector_basic.txt @@ -7,10 +7,18 @@ baseboards: part "sim-gimlet" serial "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" cabooses: - board "SimGimletSp" name "SimGimlet" version "0.0.1" git_commit "ffffffff" - board "SimRot" name "SimGimlet" version "0.0.1" git_commit "eeeeeeee" - board "SimRot" name "SimSidecar" version "0.0.1" git_commit "eeeeeeee" - board "SimSidecarSp" name "SimSidecar" version "0.0.1" git_commit "ffffffff" + board "SimGimletSp" name "SimGimlet" version "0.0.1" git_commit "fefefefe" + board "SimGimletSp" name "SimGimlet" version "0.0.2" git_commit "ffffffff" + board "SimRot" name "SimGimletRot" version "0.0.3" git_commit "edededed" + board "SimRot" name "SimSidecar" version "0.0.3" git_commit "edededed" + board "SimRot" name "SimGimletRot" version "0.0.4" git_commit "eeeeeeee" + board "SimRot" name "SimSidecar" version "0.0.4" git_commit "eeeeeeee" + board "SimRotStage0" name "SimSidecar" version "0.0.200" git_commit "dadadada" + board "SimRotStage0" name "SimGimletRot" version "0.0.200" git_commit 
"dadadadad" + board "SimRotStage0" name "SimSidecar" version "0.0.200" git_commit "dddddddd" + board "SimRotStage0" name "SimGimletRot" version "0.0.200" git_commit "ddddddddd" + board "SimSidecarSp" name "SimSidecar" version "0.0.1" git_commit "fefefefe" + board "SimSidecarSp" name "SimSidecar" version "0.0.2" git_commit "ffffffff" rot pages: data_base64 "Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" @@ -51,6 +59,14 @@ cabooses found: RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" RotSlotB baseboard part "i86pc" serial "SimGimlet00": board "SimRot" RotSlotB baseboard part "i86pc" serial "SimGimlet01": board "SimRot" + Stage0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRotStage0" + Stage0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRotStage0" + Stage0 baseboard part "i86pc" serial "SimGimlet00": board "SimRotStage0" + Stage0 baseboard part "i86pc" serial "SimGimlet01": board "SimRotStage0" + Stage0Next baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRotStage0" + Stage0Next baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRotStage0" + Stage0Next baseboard part "i86pc" serial "SimGimlet00": board "SimRotStage0" + Stage0Next baseboard part "i86pc" serial "SimGimlet01": board "SimRotStage0" rot pages found: Cmpa baseboard part 
"FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" diff --git a/nexus/inventory/tests/output/collector_errors.txt b/nexus/inventory/tests/output/collector_errors.txt index 20e9bb301e..79d61567dd 100644 --- a/nexus/inventory/tests/output/collector_errors.txt +++ b/nexus/inventory/tests/output/collector_errors.txt @@ -5,10 +5,18 @@ baseboards: part "i86pc" serial "SimGimlet01" cabooses: - board "SimGimletSp" name "SimGimlet" version "0.0.1" git_commit "ffffffff" - board "SimRot" name "SimGimlet" version "0.0.1" git_commit "eeeeeeee" - board "SimRot" name "SimSidecar" version "0.0.1" git_commit "eeeeeeee" - board "SimSidecarSp" name "SimSidecar" version "0.0.1" git_commit "ffffffff" + board "SimGimletSp" name "SimGimlet" version "0.0.1" git_commit "fefefefe" + board "SimGimletSp" name "SimGimlet" version "0.0.2" git_commit "ffffffff" + board "SimRot" name "SimGimletRot" version "0.0.3" git_commit "edededed" + board "SimRot" name "SimSidecar" version "0.0.3" git_commit "edededed" + board "SimRot" name "SimGimletRot" version "0.0.4" git_commit "eeeeeeee" + board "SimRot" name "SimSidecar" version "0.0.4" git_commit "eeeeeeee" + board "SimRotStage0" name "SimSidecar" version "0.0.200" git_commit "dadadada" + board "SimRotStage0" name "SimGimletRot" version "0.0.200" git_commit "dadadadad" + 
board "SimRotStage0" name "SimSidecar" version "0.0.200" git_commit "dddddddd" + board "SimRotStage0" name "SimGimletRot" version "0.0.200" git_commit "ddddddddd" + board "SimSidecarSp" name "SimSidecar" version "0.0.1" git_commit "fefefefe" + board "SimSidecarSp" name "SimSidecar" version "0.0.2" git_commit "ffffffff" rot pages: data_base64 "Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" @@ -49,6 +57,14 @@ cabooses found: RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" RotSlotB baseboard part "i86pc" serial "SimGimlet00": board "SimRot" RotSlotB baseboard part "i86pc" serial "SimGimlet01": board "SimRot" + Stage0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRotStage0" + Stage0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRotStage0" + Stage0 baseboard part "i86pc" serial "SimGimlet00": board "SimRotStage0" + Stage0 baseboard part "i86pc" serial "SimGimlet01": board "SimRotStage0" + Stage0Next baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRotStage0" + Stage0Next baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRotStage0" + Stage0Next baseboard part "i86pc" serial "SimGimlet00": board "SimRotStage0" + Stage0Next baseboard part "i86pc" serial "SimGimlet01": board "SimRotStage0" rot pages found: Cmpa baseboard part "FAKE_SIM_SIDECAR" 
serial "SimSidecar0": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" diff --git a/nexus/inventory/tests/output/collector_sled_agent_errors.txt b/nexus/inventory/tests/output/collector_sled_agent_errors.txt index 7b9bbce84e..9e9c79aa92 100644 --- a/nexus/inventory/tests/output/collector_sled_agent_errors.txt +++ b/nexus/inventory/tests/output/collector_sled_agent_errors.txt @@ -6,10 +6,18 @@ baseboards: part "sim-gimlet" serial "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" cabooses: - board "SimGimletSp" name "SimGimlet" version "0.0.1" git_commit "ffffffff" - board "SimRot" name "SimGimlet" version "0.0.1" git_commit "eeeeeeee" - board "SimRot" name "SimSidecar" version "0.0.1" git_commit "eeeeeeee" - board "SimSidecarSp" name "SimSidecar" version "0.0.1" git_commit "ffffffff" + board "SimGimletSp" name "SimGimlet" version "0.0.1" git_commit "fefefefe" + board "SimGimletSp" name "SimGimlet" version "0.0.2" git_commit "ffffffff" + board "SimRot" name "SimGimletRot" version "0.0.3" git_commit "edededed" + board "SimRot" name "SimSidecar" version "0.0.3" git_commit "edededed" + board "SimRot" name "SimGimletRot" version "0.0.4" git_commit "eeeeeeee" + board "SimRot" name "SimSidecar" version "0.0.4" git_commit "eeeeeeee" + board "SimRotStage0" name "SimSidecar" version "0.0.200" git_commit "dadadada" + board "SimRotStage0" name 
"SimGimletRot" version "0.0.200" git_commit "dadadadad" + board "SimRotStage0" name "SimSidecar" version "0.0.200" git_commit "dddddddd" + board "SimRotStage0" name "SimGimletRot" version "0.0.200" git_commit "ddddddddd" + board "SimSidecarSp" name "SimSidecar" version "0.0.1" git_commit "fefefefe" + board "SimSidecarSp" name "SimSidecar" version "0.0.2" git_commit "ffffffff" rot pages: data_base64 "Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" @@ -50,6 +58,14 @@ cabooses found: RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" RotSlotB baseboard part "i86pc" serial "SimGimlet00": board "SimRot" RotSlotB baseboard part "i86pc" serial "SimGimlet01": board "SimRot" + Stage0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRotStage0" + Stage0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRotStage0" + Stage0 baseboard part "i86pc" serial "SimGimlet00": board "SimRotStage0" + Stage0 baseboard part "i86pc" serial "SimGimlet01": board "SimRotStage0" + Stage0Next baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRotStage0" + Stage0Next baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRotStage0" + Stage0Next baseboard part "i86pc" serial "SimGimlet00": board "SimRotStage0" + Stage0Next baseboard part "i86pc" serial "SimGimlet01": board 
"SimRotStage0" rot pages found: Cmpa baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 74c9313e05..f2a979cb4a 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -483,16 +483,18 @@ impl Sled { model: model.clone(), power_state: PowerState::A2, revision, - rot: RotState::Enabled { + rot: RotState::V3 { active: RotSlot::A, pending_persistent_boot_preference: None, persistent_boot_preference: RotSlot::A, - slot_a_sha3_256_digest: Some(String::from( - "slotAdigest1", - )), - slot_b_sha3_256_digest: Some(String::from( - "slotBdigest1", - )), + slot_a_fwid: String::from("slotAdigest1"), + slot_b_fwid: String::from("slotBdigest1"), + stage0_fwid: String::from("stage0_fwid"), + stage0next_fwid: String::from("stage0next_fwid"), + slot_a_error: None, + slot_b_error: None, + stage0_error: None, + stage0next_error: None, transient_boot_preference: None, }, serial_number: serial.clone(), @@ -582,35 +584,86 @@ impl Sled { .unwrap_or(sled_agent_client::types::Baseboard::Unknown); let inventory_sp = inventory_sp.map(|sledhw| { - let sp_state = SpState { - base_mac_address: [0; 6], - hubris_archive_id: 
sledhw.sp.hubris_archive.clone(), - model: sledhw.baseboard_id.part_number.clone(), - power_state: sledhw.sp.power_state, - revision: sledhw.sp.baseboard_revision, - rot: RotState::Enabled { - active: sledhw.rot.active_slot, - pending_persistent_boot_preference: sledhw - .rot - .pending_persistent_boot_preference, - persistent_boot_preference: sledhw - .rot - .persistent_boot_preference, - slot_a_sha3_256_digest: sledhw - .rot - .slot_a_sha3_256_digest - .clone(), - slot_b_sha3_256_digest: sledhw - .rot - .slot_b_sha3_256_digest - .clone(), - transient_boot_preference: sledhw - .rot - .transient_boot_preference, - }, - serial_number: sledhw.baseboard_id.serial_number.clone(), + // RotStateV3 unconditionally sets all of these + let sp_state = if sledhw.rot.slot_a_sha3_256_digest.is_some() + && sledhw.rot.slot_b_sha3_256_digest.is_some() + && sledhw.rot.stage0_digest.is_some() + && sledhw.rot.stage0next_digest.is_some() + { + SpState { + base_mac_address: [0; 6], + hubris_archive_id: sledhw.sp.hubris_archive.clone(), + model: sledhw.baseboard_id.part_number.clone(), + power_state: sledhw.sp.power_state, + revision: sledhw.sp.baseboard_revision, + rot: RotState::V3 { + active: sledhw.rot.active_slot, + pending_persistent_boot_preference: sledhw + .rot + .pending_persistent_boot_preference, + persistent_boot_preference: sledhw + .rot + .persistent_boot_preference, + slot_a_fwid: sledhw + .rot + .slot_a_sha3_256_digest + .clone() + .expect("slot_a_fwid should be set"), + slot_b_fwid: sledhw + .rot + .slot_b_sha3_256_digest + .clone() + .expect("slot_b_fwid should be set"), + stage0_fwid: sledhw + .rot + .stage0_digest + .clone() + .expect("stage0 fwid should be set"), + stage0next_fwid: sledhw + .rot + .stage0next_digest + .clone() + .expect("stage0 fwid should be set"), + transient_boot_preference: sledhw + .rot + .transient_boot_preference, + slot_a_error: sledhw.rot.slot_a_error, + slot_b_error: sledhw.rot.slot_b_error, + stage0_error: sledhw.rot.stage0_error, + 
stage0next_error: sledhw.rot.stage0next_error, + }, + serial_number: sledhw.baseboard_id.serial_number.clone(), + } + } else { + SpState { + base_mac_address: [0; 6], + hubris_archive_id: sledhw.sp.hubris_archive.clone(), + model: sledhw.baseboard_id.part_number.clone(), + power_state: sledhw.sp.power_state, + revision: sledhw.sp.baseboard_revision, + rot: RotState::V2 { + active: sledhw.rot.active_slot, + pending_persistent_boot_preference: sledhw + .rot + .pending_persistent_boot_preference, + persistent_boot_preference: sledhw + .rot + .persistent_boot_preference, + slot_a_sha3_256_digest: sledhw + .rot + .slot_a_sha3_256_digest + .clone(), + slot_b_sha3_256_digest: sledhw + .rot + .slot_b_sha3_256_digest + .clone(), + transient_boot_preference: sledhw + .rot + .transient_boot_preference, + }, + serial_number: sledhw.baseboard_id.serial_number.clone(), + } }; - (sledhw.sp.sp_slot, sp_state) }); diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 6acbcaca6a..661c4c088d 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -15,6 +15,7 @@ use crate::external_api::shared::Baseboard; use chrono::DateTime; use chrono::Utc; pub use gateway_client::types::PowerState; +pub use gateway_client::types::RotImageError; pub use gateway_client::types::RotSlot; pub use gateway_client::types::SpType; use omicron_common::api::external::ByteCount; @@ -256,6 +257,13 @@ pub struct RotState { pub transient_boot_preference: Option, pub slot_a_sha3_256_digest: Option, pub slot_b_sha3_256_digest: Option, + pub stage0_digest: Option, + pub stage0next_digest: Option, + + pub slot_a_error: Option, + pub slot_b_error: Option, + pub stage0_error: Option, + pub stage0next_error: Option, } /// Describes which caboose this is (which component, which slot) @@ -276,6 +284,8 @@ pub enum CabooseWhich { SpSlot1, RotSlotA, RotSlotB, + Stage0, + Stage0Next, } /// Root of trust page contents found during a collection diff --git a/openapi/gateway.json 
b/openapi/gateway.json index c5d0eab0b1..8bd71e7c99 100644 --- a/openapi/gateway.json +++ b/openapi/gateway.json @@ -762,6 +762,70 @@ } } }, + "/sp/{type}/{slot}/component/{component}/rot-boot-info": { + "get": { + "summary": "Read the RoT boot state from a root of trust", + "description": "This endpoint is only valid for the `rot` component.", + "operationId": "sp_rot_boot_info", + "parameters": [ + { + "in": "path", + "name": "component", + "description": "ID for the component of the SP; this is the internal identifier used by the SP itself to identify its components.", + "required": true, + "schema": { + "type": "string" + } + }, + { + "in": "path", + "name": "slot", + "required": true, + "schema": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + { + "in": "path", + "name": "type", + "required": true, + "schema": { + "$ref": "#/components/schemas/SpType" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GetRotBootInfoParams" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RotState" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/sp/{type}/{slot}/component/{component}/serial-console/attach": { "get": { "summary": "Upgrade into a websocket connection attached to the given SP component's", @@ -1499,6 +1563,19 @@ "slot" ] }, + "GetRotBootInfoParams": { + "type": "object", + "properties": { + "version": { + "type": "integer", + "format": "uint8", + "minimum": 0 + } + }, + "required": [ + "version" + ] + }, "HostPhase2Progress": { "oneOf": [ { @@ -2316,6 +2393,24 @@ "base64_data" ] }, + "RotImageError": { + "type": "string", + "enum": [ + "unchecked", + "first_page_erased", + "partially_programmed", + "invalid_length", + "header_not_programmed", 
+ "bootloader_too_small", + "bad_magic", + "header_image_size", + "unaligned_length", + "unsupported_type", + "reset_vector_not_thumb2", + "reset_vector", + "signature" + ] + }, "RotSlot": { "oneOf": [ { @@ -2378,7 +2473,7 @@ "state": { "type": "string", "enum": [ - "enabled" + "v2" ] }, "transient_boot_preference": { @@ -2413,6 +2508,92 @@ "message", "state" ] + }, + { + "type": "object", + "properties": { + "active": { + "$ref": "#/components/schemas/RotSlot" + }, + "pending_persistent_boot_preference": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RotSlot" + } + ] + }, + "persistent_boot_preference": { + "$ref": "#/components/schemas/RotSlot" + }, + "slot_a_error": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RotImageError" + } + ] + }, + "slot_a_fwid": { + "type": "string" + }, + "slot_b_error": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RotImageError" + } + ] + }, + "slot_b_fwid": { + "type": "string" + }, + "stage0_error": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RotImageError" + } + ] + }, + "stage0_fwid": { + "type": "string" + }, + "stage0next_error": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RotImageError" + } + ] + }, + "stage0next_fwid": { + "type": "string" + }, + "state": { + "type": "string", + "enum": [ + "v3" + ] + }, + "transient_boot_preference": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RotSlot" + } + ] + } + }, + "required": [ + "active", + "persistent_boot_preference", + "slot_a_fwid", + "slot_b_fwid", + "stage0_fwid", + "stage0next_fwid", + "state" + ] } ] }, diff --git a/openapi/wicketd.json b/openapi/wicketd.json index fd8e49b6e3..edef5b9813 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -2584,6 +2584,25 @@ "sps" ] }, + "RotImageError": { + "description": "RotImageError\n\n
JSON schema\n\n```json { \"type\": \"string\", \"enum\": [ \"unchecked\", \"first_page_erased\", \"partially_programmed\", \"invalid_length\", \"header_not_programmed\", \"bootloader_too_small\", \"bad_magic\", \"header_image_size\", \"unaligned_length\", \"unsupported_type\", \"reset_vector_not_thumb2\", \"reset_vector\", \"signature\" ] } ```
", + "type": "string", + "enum": [ + "unchecked", + "first_page_erased", + "partially_programmed", + "invalid_length", + "header_not_programmed", + "bootloader_too_small", + "bad_magic", + "header_image_size", + "unaligned_length", + "unsupported_type", + "reset_vector_not_thumb2", + "reset_vector", + "signature" + ] + }, "RotInventory": { "description": "RoT-related data that isn't already supplied in [`SpState`].", "type": "object", @@ -2606,6 +2625,22 @@ "$ref": "#/components/schemas/SpComponentCaboose" } ] + }, + "caboose_stage0": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/SpComponentCaboose" + } + ] + }, + "caboose_stage0next": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/SpComponentCaboose" + } + ] } }, "required": [ @@ -2646,7 +2681,7 @@ ] }, "RotState": { - "description": "RotState\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"active\", \"persistent_boot_preference\", \"state\" ], \"properties\": { \"active\": { \"$ref\": \"#/components/schemas/RotSlot\" }, \"pending_persistent_boot_preference\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RotSlot\" } ] }, \"persistent_boot_preference\": { \"$ref\": \"#/components/schemas/RotSlot\" }, \"slot_a_sha3_256_digest\": { \"type\": [ \"string\", \"null\" ] }, \"slot_b_sha3_256_digest\": { \"type\": [ \"string\", \"null\" ] }, \"state\": { \"type\": \"string\", \"enum\": [ \"enabled\" ] }, \"transient_boot_preference\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RotSlot\" } ] } } }, { \"type\": \"object\", \"required\": [ \"message\", \"state\" ], \"properties\": { \"message\": { \"type\": \"string\" }, \"state\": { \"type\": \"string\", \"enum\": [ \"communication_failed\" ] } } } ] } ```
", + "description": "RotState\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"active\", \"persistent_boot_preference\", \"state\" ], \"properties\": { \"active\": { \"$ref\": \"#/components/schemas/RotSlot\" }, \"pending_persistent_boot_preference\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RotSlot\" } ] }, \"persistent_boot_preference\": { \"$ref\": \"#/components/schemas/RotSlot\" }, \"slot_a_sha3_256_digest\": { \"type\": [ \"string\", \"null\" ] }, \"slot_b_sha3_256_digest\": { \"type\": [ \"string\", \"null\" ] }, \"state\": { \"type\": \"string\", \"enum\": [ \"v2\" ] }, \"transient_boot_preference\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RotSlot\" } ] } } }, { \"type\": \"object\", \"required\": [ \"message\", \"state\" ], \"properties\": { \"message\": { \"type\": \"string\" }, \"state\": { \"type\": \"string\", \"enum\": [ \"communication_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"active\", \"persistent_boot_preference\", \"slot_a_fwid\", \"slot_b_fwid\", \"stage0_fwid\", \"stage0next_fwid\", \"state\" ], \"properties\": { \"active\": { \"$ref\": \"#/components/schemas/RotSlot\" }, \"pending_persistent_boot_preference\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RotSlot\" } ] }, \"persistent_boot_preference\": { \"$ref\": \"#/components/schemas/RotSlot\" }, \"slot_a_error\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RotImageError\" } ] }, \"slot_a_fwid\": { \"type\": \"string\" }, \"slot_b_error\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RotImageError\" } ] }, \"slot_b_fwid\": { \"type\": \"string\" }, \"stage0_error\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RotImageError\" } ] }, \"stage0_fwid\": { \"type\": \"string\" }, \"stage0next_error\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RotImageError\" } ] }, \"stage0next_fwid\": { \"type\": \"string\" }, \"state\": { \"type\": \"string\", \"enum\": [ \"v3\" ] }, \"transient_boot_preference\": { \"allOf\": [ { \"$ref\": 
\"#/components/schemas/RotSlot\" } ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -2676,7 +2711,7 @@ "state": { "type": "string", "enum": [ - "enabled" + "v2" ] }, "transient_boot_preference": { @@ -2711,6 +2746,92 @@ "message", "state" ] + }, + { + "type": "object", + "properties": { + "active": { + "$ref": "#/components/schemas/RotSlot" + }, + "pending_persistent_boot_preference": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RotSlot" + } + ] + }, + "persistent_boot_preference": { + "$ref": "#/components/schemas/RotSlot" + }, + "slot_a_error": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RotImageError" + } + ] + }, + "slot_a_fwid": { + "type": "string" + }, + "slot_b_error": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RotImageError" + } + ] + }, + "slot_b_fwid": { + "type": "string" + }, + "stage0_error": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RotImageError" + } + ] + }, + "stage0_fwid": { + "type": "string" + }, + "stage0next_error": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RotImageError" + } + ] + }, + "stage0next_fwid": { + "type": "string" + }, + "state": { + "type": "string", + "enum": [ + "v3" + ] + }, + "transient_boot_preference": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RotSlot" + } + ] + } + }, + "required": [ + "active", + "persistent_boot_preference", + "slot_a_fwid", + "slot_b_fwid", + "stage0_fwid", + "stage0next_fwid", + "state" + ] } ] }, diff --git a/oximeter/collector/tests/output/self-stat-schema.json b/oximeter/collector/tests/output/self-stat-schema.json index 286ac63405..019e05b494 100644 --- a/oximeter/collector/tests/output/self-stat-schema.json +++ b/oximeter/collector/tests/output/self-stat-schema.json @@ -39,7 +39,7 @@ } ], "datum_type": "cumulative_u64", - "created": "2024-05-21T18:32:24.199619581Z" + "created": "2024-06-04T20:49:05.675711686Z" }, "oximeter_collector:failed_collections": { "timeseries_name": 
"oximeter_collector:failed_collections", @@ -86,6 +86,6 @@ } ], "datum_type": "cumulative_u64", - "created": "2024-05-21T18:32:24.200514936Z" + "created": "2024-06-04T20:49:05.676050088Z" } } \ No newline at end of file diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index cf6bc2bf53..b759f86f1b 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2912,6 +2912,22 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_service_processor ( PRIMARY KEY (inv_collection_id, hw_baseboard_id) ); +CREATE TYPE IF NOT EXISTS omicron.public.rot_image_error AS ENUM ( + 'unchecked', + 'first_page_erased', + 'partially_programmed', + 'invalid_length', + 'header_not_programmed', + 'bootloader_too_small', + 'bad_magic', + 'header_image_size', + 'unaligned_length', + 'unsupported_type', + 'not_thumb2', + 'reset_vector', + 'signature' +); + -- root of trust information reported by SP -- There's usually one row here for each row in inv_service_processor, but not -- necessarily. @@ -2933,6 +2949,13 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_root_of_trust ( slot_boot_pref_persistent_pending omicron.public.hw_rot_slot, -- nullable slot_a_sha3_256 TEXT, -- nullable slot_b_sha3_256 TEXT, -- nullable + stage0_fwid TEXT, -- nullable + stage0next_fwid TEXT, -- nullable + + slot_a_error omicron.public.rot_image_error, -- nullable + slot_b_error omicron.public.rot_image_error, -- nullable + stage0_error omicron.public.rot_image_error, -- nullable + stage0next_error omicron.public.rot_image_error, -- nullable PRIMARY KEY (inv_collection_id, hw_baseboard_id) ); @@ -2941,7 +2964,9 @@ CREATE TYPE IF NOT EXISTS omicron.public.caboose_which AS ENUM ( 'sp_slot_0', 'sp_slot_1', 'rot_slot_A', - 'rot_slot_B' + 'rot_slot_B', + 'stage0', + 'stage0next' ); -- cabooses found @@ -4020,7 +4045,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '68.0.0', NULL) + (TRUE, NOW(), NOW(), '69.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; 
diff --git a/schema/crdb/expose-stage0/up01.sql b/schema/crdb/expose-stage0/up01.sql new file mode 100644 index 0000000000..f36e2c11e1 --- /dev/null +++ b/schema/crdb/expose-stage0/up01.sql @@ -0,0 +1,17 @@ +-- Create the `rot_image_error` type +CREATE TYPE IF NOT EXISTS omicron.public.rot_image_error AS ENUM ( + 'unchecked', + 'first_page_erased', + 'partially_programmed', + 'invalid_length', + 'header_not_programmed', + 'bootloader_too_small', + 'bad_magic', + 'header_image_size', + 'unaligned_length', + 'unsupported_type', + 'not_thumb2', + 'reset_vector', + 'signature' +); + diff --git a/schema/crdb/expose-stage0/up02.sql b/schema/crdb/expose-stage0/up02.sql new file mode 100644 index 0000000000..389b2ef7a6 --- /dev/null +++ b/schema/crdb/expose-stage0/up02.sql @@ -0,0 +1,8 @@ +-- Add new fields for stage0. These can all correctly be NULL +ALTER TABLE omicron.inv_root_of_trust + ADD COLUMN IF NOT EXISTS stage0_fwid TEXT, + ADD COLUMN IF NOT EXISTS stage0next_fwid TEXT, + ADD COLUMN IF NOT EXISTS slot_a_error omicron.public.rot_image_error, + ADD COLUMN IF NOT EXISTS slot_b_error omicron.public.rot_image_error, + ADD COLUMN IF NOT EXISTS stage0_error omicron.public.rot_image_error, + ADD COLUMN IF NOT EXISTS stage0next_error omicron.public.rot_image_error; diff --git a/schema/crdb/expose-stage0/up03.sql b/schema/crdb/expose-stage0/up03.sql new file mode 100644 index 0000000000..5f12436af3 --- /dev/null +++ b/schema/crdb/expose-stage0/up03.sql @@ -0,0 +1,2 @@ +-- add stage0/stage0next to the caboose targets +ALTER TYPE omicron.public.caboose_which ADD VALUE IF NOT EXISTS 'stage0'; diff --git a/schema/crdb/expose-stage0/up04.sql b/schema/crdb/expose-stage0/up04.sql new file mode 100644 index 0000000000..c72f43560f --- /dev/null +++ b/schema/crdb/expose-stage0/up04.sql @@ -0,0 +1,2 @@ +-- add stage0/stage0next to the caboose targets +ALTER TYPE omicron.public.caboose_which ADD VALUE IF NOT EXISTS 'stage0next'; diff --git a/smf/sp-sim/config.toml 
b/smf/sp-sim/config.toml index 9766be2f5c..eda37959cd 100644 --- a/smf/sp-sim/config.toml +++ b/smf/sp-sim/config.toml @@ -21,7 +21,7 @@ device_id_cert_seed = "01de00000000000000000000000000000000000000000000000000000 id = "sp3-host-cpu" device = "sp3-host-cpu" description = "FAKE host cpu" -capabilities.bits = 0 +capabilities = 0 presence = "Present" serial_console = "[::1]:33312" @@ -36,7 +36,7 @@ device_id_cert_seed = "01de00000000000000000000000000000000000000000000000000000 id = "sp3-host-cpu" device = "sp3-host-cpu" description = "FAKE host cpu" -capabilities.bits = 0 +capabilities = 0 presence = "Present" serial_console = "[::1]:33322" diff --git a/sp-sim/examples/config.toml b/sp-sim/examples/config.toml index 9766be2f5c..cf338ecf2e 100644 --- a/sp-sim/examples/config.toml +++ b/sp-sim/examples/config.toml @@ -9,7 +9,6 @@ serial_number = "SimSidecar0" manufacturing_root_cert_seed = "01de01de01de01de01de01de01de01de01de01de01de01de01de01de01de01de" device_id_cert_seed = "01de000000000000000000000000000000000000000000000000000000000000" - [[simulated_sps.gimlet]] multicast_addr = "ff15:0:1de::1" bind_addrs = ["[::]:33310", "[::]:33311"] @@ -21,7 +20,7 @@ device_id_cert_seed = "01de00000000000000000000000000000000000000000000000000000 id = "sp3-host-cpu" device = "sp3-host-cpu" description = "FAKE host cpu" -capabilities.bits = 0 +capabilities = 0 presence = "Present" serial_console = "[::1]:33312" @@ -36,7 +35,7 @@ device_id_cert_seed = "01de00000000000000000000000000000000000000000000000000000 id = "sp3-host-cpu" device = "sp3-host-cpu" description = "FAKE host cpu" -capabilities.bits = 0 +capabilities = 0 presence = "Present" serial_console = "[::1]:33322" diff --git a/sp-sim/src/config.rs b/sp-sim/src/config.rs index f2b6336953..b64953e5ed 100644 --- a/sp-sim/src/config.rs +++ b/sp-sim/src/config.rs @@ -36,6 +36,13 @@ pub struct SpCommonConfig { /// Fake components. 
#[serde(skip_serializing_if = "Vec::is_empty", default)] pub components: Vec, + /// Return errors for `versioned_rot_boot_info` simulating + /// an older RoT + #[serde(default)] + pub old_rot_state: bool, + /// Simulate a RoT stage0 with no caboose + #[serde(default)] + pub no_stage0_caboose: bool, } /// Configuration of a simulated SP component diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs index 0c109c1bd7..280248d034 100644 --- a/sp-sim/src/gimlet.rs +++ b/sp-sim/src/gimlet.rs @@ -25,6 +25,7 @@ use gateway_messages::sp_impl::{BoundsChecked, DeviceDescription}; use gateway_messages::CfpaPage; use gateway_messages::ComponentAction; use gateway_messages::Header; +use gateway_messages::RotBootInfo; use gateway_messages::RotRequest; use gateway_messages::RotResponse; use gateway_messages::RotSlotId; @@ -56,6 +57,19 @@ use tokio::sync::Mutex as TokioMutex; use tokio::task::{self, JoinHandle}; pub const SIM_GIMLET_BOARD: &str = "SimGimletSp"; +const SP_GITC0: &[u8] = b"ffffffff"; +const SP_GITC1: &[u8] = b"fefefefe"; +const SP_BORD: &[u8] = SIM_GIMLET_BOARD.as_bytes(); +const SP_NAME: &[u8] = b"SimGimlet"; +const SP_VERS0: &[u8] = b"0.0.2"; +const SP_VERS1: &[u8] = b"0.0.1"; + +const ROT_GITC0: &[u8] = b"eeeeeeee"; +const ROT_GITC1: &[u8] = b"edededed"; +const ROT_BORD: &[u8] = SIM_ROT_BOARD.as_bytes(); +const ROT_NAME: &[u8] = b"SimGimletRot"; +const ROT_VERS0: &[u8] = b"0.0.4"; +const ROT_VERS1: &[u8] = b"0.0.3"; /// Type of request most recently handled by a simulated SP. 
/// @@ -278,6 +292,8 @@ impl Gimlet { commands_rx, Arc::clone(&last_request_handled), log, + gimlet.common.old_rot_state, + gimlet.common.no_stage0_caboose, ); inner_tasks .push(task::spawn(async move { inner.run().await.unwrap() })); @@ -498,6 +514,8 @@ impl UdpTask { commands: mpsc::UnboundedReceiver, last_request_handled: Arc>>, log: Logger, + old_rot_state: bool, + no_stage0_caboose: bool, ) -> (Self, Arc>, watch::Receiver) { let [udp0, udp1] = servers; let handler = Arc::new(TokioMutex::new(Handler::new( @@ -506,6 +524,8 @@ impl UdpTask { attached_mgs, incoming_serial_console, log, + old_rot_state, + no_stage0_caboose, ))); let responses_sent_count = watch::Sender::new(0); let responses_sent_count_rx = responses_sent_count.subscribe(); @@ -642,6 +662,8 @@ struct Handler { // this, our caller will pass us a function to call if they should ignore // whatever result we return and fail to respond at all. should_fail_to_respond_signal: Option>, + no_stage0_caboose: bool, + old_rot_state: bool, } impl Handler { @@ -651,6 +673,8 @@ impl Handler { attached_mgs: Arc>>, incoming_serial_console: HashMap>>, log: Logger, + old_rot_state: bool, + no_stage0_caboose: bool, ) -> Self { let mut leaked_component_device_strings = Vec::with_capacity(components.len()); @@ -679,6 +703,8 @@ impl Handler { reset_pending: None, last_request_handled: None, should_fail_to_respond_signal: None, + old_rot_state, + no_stage0_caboose, } } @@ -701,8 +727,8 @@ impl Handler { persistent_boot_preference: RotSlotId::A, pending_persistent_boot_preference: None, transient_boot_preference: None, - slot_a_sha3_256_digest: None, - slot_b_sha3_256_digest: None, + slot_a_sha3_256_digest: Some([0x55; 32]), + slot_b_sha3_256_digest: Some([0x66; 32]), }), } } @@ -1389,29 +1415,66 @@ impl SpHandler for Handler { fn get_component_caboose_value( &mut self, component: SpComponent, - _slot: u16, + slot: u16, key: [u8; 4], buf: &mut [u8], ) -> std::result::Result { - static SP_GITC: &[u8] = b"ffffffff"; - static 
SP_BORD: &[u8] = SIM_GIMLET_BOARD.as_bytes(); - static SP_NAME: &[u8] = b"SimGimlet"; - static SP_VERS: &[u8] = b"0.0.1"; - - static ROT_GITC: &[u8] = b"eeeeeeee"; - static ROT_BORD: &[u8] = SIM_ROT_BOARD.as_bytes(); - static ROT_NAME: &[u8] = b"SimGimlet"; - static ROT_VERS: &[u8] = b"0.0.1"; - - let val = match (component, &key) { - (SpComponent::SP_ITSELF, b"GITC") => SP_GITC, - (SpComponent::SP_ITSELF, b"BORD") => SP_BORD, - (SpComponent::SP_ITSELF, b"NAME") => SP_NAME, - (SpComponent::SP_ITSELF, b"VERS") => SP_VERS, - (SpComponent::ROT, b"GITC") => ROT_GITC, - (SpComponent::ROT, b"BORD") => ROT_BORD, - (SpComponent::ROT, b"NAME") => ROT_NAME, - (SpComponent::ROT, b"VERS") => ROT_VERS, + use crate::SIM_ROT_STAGE0_BOARD; + + const STAGE0_GITC0: &[u8] = b"ddddddddd"; + const STAGE0_GITC1: &[u8] = b"dadadadad"; + const STAGE0_BORD: &[u8] = SIM_ROT_STAGE0_BOARD.as_bytes(); + const STAGE0_NAME: &[u8] = b"SimGimletRot"; + const STAGE0_VERS0: &[u8] = b"0.0.200"; + const STAGE0_VERS1: &[u8] = b"0.0.200"; + + let val = match (component, &key, slot, self.no_stage0_caboose) { + (SpComponent::SP_ITSELF, b"GITC", 0, _) => SP_GITC0, + (SpComponent::SP_ITSELF, b"GITC", 1, _) => SP_GITC1, + (SpComponent::SP_ITSELF, b"BORD", _, _) => SP_BORD, + (SpComponent::SP_ITSELF, b"NAME", _, _) => SP_NAME, + (SpComponent::SP_ITSELF, b"VERS", 0, _) => SP_VERS0, + (SpComponent::SP_ITSELF, b"VERS", 1, _) => SP_VERS1, + (SpComponent::ROT, b"GITC", 0, _) => ROT_GITC0, + (SpComponent::ROT, b"GITC", 1, _) => ROT_GITC1, + (SpComponent::ROT, b"BORD", _, _) => ROT_BORD, + (SpComponent::ROT, b"NAME", _, _) => ROT_NAME, + (SpComponent::ROT, b"VERS", 0, _) => ROT_VERS0, + (SpComponent::ROT, b"VERS", 1, _) => ROT_VERS1, + (SpComponent::STAGE0, b"GITC", 0, false) => STAGE0_GITC0, + (SpComponent::STAGE0, b"GITC", 1, false) => STAGE0_GITC1, + (SpComponent::STAGE0, b"BORD", _, false) => STAGE0_BORD, + (SpComponent::STAGE0, b"NAME", _, false) => STAGE0_NAME, + (SpComponent::STAGE0, b"VERS", 0, false) => 
STAGE0_VERS0, + (SpComponent::STAGE0, b"VERS", 1, false) => STAGE0_VERS1, + _ => return Err(SpError::NoSuchCabooseKey(key)), + }; + + buf[..val.len()].copy_from_slice(val); + Ok(val.len()) + } + + #[cfg(any(feature = "no-caboose", feature = "old-state"))] + fn get_component_caboose_value( + &mut self, + component: SpComponent, + slot: u16, + key: [u8; 4], + buf: &mut [u8], + ) -> std::result::Result { + let val = match (component, &key, slot) { + (SpComponent::SP_ITSELF, b"GITC", 0) => SP_GITC0, + (SpComponent::SP_ITSELF, b"GITC", 1) => SP_GITC1, + (SpComponent::SP_ITSELF, b"BORD", _) => SP_BORD, + (SpComponent::SP_ITSELF, b"NAME", _) => SP_NAME, + (SpComponent::SP_ITSELF, b"VERS", 0) => SP_VERS0, + (SpComponent::SP_ITSELF, b"VERS", 1) => SP_VERS1, + (SpComponent::ROT, b"GITC", 0) => ROT_GITC0, + (SpComponent::ROT, b"GITC", 1) => ROT_GITC1, + (SpComponent::ROT, b"BORD", _) => ROT_BORD, + (SpComponent::ROT, b"NAME", _) => ROT_NAME, + (SpComponent::ROT, b"VERS", 0) => ROT_VERS0, + (SpComponent::ROT, b"VERS", 1) => ROT_VERS1, _ => return Err(SpError::NoSuchCabooseKey(key)), }; @@ -1445,6 +1508,114 @@ impl SpHandler for Handler { buf[dummy_page.len()..].fill(0); Ok(RotResponse::Ok) } + + fn vpd_lock_status_all( + &mut self, + _buf: &mut [u8], + ) -> Result { + Err(SpError::RequestUnsupportedForSp) + } + + fn reset_component_trigger_with_watchdog( + &mut self, + component: SpComponent, + _time_ms: u32, + ) -> Result<(), SpError> { + debug!( + &self.log, "received reset trigger with watchdog request"; + "component" => ?component, + ); + if component == SpComponent::SP_ITSELF { + if self.reset_pending == Some(SpComponent::SP_ITSELF) { + self.update_state.sp_reset(); + self.reset_pending = None; + if let Some(signal) = self.should_fail_to_respond_signal.take() + { + // Instruct `server::handle_request()` to _not_ respond to + // this request at all, simulating an SP actually resetting. 
+ signal(); + } + Ok(()) + } else { + Err(SpError::ResetComponentTriggerWithoutPrepare) + } + } else if component == SpComponent::ROT { + if self.reset_pending == Some(SpComponent::ROT) { + self.update_state.rot_reset(); + self.reset_pending = None; + Ok(()) + } else { + Err(SpError::ResetComponentTriggerWithoutPrepare) + } + } else { + Err(SpError::RequestUnsupportedForComponent) + } + } + + fn disable_component_watchdog( + &mut self, + _component: SpComponent, + ) -> Result<(), SpError> { + Ok(()) + } + fn component_watchdog_supported( + &mut self, + _component: SpComponent, + ) -> Result<(), SpError> { + Ok(()) + } + + fn versioned_rot_boot_info( + &mut self, + _sender: SocketAddrV6, + _port: SpPort, + version: u8, + ) -> Result { + if self.old_rot_state { + Err(SpError::RequestUnsupportedForSp) + } else { + const SLOT_A_DIGEST: [u8; 32] = [0xaa; 32]; + const SLOT_B_DIGEST: [u8; 32] = [0xbb; 32]; + const STAGE0_DIGEST: [u8; 32] = [0xcc; 32]; + const STAGE0NEXT_DIGEST: [u8; 32] = [0xdd; 32]; + + match version { + 0 => Err(SpError::Update( + gateway_messages::UpdateError::VersionNotSupported, + )), + 1 => Ok(RotBootInfo::V2(gateway_messages::RotStateV2 { + active: RotSlotId::A, + persistent_boot_preference: RotSlotId::A, + pending_persistent_boot_preference: None, + transient_boot_preference: None, + slot_a_sha3_256_digest: Some(SLOT_A_DIGEST), + slot_b_sha3_256_digest: Some(SLOT_B_DIGEST), + })), + _ => Ok(RotBootInfo::V3(gateway_messages::RotStateV3 { + active: RotSlotId::A, + persistent_boot_preference: RotSlotId::A, + pending_persistent_boot_preference: None, + transient_boot_preference: None, + slot_a_fwid: gateway_messages::Fwid::Sha3_256( + SLOT_A_DIGEST, + ), + slot_b_fwid: gateway_messages::Fwid::Sha3_256( + SLOT_B_DIGEST, + ), + stage0_fwid: gateway_messages::Fwid::Sha3_256( + STAGE0_DIGEST, + ), + stage0next_fwid: gateway_messages::Fwid::Sha3_256( + STAGE0NEXT_DIGEST, + ), + slot_a_status: Ok(()), + slot_b_status: Ok(()), + stage0_status: Ok(()), + 
stage0next_status: Ok(()), + })), + } + } + } } impl SimSpHandler for Handler { diff --git a/sp-sim/src/lib.rs b/sp-sim/src/lib.rs index 8a8418b84d..ca9231bec0 100644 --- a/sp-sim/src/lib.rs +++ b/sp-sim/src/lib.rs @@ -30,6 +30,7 @@ use tokio::sync::mpsc; use tokio::sync::watch; pub const SIM_ROT_BOARD: &str = "SimRot"; +pub const SIM_ROT_STAGE0_BOARD: &str = "SimRotStage0"; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Responsiveness { diff --git a/sp-sim/src/sidecar.rs b/sp-sim/src/sidecar.rs index 1bd6fe4964..696989f791 100644 --- a/sp-sim/src/sidecar.rs +++ b/sp-sim/src/sidecar.rs @@ -17,6 +17,7 @@ use crate::update::SimSpUpdate; use crate::Responsiveness; use crate::SimulatedSp; use crate::SIM_ROT_BOARD; +use crate::SIM_ROT_STAGE0_BOARD; use anyhow::Result; use async_trait::async_trait; use futures::future; @@ -35,6 +36,7 @@ use gateway_messages::IgnitionCommand; use gateway_messages::IgnitionState; use gateway_messages::MgsError; use gateway_messages::PowerState; +use gateway_messages::RotBootInfo; use gateway_messages::RotRequest; use gateway_messages::RotResponse; use gateway_messages::RotSlotId; @@ -206,6 +208,8 @@ impl Sidecar { FakeIgnition::new(&config.simulated_sps), commands_rx, log, + sidecar.common.old_rot_state, + sidecar.common.no_stage0_caboose, ); let inner_task = task::spawn(async move { inner.run().await.unwrap() }); @@ -262,6 +266,7 @@ struct Inner { } impl Inner { + #[allow(clippy::too_many_arguments)] fn new( servers: [UdpServer; 2], components: Vec, @@ -269,6 +274,8 @@ impl Inner { ignition: FakeIgnition, commands: mpsc::UnboundedReceiver, log: Logger, + old_rot_state: bool, + no_stage0_caboose: bool, ) -> (Self, Arc>, watch::Receiver) { let [udp0, udp1] = servers; let handler = Arc::new(TokioMutex::new(Handler::new( @@ -276,6 +283,8 @@ impl Inner { components, ignition, log, + old_rot_state, + no_stage0_caboose, ))); let responses_sent_count = watch::Sender::new(0); let responses_sent_count_rx = responses_sent_count.subscribe(); 
@@ -406,6 +415,8 @@ struct Handler { // this, our caller will pass us a function to call if they should ignore // whatever result we return and fail to respond at all. should_fail_to_respond_signal: Option>, + no_stage0_caboose: bool, + old_rot_state: bool, } impl Handler { @@ -414,6 +425,8 @@ impl Handler { components: Vec, ignition: FakeIgnition, log: Logger, + old_rot_state: bool, + no_stage0_caboose: bool, ) -> Self { let mut leaked_component_device_strings = Vec::with_capacity(components.len()); @@ -439,6 +452,8 @@ impl Handler { update_state: SimSpUpdate::default(), reset_pending: None, should_fail_to_respond_signal: None, + old_rot_state, + no_stage0_caboose, } } @@ -1118,29 +1133,50 @@ impl SpHandler for Handler { fn get_component_caboose_value( &mut self, component: SpComponent, - _slot: u16, + slot: u16, key: [u8; 4], buf: &mut [u8], ) -> std::result::Result { - static SP_GITC: &[u8] = b"ffffffff"; + static SP_GITC0: &[u8] = b"ffffffff"; + static SP_GITC1: &[u8] = b"fefefefe"; static SP_BORD: &[u8] = SIM_SIDECAR_BOARD.as_bytes(); static SP_NAME: &[u8] = b"SimSidecar"; - static SP_VERS: &[u8] = b"0.0.1"; + static SP_VERS0: &[u8] = b"0.0.2"; + static SP_VERS1: &[u8] = b"0.0.1"; - static ROT_GITC: &[u8] = b"eeeeeeee"; + static ROT_GITC0: &[u8] = b"eeeeeeee"; + static ROT_GITC1: &[u8] = b"edededed"; static ROT_BORD: &[u8] = SIM_ROT_BOARD.as_bytes(); static ROT_NAME: &[u8] = b"SimSidecar"; - static ROT_VERS: &[u8] = b"0.0.1"; - - let val = match (component, &key) { - (SpComponent::SP_ITSELF, b"GITC") => SP_GITC, - (SpComponent::SP_ITSELF, b"BORD") => SP_BORD, - (SpComponent::SP_ITSELF, b"NAME") => SP_NAME, - (SpComponent::SP_ITSELF, b"VERS") => SP_VERS, - (SpComponent::ROT, b"GITC") => ROT_GITC, - (SpComponent::ROT, b"BORD") => ROT_BORD, - (SpComponent::ROT, b"NAME") => ROT_NAME, - (SpComponent::ROT, b"VERS") => ROT_VERS, + static ROT_VERS0: &[u8] = b"0.0.4"; + static ROT_VERS1: &[u8] = b"0.0.3"; + + static STAGE0_GITC0: &[u8] = b"dddddddd"; + static 
STAGE0_GITC1: &[u8] = b"dadadada"; + static STAGE0_BORD: &[u8] = SIM_ROT_STAGE0_BOARD.as_bytes(); + static STAGE0_NAME: &[u8] = b"SimSidecar"; + static STAGE0_VERS0: &[u8] = b"0.0.200"; + static STAGE0_VERS1: &[u8] = b"0.0.200"; + + let val = match (component, &key, slot, self.no_stage0_caboose) { + (SpComponent::SP_ITSELF, b"GITC", 0, _) => SP_GITC0, + (SpComponent::SP_ITSELF, b"GITC", 1, _) => SP_GITC1, + (SpComponent::SP_ITSELF, b"BORD", _, _) => SP_BORD, + (SpComponent::SP_ITSELF, b"NAME", _, _) => SP_NAME, + (SpComponent::SP_ITSELF, b"VERS", 0, _) => SP_VERS0, + (SpComponent::SP_ITSELF, b"VERS", 1, _) => SP_VERS1, + (SpComponent::ROT, b"GITC", 0, _) => ROT_GITC0, + (SpComponent::ROT, b"GITC", 1, _) => ROT_GITC1, + (SpComponent::ROT, b"BORD", _, _) => ROT_BORD, + (SpComponent::ROT, b"NAME", _, _) => ROT_NAME, + (SpComponent::ROT, b"VERS", 0, _) => ROT_VERS0, + (SpComponent::ROT, b"VERS", 1, _) => ROT_VERS1, + (SpComponent::STAGE0, b"GITC", 0, false) => STAGE0_GITC0, + (SpComponent::STAGE0, b"GITC", 1, false) => STAGE0_GITC1, + (SpComponent::STAGE0, b"BORD", _, false) => STAGE0_BORD, + (SpComponent::STAGE0, b"NAME", _, false) => STAGE0_NAME, + (SpComponent::STAGE0, b"VERS", 0, false) => STAGE0_VERS0, + (SpComponent::STAGE0, b"VERS", 1, false) => STAGE0_VERS1, _ => return Err(SpError::NoSuchCabooseKey(key)), }; @@ -1174,6 +1210,114 @@ impl SpHandler for Handler { buf[dummy_page.len()..].fill(0); Ok(RotResponse::Ok) } + + fn vpd_lock_status_all( + &mut self, + _buf: &mut [u8], + ) -> Result { + Err(SpError::RequestUnsupportedForSp) + } + + fn reset_component_trigger_with_watchdog( + &mut self, + component: SpComponent, + _time_ms: u32, + ) -> Result<(), SpError> { + debug!( + &self.log, "received sys-reset trigger with wathcdog request"; + "component" => ?component, + ); + if component == SpComponent::SP_ITSELF { + if self.reset_pending == Some(SpComponent::SP_ITSELF) { + self.update_state.sp_reset(); + self.reset_pending = None; + if let Some(signal) = 
self.should_fail_to_respond_signal.take() + { + // Instruct `server::handle_request()` to _not_ respond to + // this request at all, simulating an SP actually resetting. + signal(); + } + Ok(()) + } else { + Err(SpError::ResetComponentTriggerWithoutPrepare) + } + } else if component == SpComponent::ROT { + if self.reset_pending == Some(SpComponent::ROT) { + self.update_state.rot_reset(); + self.reset_pending = None; + Ok(()) + } else { + Err(SpError::ResetComponentTriggerWithoutPrepare) + } + } else { + Err(SpError::RequestUnsupportedForComponent) + } + } + + fn disable_component_watchdog( + &mut self, + _component: SpComponent, + ) -> Result<(), SpError> { + Ok(()) + } + fn component_watchdog_supported( + &mut self, + _component: SpComponent, + ) -> Result<(), SpError> { + Ok(()) + } + + fn versioned_rot_boot_info( + &mut self, + _sender: SocketAddrV6, + _port: SpPort, + version: u8, + ) -> Result { + if self.old_rot_state { + Err(SpError::RequestUnsupportedForSp) + } else { + const SLOT_A_DIGEST: [u8; 32] = [0xaa; 32]; + const SLOT_B_DIGEST: [u8; 32] = [0xbb; 32]; + const STAGE0_DIGEST: [u8; 32] = [0xcc; 32]; + const STAGE0NEXT_DIGEST: [u8; 32] = [0xdd; 32]; + + match version { + 0 => Err(SpError::Update( + gateway_messages::UpdateError::VersionNotSupported, + )), + 1 => Ok(RotBootInfo::V2(gateway_messages::RotStateV2 { + active: RotSlotId::A, + persistent_boot_preference: RotSlotId::A, + pending_persistent_boot_preference: None, + transient_boot_preference: None, + slot_a_sha3_256_digest: Some(SLOT_A_DIGEST), + slot_b_sha3_256_digest: Some(SLOT_B_DIGEST), + })), + _ => Ok(RotBootInfo::V3(gateway_messages::RotStateV3 { + active: RotSlotId::A, + persistent_boot_preference: RotSlotId::A, + pending_persistent_boot_preference: None, + transient_boot_preference: None, + slot_a_fwid: gateway_messages::Fwid::Sha3_256( + SLOT_A_DIGEST, + ), + slot_b_fwid: gateway_messages::Fwid::Sha3_256( + SLOT_B_DIGEST, + ), + stage0_fwid: gateway_messages::Fwid::Sha3_256( + 
STAGE0_DIGEST, + ), + stage0next_fwid: gateway_messages::Fwid::Sha3_256( + STAGE0NEXT_DIGEST, + ), + slot_a_status: Ok(()), + slot_b_status: Ok(()), + stage0_status: Ok(()), + stage0next_status: Ok(()), + })), + } + } + } } impl SimSpHandler for Handler { diff --git a/wicket/src/ui/panes/overview.rs b/wicket/src/ui/panes/overview.rs index f2d4d4a7ab..7d60c41772 100644 --- a/wicket/src/ui/panes/overview.rs +++ b/wicket/src/ui/panes/overview.rs @@ -675,7 +675,255 @@ fn inventory_description(component: &Component) -> Text { let mut label = vec![Span::styled("Root of Trust: ", label_style)]; if let Some(rot) = sp.state().map(|sp| &sp.rot) { match rot { - RotState::Enabled { + RotState::V3 { + active, + pending_persistent_boot_preference, + persistent_boot_preference, + slot_a_fwid, + slot_b_fwid, + stage0_fwid, + stage0next_fwid, + transient_boot_preference, + slot_a_error, + slot_b_error, + stage0_error, + stage0next_error, + } => { + spans.push(label.into()); + spans.push( + vec![ + bullet(), + Span::styled("Active Slot: ", label_style), + Span::styled(format!("{active:?}"), ok_style), + ] + .into(), + ); + spans.push( + vec![ + bullet(), + Span::styled( + "Persistent Boot Preference: ", + label_style, + ), + Span::styled( + format!("{persistent_boot_preference:?}"), + ok_style, + ), + ] + .into(), + ); + spans.push( + vec![ + bullet(), + Span::styled( + "Pending Persistent Boot Preference: ", + label_style, + ), + Span::styled( + match pending_persistent_boot_preference.as_ref() { + Some(pref) => Cow::from(format!("{pref:?}")), + None => Cow::from("None"), + }, + ok_style, + ), + ] + .into(), + ); + spans.push( + vec![ + bullet(), + Span::styled( + "Transient Boot Preference: ", + label_style, + ), + Span::styled( + match transient_boot_preference.as_ref() { + Some(pref) => Cow::from(format!("{pref:?}")), + None => Cow::from("None"), + }, + ok_style, + ), + ] + .into(), + ); + spans.push( + vec![bullet(), Span::styled("Slot A:", label_style)].into(), + ); + 
spans.push( + vec![ + nest_bullet(), + Span::styled("Image SHA3-256: ", label_style), + Span::styled(slot_a_fwid.clone(), ok_style), + ] + .into(), + ); + if let Some(caboose) = + sp.rot().and_then(|r| r.caboose_a.as_ref()) + { + append_caboose(&mut spans, nest_bullet(), caboose); + } else { + spans.push( + vec![ + nest_bullet(), + Span::styled("No further information", warn_style), + ] + .into(), + ); + } + if let Some(_) = slot_a_error { + spans.push( + vec![ + nest_bullet(), + Span::styled("Image status: ", label_style), + Span::styled("Error: ", bad_style), + ] + .into(), + ); + } else { + spans.push( + vec![ + nest_bullet(), + Span::styled("Image status: ", label_style), + Span::styled("Status Good", ok_style), + ] + .into(), + ); + } + spans.push( + vec![bullet(), Span::styled("Slot B:", label_style)].into(), + ); + spans.push( + vec![ + nest_bullet(), + Span::styled("Image SHA3-256: ", label_style), + Span::styled(slot_b_fwid.clone(), ok_style), + ] + .into(), + ); + if let Some(caboose) = + sp.rot().and_then(|r| r.caboose_b.as_ref()) + { + append_caboose(&mut spans, nest_bullet(), caboose); + } else { + spans.push( + vec![ + nest_bullet(), + Span::styled("No further information", warn_style), + ] + .into(), + ); + } + if let Some(_) = slot_b_error { + spans.push( + vec![ + nest_bullet(), + Span::styled("Image status: ", label_style), + Span::styled("Error: ", bad_style), + ] + .into(), + ); + } else { + spans.push( + vec![ + nest_bullet(), + Span::styled("Image status: ", label_style), + Span::styled("Status Good", ok_style), + ] + .into(), + ); + } + + spans.push( + vec![bullet(), Span::styled("Stage0:", label_style)].into(), + ); + spans.push( + vec![ + nest_bullet(), + Span::styled("Image SHA3-256: ", label_style), + Span::styled(stage0_fwid.clone(), ok_style), + ] + .into(), + ); + if let Some(caboose) = + sp.rot().and_then(|r| r.caboose_stage0.as_ref()) + { + append_caboose(&mut spans, nest_bullet(), caboose); + } else { + spans.push( + vec![ + 
nest_bullet(), + Span::styled("No further information", warn_style), + ] + .into(), + ); + } + if let Some(_) = stage0_error { + spans.push( + vec![ + nest_bullet(), + Span::styled("Image status: ", label_style), + Span::styled("Error: ", bad_style), + ] + .into(), + ); + } else { + spans.push( + vec![ + nest_bullet(), + Span::styled("Image status: ", label_style), + Span::styled("Status Good", ok_style), + ] + .into(), + ); + } + + spans.push( + vec![bullet(), Span::styled("Stage0Next:", label_style)] + .into(), + ); + spans.push( + vec![ + nest_bullet(), + Span::styled("Image SHA3-256: ", label_style), + Span::styled(stage0next_fwid.clone(), ok_style), + ] + .into(), + ); + if let Some(caboose) = + sp.rot().and_then(|r| r.caboose_stage0next.as_ref()) + { + append_caboose(&mut spans, nest_bullet(), caboose); + } else { + spans.push( + vec![ + nest_bullet(), + Span::styled("No further information", warn_style), + ] + .into(), + ); + } + if let Some(_) = stage0next_error { + spans.push( + vec![ + nest_bullet(), + Span::styled("Image status: ", label_style), + Span::styled("Error: ", bad_style), + ] + .into(), + ); + } else { + spans.push( + vec![ + nest_bullet(), + Span::styled("Image status: ", label_style), + Span::styled("Status Good", ok_style), + ] + .into(), + ); + } + } + + RotState::V2 { active, pending_persistent_boot_preference, persistent_boot_preference, diff --git a/wicketd/src/inventory.rs b/wicketd/src/inventory.rs index f6bf5c2984..e1465147b5 100644 --- a/wicketd/src/inventory.rs +++ b/wicketd/src/inventory.rs @@ -48,6 +48,10 @@ pub struct RotInventory { pub active: RotSlot, pub caboose_a: Option, pub caboose_b: Option, + // stage0 information is not available on all RoT versions + // `None` indicates we don't need to read + pub caboose_stage0: Option>, + pub caboose_stage0next: Option>, } /// The current state of the v1 Rack as known to wicketd diff --git a/wicketd/src/mgs/inventory.rs b/wicketd/src/mgs/inventory.rs index 5334c08a40..a9805b4c1d 
100644 --- a/wicketd/src/mgs/inventory.rs +++ b/wicketd/src/mgs/inventory.rs @@ -316,11 +316,22 @@ async fn sp_fetching_task( if rot.is_none() || prev_state.as_ref() != Some(&state) { match &state.rot { - RotState::Enabled { active, .. } => { + RotState::V2 { active, .. } => { rot = Some(RotInventory { active: *active, caboose_a: None, caboose_b: None, + caboose_stage0: None, + caboose_stage0next: None, + }); + } + RotState::V3 { active, .. } => { + rot = Some(RotInventory { + active: *active, + caboose_a: None, + caboose_b: None, + caboose_stage0: Some(None), + caboose_stage0next: Some(None), }); } RotState::CommunicationFailed { message } => { @@ -456,6 +467,60 @@ async fn sp_fetching_task( } }; } + + if let Some(v) = &rot.caboose_stage0 { + if prev_state.as_ref() != Some(&state) || v.is_none() { + rot.caboose_stage0 = match mgs_client + .sp_component_caboose_get( + id.type_, + id.slot, + SpComponent::STAGE0.const_as_str(), + 0, + ) + .await + { + Ok(response) => { + mgs_received = Instant::now(); + Some(Some(response.into_inner())) + } + Err(err) => { + warn!( + log, "Failed to get RoT caboose (stage0) for sp"; + "sp" => ?id, + "err" => %err, + ); + Some(None) + } + }; + } + } + + if let Some(v) = &rot.caboose_stage0next { + if prev_state.as_ref() != Some(&state) || v.is_none() { + rot.caboose_stage0next = match mgs_client + .sp_component_caboose_get( + id.type_, + id.slot, + SpComponent::STAGE0.const_as_str(), + 1, + ) + .await + { + Ok(response) => { + mgs_received = Instant::now(); + Some(Some(response.into_inner())) + } + Err(err) => { + warn!( + log, "Failed to get RoT caboose (stage0next) for sp"; + "sp" => ?id, + "err" => %err, + ); + Some(None) + } + }; + } + } } let emit = FetchedSpData { diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 7880422c47..1b21b72495 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -53,7 +53,7 @@ futures-io = { version = "0.3.30", default-features = false, features = ["std"] 
futures-sink = { version = "0.3.30" } futures-task = { version = "0.3.30", default-features = false, features = ["std"] } futures-util = { version = "0.3.30", features = ["channel", "io", "sink"] } -gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", features = ["std"] } +gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "c85a4ca043aaa389df12aac5348d8a3feda28762", features = ["std"] } generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.14", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } @@ -101,7 +101,6 @@ smallvec = { version = "1.13.2", default-features = false, features = ["const_ne spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } -syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.64", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.36", features = ["formatting", "local-offset", "macros", "parsing"] } tokio = { version = "1.37.0", features = ["full", "test-util"] } @@ -158,7 +157,7 @@ futures-io = { version = "0.3.30", default-features = false, features = ["std"] futures-sink = { version = "0.3.30" } futures-task = { version = "0.3.30", default-features = false, features = ["std"] } futures-util = { version = "0.3.30", features = ["channel", "io", "sink"] } -gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", features = ["std"] } +gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = 
"c85a4ca043aaa389df12aac5348d8a3feda28762", features = ["std"] } generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.14", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } From dbcc7547887859c48a20ea5694f34c8265a71c0a Mon Sep 17 00:00:00 2001 From: Daniel K <158104940+ubedan@users.noreply.github.com> Date: Wed, 5 Jun 2024 12:59:51 -0700 Subject: [PATCH 12/16] Update how-to-run.adoc (#5809) Fixed the external Nexus IP throughout the doc, and made the CLI section clearer. --------- Co-authored-by: iliana etaoin --- docs/how-to-run.adoc | 50 ++++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index c904dca757..50c4b4e174 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -449,14 +449,16 @@ This won't be in public DNS, though. You'd need to be using the deployed system [source,console] ---- $ dig recovery.sys.oxide.test @192.168.1.20 +short -192.168.1.21 +192.168.1.22 +192.168.1.23 +192.168.1.24 ---- -Where did 192.168.1.20 come from? That's the external address of the external -DNS server. We knew that because it's listed in the `external_dns_ips` entry of +Where did 192.168.1.20 come from? That's an external address of the external +DNS server. We knew that because it's listed in the `external_dns_ips` array in the `config-rss.toml` file we're using. -Having looked this up, the easiest thing will be to use `http://192.168.1.21` for your URL (replacing with `https` if you used a certificate, and replacing that IP if needed). If you've set up networking right, you should be able to reach this from your web browser. You may have to instruct the browser to accept a self-signed TLS certificate. See also <>. 
+Having looked this up, the easiest thing will be to use `http://192.168.1.22` for your URL (replacing with `https` if you used a certificate, and replacing that IP if needed). If you've set up networking right, you should be able to reach this from your web browser. You may have to instruct the browser to accept a self-signed TLS certificate. See also <>. === Setting up an SSH tunnel for console access @@ -485,13 +487,32 @@ via something like: `127.0.0.1:1234`, using the port from the `ssh` command. === Using the CLI -Follow the instructions to set up the https://github.com/oxidecomputer/oxide.rs[Oxide CLI]. See the previous section to find the URL for the API. Then you can log in with: +Follow the instructions to set up the https://github.com/oxidecomputer/oxide.rs[Oxide CLI]. See the previous section to find the URL for the API. Then you can start the login flow with: [source,console] ---- -oxide auth login --host http://192.168.1.21 +$ oxide auth login --host http://192.168.1.22 + +Opened this URL in your browser: + http://192.168.1.22/device/verify + +Enter the code: CXKX-KPBK ---- +Assuming you haven't already logged in, this page will bring you to the recovery silo login. The username and password are defined in `config-rss.toml` and default to: + +``` +username: recovery +password: oxide +``` + +Once logged in, enter the 8-character code to complete the login flow. In a few moments the CLI should show you're logged in. + +[NOTE] +==== +If you're using an SSH tunnel, you will either need to change the `device/verify` URL (if running the CLI on the same host as the control plane) or the `--host` URL (if running the CLI on a different host) to point to your tunnel. In the previous section's example, the URL is `http://127.0.0.1:1234`. +==== + === Configure quotas for your silo Setting resource quotas is required before you can begin uploading images, provisioning instances, etc. 
@@ -522,7 +543,7 @@ An IP pool is needed to provide external connectivity to Instances. The address Here we will first create an ip pool for the recovery silo: [source,console] ---- +---- $ oxide api /v1/system/ip-pools --method POST --input - < Date: Wed, 5 Jun 2024 13:16:57 -0700 Subject: [PATCH 13/16] Split instance state enum into instance/VMM state enums (#5854) Split the existing `instance_state` enum in the CRDB schema into separate instance state and VMM state enums and remove unused enum variants. Instances now have five states: Creating, NoVmm, Vmm, Failed, and Destroyed. VMMs have most of the states they had before, except that the unused Creating and Repairing states have been removed. This change makes it easier to add new states (e.g. SagaUnwound, see #5848) that apply only to instances/VMMs without having to update code that's working with the other type of DB record. Update the routines the IP attach/detach sagas use to decide where (if anywhere) to dispatch IP change messages. These routines were looking for instance states that weren't actually being used (the states of interest appear in the instances' active VMMs instead). Add a data migration test to make sure that the Stopped and Running instance states are converted as expected. Tests: `cargo nextest`. 
--- clients/nexus-client/src/lib.rs | 80 ++++------ clients/sled-agent-client/src/lib.rs | 62 ++++---- common/src/api/external/mod.rs | 16 ++ common/src/api/internal/nexus.rs | 29 +++- nexus/db-model/src/instance.rs | 24 ++- nexus/db-model/src/instance_state.rs | 66 ++++----- nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/schema.rs | 6 +- nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-model/src/sled_instance.rs | 6 +- nexus/db-model/src/vmm.rs | 22 ++- nexus/db-model/src/vmm_state.rs | 114 +++++++++++++++ nexus/db-queries/src/db/datastore/disk.rs | 10 +- .../src/db/datastore/external_ip.rs | 10 -- nexus/db-queries/src/db/datastore/instance.rs | 25 ++-- .../src/db/datastore/network_interface.rs | 10 +- nexus/db-queries/src/db/datastore/vmm.rs | 10 +- .../db-queries/src/db/queries/external_ip.rs | 26 ++-- .../src/db/queries/network_interface.rs | 74 +++------- .../app/background/abandoned_vmm_reaper.rs | 7 +- nexus/src/app/background/instance_watcher.rs | 2 +- nexus/src/app/instance.rs | 46 +++--- nexus/src/app/sagas/instance_common.rs | 114 ++++++++++++--- nexus/src/app/sagas/instance_create.rs | 7 +- nexus/src/app/sagas/instance_ip_attach.rs | 26 +++- nexus/src/app/sagas/instance_ip_detach.rs | 26 +++- nexus/src/app/sagas/instance_migrate.rs | 4 +- nexus/src/app/sagas/instance_start.rs | 24 ++- nexus/src/app/sagas/snapshot_create.rs | 8 +- nexus/tests/integration_tests/schema.rs | 103 ++++++++++++- openapi/nexus-internal.json | 138 ++++++++---------- openapi/sled-agent.json | 138 ++++++++---------- schema/crdb/dbinit.sql | 46 +++++- .../README.adoc | 64 ++++++++ .../separate-instance-and-vmm-states/up01.sql | 1 + .../separate-instance-and-vmm-states/up02.sql | 1 + .../separate-instance-and-vmm-states/up03.sql | 7 + .../separate-instance-and-vmm-states/up04.sql | 1 + .../separate-instance-and-vmm-states/up05.sql | 2 + .../separate-instance-and-vmm-states/up06.sql | 1 + .../separate-instance-and-vmm-states/up07.sql | 1 + 
.../separate-instance-and-vmm-states/up08.sql | 8 + .../separate-instance-and-vmm-states/up09.sql | 1 + .../separate-instance-and-vmm-states/up10.sql | 1 + .../separate-instance-and-vmm-states/up11.sql | 10 ++ .../separate-instance-and-vmm-states/up12.sql | 1 + .../separate-instance-and-vmm-states/up13.sql | 2 + .../separate-instance-and-vmm-states/up14.sql | 1 + .../separate-instance-and-vmm-states/up15.sql | 1 + .../separate-instance-and-vmm-states/up16.sql | 2 + .../separate-instance-and-vmm-states/up17.sql | 1 + .../separate-instance-and-vmm-states/up18.sql | 1 + .../separate-instance-and-vmm-states/up19.sql | 23 +++ .../separate-instance-and-vmm-states/up20.sql | 1 + .../separate-instance-and-vmm-states/up21.sql | 42 ++++++ .../separate-instance-and-vmm-states/up22.sql | 5 + sled-agent/src/common/instance.rs | 50 ++++--- sled-agent/src/instance.rs | 14 +- sled-agent/src/sim/collection.rs | 38 ++--- sled-agent/src/sim/instance.rs | 86 +++++------ 60 files changed, 1043 insertions(+), 607 deletions(-) create mode 100644 nexus/db-model/src/vmm_state.rs create mode 100644 schema/crdb/separate-instance-and-vmm-states/README.adoc create mode 100644 schema/crdb/separate-instance-and-vmm-states/up01.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up02.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up03.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up04.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up05.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up06.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up07.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up08.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up09.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up10.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up11.sql create mode 100644 
schema/crdb/separate-instance-and-vmm-states/up12.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up13.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up14.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up15.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up16.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up17.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up18.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up19.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up20.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up21.sql create mode 100644 schema/crdb/separate-instance-and-vmm-states/up22.sql diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 6546af8673..acf282a1f9 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -83,21 +83,34 @@ impl From for omicron_common::api::external::DiskState { } } -impl From - for omicron_common::api::external::InstanceState -{ - fn from(s: types::InstanceState) -> Self { +impl From for types::VmmState { + fn from(s: omicron_common::api::internal::nexus::VmmState) -> Self { + use omicron_common::api::internal::nexus::VmmState as Input; match s { - types::InstanceState::Creating => Self::Creating, - types::InstanceState::Starting => Self::Starting, - types::InstanceState::Running => Self::Running, - types::InstanceState::Stopping => Self::Stopping, - types::InstanceState::Stopped => Self::Stopped, - types::InstanceState::Rebooting => Self::Rebooting, - types::InstanceState::Migrating => Self::Migrating, - types::InstanceState::Repairing => Self::Repairing, - types::InstanceState::Failed => Self::Failed, - types::InstanceState::Destroyed => Self::Destroyed, + Input::Starting => types::VmmState::Starting, + Input::Running => types::VmmState::Running, + Input::Stopping => 
types::VmmState::Stopping, + Input::Stopped => types::VmmState::Stopped, + Input::Rebooting => types::VmmState::Rebooting, + Input::Migrating => types::VmmState::Migrating, + Input::Failed => types::VmmState::Failed, + Input::Destroyed => types::VmmState::Destroyed, + } + } +} + +impl From for omicron_common::api::internal::nexus::VmmState { + fn from(s: types::VmmState) -> Self { + use omicron_common::api::internal::nexus::VmmState as Output; + match s { + types::VmmState::Starting => Output::Starting, + types::VmmState::Running => Output::Running, + types::VmmState::Stopping => Output::Stopping, + types::VmmState::Stopped => Output::Stopped, + types::VmmState::Rebooting => Output::Rebooting, + types::VmmState::Migrating => Output::Migrating, + types::VmmState::Failed => Output::Failed, + types::VmmState::Destroyed => Output::Destroyed, } } } @@ -140,26 +153,6 @@ impl From } } -impl From - for types::InstanceState -{ - fn from(s: omicron_common::api::external::InstanceState) -> Self { - use omicron_common::api::external::InstanceState; - match s { - InstanceState::Creating => Self::Creating, - InstanceState::Starting => Self::Starting, - InstanceState::Running => Self::Running, - InstanceState::Stopping => Self::Stopping, - InstanceState::Stopped => Self::Stopped, - InstanceState::Rebooting => Self::Rebooting, - InstanceState::Migrating => Self::Migrating, - InstanceState::Repairing => Self::Repairing, - InstanceState::Failed => Self::Failed, - InstanceState::Destroyed => Self::Destroyed, - } - } -} - impl From for types::DiskRuntimeState { @@ -192,25 +185,6 @@ impl From for types::DiskState { } } -impl From<&types::InstanceState> - for omicron_common::api::external::InstanceState -{ - fn from(state: &types::InstanceState) -> Self { - match state { - types::InstanceState::Creating => Self::Creating, - types::InstanceState::Starting => Self::Starting, - types::InstanceState::Running => Self::Running, - types::InstanceState::Stopping => Self::Stopping, - 
types::InstanceState::Stopped => Self::Stopped, - types::InstanceState::Rebooting => Self::Rebooting, - types::InstanceState::Migrating => Self::Migrating, - types::InstanceState::Repairing => Self::Repairing, - types::InstanceState::Failed => Self::Failed, - types::InstanceState::Destroyed => Self::Destroyed, - } - } -} - impl From for types::ProducerKind { diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 300e3713ea..862ae00cc9 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -257,22 +257,18 @@ impl From } } -impl From - for types::InstanceState -{ - fn from(s: omicron_common::api::external::InstanceState) -> Self { - use omicron_common::api::external::InstanceState::*; +impl From for types::VmmState { + fn from(s: omicron_common::api::internal::nexus::VmmState) -> Self { + use omicron_common::api::internal::nexus::VmmState as Input; match s { - Creating => Self::Creating, - Starting => Self::Starting, - Running => Self::Running, - Stopping => Self::Stopping, - Stopped => Self::Stopped, - Rebooting => Self::Rebooting, - Migrating => Self::Migrating, - Repairing => Self::Repairing, - Failed => Self::Failed, - Destroyed => Self::Destroyed, + Input::Starting => types::VmmState::Starting, + Input::Running => types::VmmState::Running, + Input::Stopping => types::VmmState::Stopping, + Input::Stopped => types::VmmState::Stopped, + Input::Rebooting => types::VmmState::Rebooting, + Input::Migrating => types::VmmState::Migrating, + Input::Failed => types::VmmState::Failed, + Input::Destroyed => types::VmmState::Destroyed, } } } @@ -299,6 +295,22 @@ impl From } } +impl From for omicron_common::api::internal::nexus::VmmState { + fn from(s: types::VmmState) -> Self { + use omicron_common::api::internal::nexus::VmmState as Output; + match s { + types::VmmState::Starting => Output::Starting, + types::VmmState::Running => Output::Running, + types::VmmState::Stopping => Output::Stopping, + 
types::VmmState::Stopped => Output::Stopped, + types::VmmState::Rebooting => Output::Rebooting, + types::VmmState::Migrating => Output::Migrating, + types::VmmState::Failed => Output::Failed, + types::VmmState::Destroyed => Output::Destroyed, + } + } +} + impl From for omicron_common::api::internal::nexus::VmmRuntimeState { @@ -319,26 +331,6 @@ impl From } } -impl From - for omicron_common::api::external::InstanceState -{ - fn from(s: types::InstanceState) -> Self { - use types::InstanceState::*; - match s { - Creating => Self::Creating, - Starting => Self::Starting, - Running => Self::Running, - Stopping => Self::Stopping, - Stopped => Self::Stopped, - Rebooting => Self::Rebooting, - Migrating => Self::Migrating, - Repairing => Self::Repairing, - Failed => Self::Failed, - Destroyed => Self::Destroyed, - } - } -} - impl From for omicron_common::api::external::InstanceCpuCount { diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 07a7776f1e..6b171b59fe 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -996,6 +996,22 @@ pub enum InstanceState { Destroyed, } +impl From for InstanceState { + fn from(state: crate::api::internal::nexus::VmmState) -> Self { + use crate::api::internal::nexus::VmmState as InternalVmmState; + match state { + InternalVmmState::Starting => Self::Starting, + InternalVmmState::Running => Self::Running, + InternalVmmState::Stopping => Self::Stopping, + InternalVmmState::Stopped => Self::Stopped, + InternalVmmState::Rebooting => Self::Rebooting, + InternalVmmState::Migrating => Self::Migrating, + InternalVmmState::Failed => Self::Failed, + InternalVmmState::Destroyed => Self::Destroyed, + } + } +} + impl Display for InstanceState { fn fmt(&self, f: &mut Formatter) -> FormatResult { write!(f, "{}", self.label()) diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs index de611262bf..b569437f43 100644 --- a/common/src/api/internal/nexus.rs +++ 
b/common/src/api/internal/nexus.rs @@ -6,7 +6,7 @@ use crate::api::external::{ ByteCount, DiskState, Generation, Hostname, InstanceCpuCount, - InstanceState, SemverVersion, Vni, + SemverVersion, Vni, }; use chrono::{DateTime, Utc}; use omicron_uuid_kinds::DownstairsRegionKind; @@ -60,11 +60,36 @@ pub struct InstanceRuntimeState { pub time_updated: DateTime, } +/// One of the states that a VMM can be in. +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, Eq, PartialEq, +)] +#[serde(rename_all = "snake_case")] +pub enum VmmState { + /// The VMM is initializing and has not started running guest CPUs yet. + Starting, + /// The VMM has finished initializing and may be running guest CPUs. + Running, + /// The VMM is shutting down. + Stopping, + /// The VMM's guest has stopped, and the guest will not run again, but the + /// VMM process may not have released all of its resources yet. + Stopped, + /// The VMM is being restarted or its guest OS is rebooting. + Rebooting, + /// The VMM is part of a live migration. + Migrating, + /// The VMM process reported an internal failure. + Failed, + /// The VMM process has been destroyed and its resources have been released. + Destroyed, +} + /// The dynamic runtime properties of an individual VMM process. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct VmmRuntimeState { /// The last state reported by this VMM. - pub state: InstanceState, + pub state: VmmState, /// The generation number for this VMM's state. pub gen: Generation, /// Timestamp for the VMM's state. 
diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index 8f110aff71..99c0126174 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -72,9 +72,7 @@ impl Instance { InstanceIdentity::new(instance_id, params.identity.clone()); let runtime_state = InstanceRuntimeState::new( - InstanceState::new( - omicron_common::api::external::InstanceState::Creating, - ), + InstanceState::Creating, identity.time_modified, ); @@ -138,13 +136,6 @@ impl DatastoreAttachTargetConfig for Instance { // `diesel::prelude::AsChangeset`. #[diesel(table_name = instance, treat_none_as_null = true)] pub struct InstanceRuntimeState { - /// The instance state to fall back on if asked to compute this instance's - /// state while it has no active VMM. - /// - /// This field is guarded by the instance's `gen` field. - #[diesel(column_name = state)] - pub nexus_state: InstanceState, - /// The time at which the runtime state was last updated. This is distinct /// from the time the record was last modified, because some updates don't /// modify the runtime state. @@ -197,6 +188,13 @@ pub struct InstanceRuntimeState { /// lock was not held is still valid when setting the lock ID. #[diesel(column_name = updater_gen)] pub updater_gen: Generation, + + /// The "internal" state of this instance. The instance's externally-visible + /// state may be delegated to the instance's active VMM, if it has one. + /// + /// This field is guarded by the instance's `gen` field. 
+ #[diesel(column_name = state)] + pub nexus_state: InstanceState, } impl InstanceRuntimeState { @@ -221,13 +219,13 @@ impl From state: omicron_common::api::internal::nexus::InstanceRuntimeState, ) -> Self { let nexus_state = if state.propolis_id.is_some() { - omicron_common::api::external::InstanceState::Running + InstanceState::Vmm } else { - omicron_common::api::external::InstanceState::Stopped + InstanceState::NoVmm }; Self { - nexus_state: InstanceState::new(nexus_state), + nexus_state, time_updated: state.time_updated, gen: state.gen.into(), propolis_id: state.propolis_id, diff --git a/nexus/db-model/src/instance_state.rs b/nexus/db-model/src/instance_state.rs index dca809758f..673b06e2cd 100644 --- a/nexus/db-model/src/instance_state.rs +++ b/nexus/db-model/src/instance_state.rs @@ -2,72 +2,60 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use super::impl_enum_wrapper; +use super::impl_enum_type; use omicron_common::api::external; use serde::Deserialize; use serde::Serialize; use std::fmt; -use std::io::Write; -impl_enum_wrapper!( +impl_enum_type!( #[derive(SqlType, Debug)] - #[diesel(postgres_type(name = "instance_state", schema = "public"))] + #[diesel(postgres_type(name = "instance_state_v2", schema = "public"))] pub struct InstanceStateEnum; - #[derive(Clone, Debug, PartialEq, AsExpression, FromSqlRow, Serialize, Deserialize)] + #[derive(Copy, Clone, Debug, PartialEq, AsExpression, FromSqlRow, Serialize, Deserialize)] #[diesel(sql_type = InstanceStateEnum)] - pub struct InstanceState(pub external::InstanceState); + pub enum InstanceState; // Enum values Creating => b"creating" - Starting => b"starting" - Running => b"running" - Stopping => b"stopping" - Stopped => b"stopped" - Rebooting => b"rebooting" - Migrating => b"migrating" - Repairing => b"repairing" + NoVmm => b"no_vmm" + Vmm => b"vmm" Failed => b"failed" Destroyed => b"destroyed" ); impl InstanceState { - 
pub fn new(state: external::InstanceState) -> Self { - Self(state) + pub fn state(&self) -> external::InstanceState { + external::InstanceState::from(*self) } - pub fn state(&self) -> &external::InstanceState { - &self.0 + pub fn label(&self) -> &'static str { + match self { + InstanceState::Creating => "creating", + InstanceState::NoVmm => "no VMM", + InstanceState::Vmm => "VMM", + InstanceState::Failed => "failed", + InstanceState::Destroyed => "destroyed", + } } } impl fmt::Display for InstanceState { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.0) + write!(f, "{}", self.label()) } } -impl From for sled_agent_client::types::InstanceState { - fn from(s: InstanceState) -> Self { - use external::InstanceState::*; - use sled_agent_client::types::InstanceState as Output; - match s.0 { - Creating => Output::Creating, - Starting => Output::Starting, - Running => Output::Running, - Stopping => Output::Stopping, - Stopped => Output::Stopped, - Rebooting => Output::Rebooting, - Migrating => Output::Migrating, - Repairing => Output::Repairing, - Failed => Output::Failed, - Destroyed => Output::Destroyed, +impl From for omicron_common::api::external::InstanceState { + fn from(value: InstanceState) -> Self { + use omicron_common::api::external::InstanceState as Output; + match value { + InstanceState::Creating => Output::Creating, + InstanceState::NoVmm => Output::Stopped, + InstanceState::Vmm => Output::Running, + InstanceState::Failed => Output::Failed, + InstanceState::Destroyed => Output::Destroyed, } } } - -impl From for InstanceState { - fn from(state: external::InstanceState) -> Self { - Self::new(state) - } -} diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 51fd0f6c9e..040882a8f0 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -56,6 +56,7 @@ mod semver_version; mod switch_interface; mod switch_port; mod v2p_mapping; +mod vmm_state; // These actually represent subqueries, not real 
table. // However, they must be defined in the same crate as our tables // for join-based marker trait generation. @@ -197,6 +198,7 @@ pub use v2p_mapping::*; pub use virtual_provisioning_collection::*; pub use virtual_provisioning_resource::*; pub use vmm::*; +pub use vmm_state::*; pub use vni::*; pub use volume::*; pub use volume_repair::*; diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 8a00ce6e37..dedb0efc62 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -424,7 +424,6 @@ table! { memory -> Int8, hostname -> Text, boot_on_fault -> Bool, - state -> crate::InstanceStateEnum, time_state_updated -> Timestamptz, state_generation -> Int8, active_propolis_id -> Nullable, @@ -432,6 +431,7 @@ table! { migration_id -> Nullable, updater_id -> Nullable, updater_gen-> Int8, + state -> crate::InstanceStateEnum, } } @@ -444,9 +444,9 @@ table! { sled_id -> Uuid, propolis_ip -> Inet, propolis_port -> Int4, - state -> crate::InstanceStateEnum, time_state_updated -> Timestamptz, state_generation -> Int8, + state -> crate::VmmStateEnum, } } joinable!(vmm -> sled (sled_id)); @@ -459,7 +459,7 @@ table! { project_name -> Text, time_created -> Timestamptz, time_modified -> Timestamptz, - state -> crate::InstanceStateEnum, + state -> crate::VmmStateEnum, active_sled_id -> Uuid, migration_id -> Nullable, ncpus -> Int8, diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 4465c3aacf..09039c952b 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. 
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(69, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(70, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(70, "separate-instance-and-vmm-states"), KnownVersion::new(69, "expose-stage0"), KnownVersion::new(68, "filter-v2p-mapping-by-instance-state"), KnownVersion::new(67, "add-instance-updater-lock"), diff --git a/nexus/db-model/src/sled_instance.rs b/nexus/db-model/src/sled_instance.rs index bbc92ddf18..1415c38eea 100644 --- a/nexus/db-model/src/sled_instance.rs +++ b/nexus/db-model/src/sled_instance.rs @@ -1,6 +1,6 @@ use crate::schema::sled_instance; -use crate::InstanceState; use crate::Name; +use crate::VmmState; use db_macros::Asset; use nexus_types::external_api::views; use nexus_types::identity::Asset; @@ -21,7 +21,7 @@ pub struct SledInstance { pub silo_name: Name, pub project_name: Name, - pub state: InstanceState, + pub state: VmmState, pub ncpus: i64, pub memory: i64, } @@ -34,7 +34,7 @@ impl From for views::SledInstance { active_sled_id: sled_instance.active_sled_id, silo_name: sled_instance.silo_name.into(), project_name: sled_instance.project_name.into(), - state: *sled_instance.state.state(), + state: sled_instance.state.into(), migration_id: sled_instance.migration_id, ncpus: sled_instance.ncpus, memory: sled_instance.memory, diff --git a/nexus/db-model/src/vmm.rs b/nexus/db-model/src/vmm.rs index ca3be120d4..cfa1d43759 100644 --- a/nexus/db-model/src/vmm.rs +++ b/nexus/db-model/src/vmm.rs @@ -12,7 +12,7 @@ //! state updates to each other without sending parameters that are useless to //! sled agent or that sled agent will never update (like the sled ID). 
-use super::{Generation, InstanceState}; +use super::{Generation, VmmState}; use crate::schema::vmm; use crate::SqlU16; use chrono::{DateTime, Utc}; @@ -68,12 +68,10 @@ impl Vmm { propolis_port: u16, initial_state: VmmInitialState, ) -> Self { - use omicron_common::api::external::InstanceState as ApiInstanceState; - let now = Utc::now(); - let api_state = match initial_state { - VmmInitialState::Starting => ApiInstanceState::Starting, - VmmInitialState::Migrating => ApiInstanceState::Migrating, + let state = match initial_state { + VmmInitialState::Starting => VmmState::Starting, + VmmInitialState::Migrating => VmmState::Migrating, }; Self { @@ -85,7 +83,7 @@ impl Vmm { propolis_ip, propolis_port: SqlU16(propolis_port), runtime: VmmRuntimeState { - state: InstanceState::new(api_state), + state, time_state_updated: now, gen: Generation::new(), }, @@ -106,16 +104,16 @@ impl Vmm { )] #[diesel(table_name = vmm)] pub struct VmmRuntimeState { - /// The state of this VMM. If this VMM is the active VMM for a given - /// instance, this state is the instance's logical state. - pub state: InstanceState, - /// The time at which this state was most recently updated. pub time_state_updated: DateTime, /// The generation number protecting this VMM's state and update time. #[diesel(column_name = state_generation)] pub gen: Generation, + + /// The state of this VMM. If this VMM is the active VMM for a given + /// instance, this state is the instance's logical state. 
+ pub state: VmmState, } impl From @@ -125,7 +123,7 @@ impl From value: omicron_common::api::internal::nexus::VmmRuntimeState, ) -> Self { Self { - state: InstanceState::new(value.state), + state: value.state.into(), time_state_updated: value.time_updated, gen: value.gen.into(), } diff --git a/nexus/db-model/src/vmm_state.rs b/nexus/db-model/src/vmm_state.rs new file mode 100644 index 0000000000..f737f48f69 --- /dev/null +++ b/nexus/db-model/src/vmm_state.rs @@ -0,0 +1,114 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::impl_enum_type; +use serde::Deserialize; +use serde::Serialize; +use std::fmt; + +impl_enum_type!( + #[derive(SqlType, Debug)] + #[diesel(postgres_type(name = "vmm_state", schema = "public"))] + pub struct VmmStateEnum; + + #[derive(Copy, Clone, Debug, PartialEq, AsExpression, FromSqlRow, Serialize, Deserialize)] + #[diesel(sql_type = VmmStateEnum)] + pub enum VmmState; + + Starting => b"starting" + Running => b"running" + Stopping => b"stopping" + Stopped => b"stopped" + Rebooting => b"rebooting" + Migrating => b"migrating" + Failed => b"failed" + Destroyed => b"destroyed" +); + +impl VmmState { + pub fn label(&self) -> &'static str { + match self { + VmmState::Starting => "starting", + VmmState::Running => "running", + VmmState::Stopping => "stopping", + VmmState::Stopped => "stopped", + VmmState::Rebooting => "rebooting", + VmmState::Migrating => "migrating", + VmmState::Failed => "failed", + VmmState::Destroyed => "destroyed", + } + } +} + +impl fmt::Display for VmmState { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.label()) + } +} + +impl From for omicron_common::api::internal::nexus::VmmState { + fn from(value: VmmState) -> Self { + use omicron_common::api::internal::nexus::VmmState as Output; + match value { + VmmState::Starting => 
Output::Starting, + VmmState::Running => Output::Running, + VmmState::Stopping => Output::Stopping, + VmmState::Stopped => Output::Stopped, + VmmState::Rebooting => Output::Rebooting, + VmmState::Migrating => Output::Migrating, + VmmState::Failed => Output::Failed, + VmmState::Destroyed => Output::Destroyed, + } + } +} + +impl From for sled_agent_client::types::VmmState { + fn from(value: VmmState) -> Self { + use sled_agent_client::types::VmmState as Output; + match value { + VmmState::Starting => Output::Starting, + VmmState::Running => Output::Running, + VmmState::Stopping => Output::Stopping, + VmmState::Stopped => Output::Stopped, + VmmState::Rebooting => Output::Rebooting, + VmmState::Migrating => Output::Migrating, + VmmState::Failed => Output::Failed, + VmmState::Destroyed => Output::Destroyed, + } + } +} + +impl From for VmmState { + fn from(value: omicron_common::api::internal::nexus::VmmState) -> Self { + use omicron_common::api::internal::nexus::VmmState as ApiState; + use VmmState as Output; + match value { + ApiState::Starting => Output::Starting, + ApiState::Running => Output::Running, + ApiState::Stopping => Output::Stopping, + ApiState::Stopped => Output::Stopped, + ApiState::Rebooting => Output::Rebooting, + ApiState::Migrating => Output::Migrating, + ApiState::Failed => Output::Failed, + ApiState::Destroyed => Output::Destroyed, + } + } +} + +impl From for omicron_common::api::external::InstanceState { + fn from(value: VmmState) -> Self { + use omicron_common::api::external::InstanceState as Output; + + match value { + VmmState::Starting => Output::Starting, + VmmState::Running => Output::Running, + VmmState::Stopping => Output::Stopping, + VmmState::Stopped => Output::Stopped, + VmmState::Rebooting => Output::Rebooting, + VmmState::Migrating => Output::Migrating, + VmmState::Failed => Output::Failed, + VmmState::Destroyed => Output::Destroyed, + } + } +} diff --git a/nexus/db-queries/src/db/datastore/disk.rs 
b/nexus/db-queries/src/db/datastore/disk.rs index e1d504761c..de3d40969b 100644 --- a/nexus/db-queries/src/db/datastore/disk.rs +++ b/nexus/db-queries/src/db/datastore/disk.rs @@ -183,8 +183,8 @@ impl DataStore { // // We currently only permit attaching disks to stopped instances. let ok_to_attach_instance_states = vec![ - db::model::InstanceState(api::external::InstanceState::Creating), - db::model::InstanceState(api::external::InstanceState::Stopped), + db::model::InstanceState::Creating, + db::model::InstanceState::NoVmm, ]; let attach_update = DiskSetClauseForAttach::new(authz_instance.id()); @@ -321,9 +321,9 @@ impl DataStore { // // We currently only permit detaching disks from stopped instances. let ok_to_detach_instance_states = vec![ - db::model::InstanceState(api::external::InstanceState::Creating), - db::model::InstanceState(api::external::InstanceState::Stopped), - db::model::InstanceState(api::external::InstanceState::Failed), + db::model::InstanceState::Creating, + db::model::InstanceState::NoVmm, + db::model::InstanceState::Failed, ]; let detached_label = api::external::DiskState::Detached.label(); diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index c3cd45669f..8dda8041fa 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -31,7 +31,6 @@ use crate::db::queries::external_ip::NextExternalIp; use crate::db::queries::external_ip::MAX_EXTERNAL_IPS_PER_INSTANCE; use crate::db::queries::external_ip::SAFE_TO_ATTACH_INSTANCE_STATES; use crate::db::queries::external_ip::SAFE_TO_ATTACH_INSTANCE_STATES_CREATING; -use crate::db::queries::external_ip::SAFE_TRANSIENT_INSTANCE_STATES; use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; @@ -479,11 +478,6 @@ impl DataStore { } Err(match &collection.runtime_state.nexus_state { - state if 
SAFE_TRANSIENT_INSTANCE_STATES.contains(&state) - => Error::unavail(&format!( - "tried to attach {kind} IP while instance was changing state: \ - attach will be safe to retry once start/stop completes" - )), state if SAFE_TO_ATTACH_INSTANCE_STATES.contains(&state) => { if attached_count >= i64::from(MAX_EXTERNAL_IPS_PLUS_SNAT) { Error::invalid_request(&format!( @@ -608,10 +602,6 @@ impl DataStore { } match collection.runtime_state.nexus_state { - state if SAFE_TRANSIENT_INSTANCE_STATES.contains(&state) => Error::unavail(&format!( - "tried to attach {kind} IP while instance was changing state: \ - detach will be safe to retry once start/stop completes" - )), state if SAFE_TO_ATTACH_INSTANCE_STATES.contains(&state) => { Error::internal_error(&format!("failed to detach {kind} IP")) }, diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index 3b655e5bb9..b9989fe31c 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -74,9 +74,9 @@ impl InstanceAndActiveVmm { &self, ) -> omicron_common::api::external::InstanceState { if let Some(vmm) = &self.vmm { - vmm.runtime.state.0 + vmm.runtime.state.into() } else { - self.instance.runtime().nexus_state.0 + self.instance.runtime().nexus_state.into() } } } @@ -89,11 +89,13 @@ impl From<(Instance, Option)> for InstanceAndActiveVmm { impl From for omicron_common::api::external::Instance { fn from(value: InstanceAndActiveVmm) -> Self { - let (run_state, time_run_state_updated) = if let Some(vmm) = value.vmm { - (vmm.runtime.state, vmm.runtime.time_state_updated) + let run_state: omicron_common::api::external::InstanceState; + let time_run_state_updated: chrono::DateTime; + (run_state, time_run_state_updated) = if let Some(vmm) = value.vmm { + (vmm.runtime.state.into(), vmm.runtime.time_state_updated) } else { ( - value.instance.runtime_state.nexus_state.clone(), + value.instance.runtime_state.nexus_state.into(), 
value.instance.runtime_state.time_updated, ) }; @@ -109,7 +111,7 @@ impl From for omicron_common::api::external::Instance { .parse() .expect("found invalid hostname in the database"), runtime: omicron_common::api::external::InstanceRuntimeState { - run_state: *run_state.state(), + run_state, time_run_state_updated, }, } @@ -196,8 +198,8 @@ impl DataStore { })?; bail_unless!( - instance.runtime().nexus_state.state() - == &api::external::InstanceState::Creating, + instance.runtime().nexus_state + == nexus_db_model::InstanceState::Creating, "newly-created Instance has unexpected state: {:?}", instance.runtime().nexus_state ); @@ -477,13 +479,12 @@ impl DataStore { // instance must be "stopped" or "failed" in order to delete it. The // delete operation sets "time_deleted" (just like with other objects) // and also sets the state to "destroyed". - use api::external::InstanceState as ApiInstanceState; use db::model::InstanceState as DbInstanceState; use db::schema::{disk, instance}; - let stopped = DbInstanceState::new(ApiInstanceState::Stopped); - let failed = DbInstanceState::new(ApiInstanceState::Failed); - let destroyed = DbInstanceState::new(ApiInstanceState::Destroyed); + let stopped = DbInstanceState::NoVmm; + let failed = DbInstanceState::Failed; + let destroyed = DbInstanceState::Destroyed; let ok_to_delete_instance_states = vec![stopped, failed]; let detached_label = api::external::DiskState::Detached.label(); diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index 3ea2945b2f..3076afa39f 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -33,7 +33,6 @@ use diesel::prelude::*; use diesel::result::Error as DieselError; use nexus_db_model::ServiceNetworkInterface; use nexus_types::identity::Resource; -use omicron_common::api::external; use omicron_common::api::external::http_pagination::PaginatedBy; use 
omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; @@ -681,8 +680,7 @@ impl DataStore { .filter(dsl::time_deleted.is_null()) .select(Instance::as_select()) }; - let stopped = - db::model::InstanceState::new(external::InstanceState::Stopped); + let stopped = db::model::InstanceState::NoVmm; // This is the actual query to update the target interface. // Unlike Postgres, CockroachDB doesn't support inserting or updating a view @@ -713,7 +711,6 @@ impl DataStore { self.transaction_retry_wrapper("instance_update_network_interface") .transaction(&conn, |conn| { let err = err.clone(); - let stopped = stopped.clone(); let update_target_query = update_target_query.clone(); async move { let instance_runtime = @@ -759,7 +756,6 @@ impl DataStore { self.transaction_retry_wrapper("instance_update_network_interface") .transaction(&conn, |conn| { let err = err.clone(); - let stopped = stopped.clone(); let update_target_query = update_target_query.clone(); async move { let instance_state = @@ -897,7 +893,7 @@ mod tests { .addr_iter() .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES) .take(10); - let mut macs = external::MacAddr::iter_system(); + let mut macs = omicron_common::api::external::MacAddr::iter_system(); let mut service_nics = Vec::new(); for (i, ip) in ip_range.enumerate() { let name = format!("service-nic-{i}"); @@ -905,7 +901,7 @@ mod tests { Uuid::new_v4(), Uuid::new_v4(), NEXUS_VPC_SUBNET.clone(), - external::IdentityMetadataCreateParams { + omicron_common::api::external::IdentityMetadataCreateParams { name: name.parse().unwrap(), description: name, }, diff --git a/nexus/db-queries/src/db/datastore/vmm.rs b/nexus/db-queries/src/db/datastore/vmm.rs index b8fb47de26..bcb615411e 100644 --- a/nexus/db-queries/src/db/datastore/vmm.rs +++ b/nexus/db-queries/src/db/datastore/vmm.rs @@ -9,9 +9,9 @@ use crate::authz; use crate::context::OpContext; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use 
crate::db::model::InstanceState as DbInstanceState; use crate::db::model::Vmm; use crate::db::model::VmmRuntimeState; +use crate::db::model::VmmState as DbVmmState; use crate::db::pagination::paginated; use crate::db::schema::vmm::dsl; use crate::db::update_and_check::UpdateAndCheck; @@ -22,7 +22,6 @@ use diesel::prelude::*; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; -use omicron_common::api::external::InstanceState as ApiInstanceState; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; @@ -55,10 +54,7 @@ impl DataStore { opctx: &OpContext, vmm_id: &Uuid, ) -> UpdateResult { - let valid_states = vec![ - DbInstanceState::new(ApiInstanceState::Destroyed), - DbInstanceState::new(ApiInstanceState::Failed), - ]; + let valid_states = vec![DbVmmState::Destroyed, DbVmmState::Failed]; let updated = diesel::update(dsl::vmm) .filter(dsl::id.eq(*vmm_id)) @@ -190,7 +186,7 @@ impl DataStore { pagparams: &DataPageParams<'_, Uuid>, ) -> ListResultVec { use crate::db::schema::instance::dsl as instance_dsl; - let destroyed = DbInstanceState::new(ApiInstanceState::Destroyed); + let destroyed = DbVmmState::Destroyed; paginated(dsl::vmm, dsl::id, pagparams) // In order to be considered "abandoned", a VMM must be: // - in the `Destroyed` state diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 7d5e254aac..9bb6a44ea7 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -29,34 +29,28 @@ use diesel::RunQueryDsl; use nexus_db_model::InstanceState as DbInstanceState; use nexus_db_model::IpAttachState; use nexus_db_model::IpAttachStateEnum; +use nexus_db_model::VmmState as DbVmmState; use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_common::api::external; -use 
omicron_common::api::external::InstanceState as ApiInstanceState; use uuid::Uuid; // Broadly, we want users to be able to attach/detach at will // once an instance is created and functional. -pub const SAFE_TO_ATTACH_INSTANCE_STATES_CREATING: [DbInstanceState; 3] = [ - DbInstanceState(ApiInstanceState::Stopped), - DbInstanceState(ApiInstanceState::Running), - DbInstanceState(ApiInstanceState::Creating), -]; -pub const SAFE_TO_ATTACH_INSTANCE_STATES: [DbInstanceState; 2] = [ - DbInstanceState(ApiInstanceState::Stopped), - DbInstanceState(ApiInstanceState::Running), -]; +pub const SAFE_TO_ATTACH_INSTANCE_STATES_CREATING: [DbInstanceState; 3] = + [DbInstanceState::NoVmm, DbInstanceState::Vmm, DbInstanceState::Creating]; +pub const SAFE_TO_ATTACH_INSTANCE_STATES: [DbInstanceState; 2] = + [DbInstanceState::NoVmm, DbInstanceState::Vmm]; // If we're in a state which will naturally resolve to either // stopped/running, we want users to know that the request can be // retried safely via Error::unavail. // TODO: We currently stop if there's a migration or other state change. // There may be a good case for RPWing // external_ip_state -> { NAT RPW, sled-agent } in future. -pub const SAFE_TRANSIENT_INSTANCE_STATES: [DbInstanceState; 5] = [ - DbInstanceState(ApiInstanceState::Starting), - DbInstanceState(ApiInstanceState::Stopping), - DbInstanceState(ApiInstanceState::Creating), - DbInstanceState(ApiInstanceState::Rebooting), - DbInstanceState(ApiInstanceState::Migrating), +pub const SAFE_TRANSIENT_INSTANCE_STATES: [DbVmmState; 4] = [ + DbVmmState::Starting, + DbVmmState::Stopping, + DbVmmState::Rebooting, + DbVmmState::Migrating, ]; /// The maximum number of disks that can be attached to an instance. 
diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index 69c1827b6d..e7ce4ca61a 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -41,26 +41,26 @@ use uuid::Uuid; // States an instance must be in to operate on its network interfaces, in // most situations. -static INSTANCE_STOPPED: Lazy = - Lazy::new(|| db::model::InstanceState(external::InstanceState::Stopped)); +const INSTANCE_STOPPED: db::model::InstanceState = + db::model::InstanceState::NoVmm; -static INSTANCE_FAILED: Lazy = - Lazy::new(|| db::model::InstanceState(external::InstanceState::Failed)); +const INSTANCE_FAILED: db::model::InstanceState = + db::model::InstanceState::Failed; // An instance can be in the creating state while we manipulate its // interfaces. The intention is for this only to be the case during sagas. -static INSTANCE_CREATING: Lazy = - Lazy::new(|| db::model::InstanceState(external::InstanceState::Creating)); +const INSTANCE_CREATING: db::model::InstanceState = + db::model::InstanceState::Creating; // A sentinel value for the instance state when the instance actually does // not exist. -static INSTANCE_DESTROYED: Lazy = - Lazy::new(|| db::model::InstanceState(external::InstanceState::Destroyed)); +const INSTANCE_DESTROYED: db::model::InstanceState = + db::model::InstanceState::Destroyed; // A sentinel value for the instance state when the instance has an active // VMM, irrespective of that VMM's actual state. 
-static INSTANCE_RUNNING: Lazy = - Lazy::new(|| db::model::InstanceState(external::InstanceState::Running)); +const INSTANCE_RUNNING: db::model::InstanceState = + db::model::InstanceState::Vmm; static NO_INSTANCE_SENTINEL_STRING: Lazy = Lazy::new(|| String::from(NO_INSTANCE_SENTINEL)); @@ -1853,6 +1853,7 @@ mod tests { use crate::db::model; use crate::db::model::IncompleteNetworkInterface; use crate::db::model::Instance; + use crate::db::model::InstanceState; use crate::db::model::NetworkInterface; use crate::db::model::Project; use crate::db::model::VpcSubnet; @@ -1929,55 +1930,29 @@ mod tests { db_datastore: &DataStore, ) -> Instance { let instance = create_instance(opctx, project_id, db_datastore).await; - instance_set_state( - db_datastore, - instance, - external::InstanceState::Stopped, - ) - .await + instance_set_state(db_datastore, instance, InstanceState::NoVmm).await } async fn instance_set_state( db_datastore: &DataStore, mut instance: Instance, - state: external::InstanceState, + state: InstanceState, ) -> Instance { - let new_runtime = model::InstanceRuntimeState { - nexus_state: model::InstanceState::new(state), - gen: instance.runtime_state.gen.next().into(), - ..instance.runtime_state.clone() + let propolis_id = match state { + InstanceState::Vmm => Some(Uuid::new_v4()), + _ => None, }; - let res = db_datastore - .instance_update_runtime(&instance.id(), &new_runtime) - .await; - assert!(matches!(res, Ok(true)), "Failed to change instance state"); - instance.runtime_state = new_runtime; - instance - } - /// Sets or clears the active Propolis ID in the supplied instance record. - /// This can be used to exercise the "does this instance have an active - /// VMM?" test that determines in part whether an instance's network - /// interfaces can change. - /// - /// Note that this routine does not construct a VMM record for the - /// corresponding ID, so any functions that expect such a record to exist - /// will fail in strange and exciting ways. 
- async fn instance_set_active_vmm( - db_datastore: &DataStore, - mut instance: Instance, - propolis_id: Option, - ) -> Instance { let new_runtime = model::InstanceRuntimeState { + nexus_state: state, propolis_id, gen: instance.runtime_state.gen.next().into(), ..instance.runtime_state.clone() }; - let res = db_datastore .instance_update_runtime(&instance.id(), &new_runtime) .await; - assert!(matches!(res, Ok(true)), "Failed to change instance VMM ref"); + assert!(matches!(res, Ok(true)), "Failed to change instance state"); instance.runtime_state = new_runtime; instance } @@ -2102,13 +2077,13 @@ mod tests { &self.db_datastore, ) .await, - external::InstanceState::Stopped, + InstanceState::NoVmm, ) .await } async fn create_running_instance(&self) -> Instance { - let instance = instance_set_state( + instance_set_state( &self.db_datastore, create_instance( &self.opctx, @@ -2116,14 +2091,7 @@ mod tests { &self.db_datastore, ) .await, - external::InstanceState::Starting, - ) - .await; - - instance_set_active_vmm( - &self.db_datastore, - instance, - Some(Uuid::new_v4()), + InstanceState::Vmm, ) .await } diff --git a/nexus/src/app/background/abandoned_vmm_reaper.rs b/nexus/src/app/background/abandoned_vmm_reaper.rs index b24c543575..4685012e28 100644 --- a/nexus/src/app/background/abandoned_vmm_reaper.rs +++ b/nexus/src/app/background/abandoned_vmm_reaper.rs @@ -219,15 +219,14 @@ mod tests { use chrono::Utc; use nexus_db_model::ByteCount; use nexus_db_model::Generation; - use nexus_db_model::InstanceState; use nexus_db_model::Resources; use nexus_db_model::SledResource; use nexus_db_model::SledResourceKind; use nexus_db_model::Vmm; use nexus_db_model::VmmRuntimeState; + use nexus_db_model::VmmState; use nexus_test_utils::resource_helpers; use nexus_test_utils_macros::nexus_test; - use omicron_common::api::external::InstanceState as ApiInstanceState; use uuid::Uuid; type ControlPlaneTestContext = @@ -269,9 +268,7 @@ mod tests { propolis_ip: "::1".parse().unwrap(), 
propolis_port: 12345.into(), runtime: VmmRuntimeState { - state: InstanceState::new( - ApiInstanceState::Destroyed - ), + state: VmmState::Destroyed, time_state_updated: Utc::now(), gen: Generation::new(), } diff --git a/nexus/src/app/background/instance_watcher.rs b/nexus/src/app/background/instance_watcher.rs index d473ea8e99..c4eda68594 100644 --- a/nexus/src/app/background/instance_watcher.rs +++ b/nexus/src/app/background/instance_watcher.rs @@ -142,7 +142,7 @@ impl InstanceWatcher { let new_runtime_state: SledInstanceState = state.into(); check.outcome = - CheckOutcome::Success(new_runtime_state.vmm_state.state); + CheckOutcome::Success(new_runtime_state.vmm_state.state.into()); slog::debug!( opctx.log, "updating instance state"; diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 63b080b436..1132f1f5b8 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -20,6 +20,7 @@ use futures::future::Fuse; use futures::{FutureExt, SinkExt, StreamExt}; use nexus_db_model::IpAttachState; use nexus_db_model::IpKind; +use nexus_db_model::VmmState as DbVmmState; use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; @@ -43,6 +44,7 @@ use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; use omicron_common::api::internal::nexus; +use omicron_common::api::internal::nexus::VmmState; use omicron_common::api::internal::shared::SourceNatConfig; use propolis_client::support::tungstenite::protocol::frame::coding::CloseCode; use propolis_client::support::tungstenite::protocol::CloseFrame; @@ -477,7 +479,7 @@ impl super::Nexus { let (instance, vmm) = (state.instance(), state.vmm()); if vmm.is_none() - || vmm.as_ref().unwrap().runtime.state.0 != InstanceState::Running + || vmm.as_ref().unwrap().runtime.state != DbVmmState::Running { return Err(Error::invalid_request( "instance must be running before it can 
migrate", @@ -707,16 +709,16 @@ impl super::Nexus { let (instance, vmm) = (state.instance(), state.vmm()); if let Some(vmm) = vmm { - match vmm.runtime.state.0 { - InstanceState::Starting - | InstanceState::Running - | InstanceState::Rebooting => { + match vmm.runtime.state { + DbVmmState::Starting + | DbVmmState::Running + | DbVmmState::Rebooting => { debug!(self.log, "asked to start an active instance"; "instance_id" => %authz_instance.id()); return Ok(state); } - InstanceState::Stopped => { + DbVmmState::Stopped => { let propolis_id = instance .runtime() .propolis_id @@ -733,7 +735,7 @@ impl super::Nexus { _ => { return Err(Error::conflict(&format!( "instance is in state {} but must be {} to be started", - vmm.runtime.state.0, + vmm.runtime.state, InstanceState::Stopped ))); } @@ -841,9 +843,9 @@ impl super::Nexus { requested: &InstanceStateChangeRequest, ) -> Result { let effective_state = if let Some(vmm) = vmm_state { - vmm.runtime.state.0 + vmm.runtime.state.into() } else { - instance_state.runtime().nexus_state.0 + instance_state.runtime().nexus_state.into() }; // Requests that operate on active instances have to be directed to the @@ -1362,7 +1364,7 @@ impl super::Nexus { "error" => ?reason); let new_runtime = db::model::InstanceRuntimeState { - nexus_state: db::model::InstanceState::new(InstanceState::Failed), + nexus_state: db::model::InstanceState::Failed, // TODO(#4226): Clearing the Propolis ID is required to allow the // instance to be deleted, but this doesn't actually terminate the @@ -1647,24 +1649,22 @@ impl super::Nexus { let (instance, vmm) = (state.instance(), state.vmm()); if let Some(vmm) = vmm { - match vmm.runtime.state.0 { - InstanceState::Running - | InstanceState::Rebooting - | InstanceState::Migrating - | InstanceState::Repairing => { + match vmm.runtime.state { + DbVmmState::Running + | DbVmmState::Rebooting + | DbVmmState::Migrating => { Ok(SocketAddr::new(vmm.propolis_ip.ip(), vmm.propolis_port.into())) } - InstanceState::Creating - 
| InstanceState::Starting - | InstanceState::Stopping - | InstanceState::Stopped - | InstanceState::Failed => { + DbVmmState::Starting + | DbVmmState::Stopping + | DbVmmState::Stopped + | DbVmmState::Failed => { Err(Error::invalid_request(format!( "cannot connect to serial console of instance in state \"{}\"", - vmm.runtime.state.0, + vmm.runtime.state, ))) } - InstanceState::Destroyed => Err(Error::invalid_request( + DbVmmState::Destroyed => Err(Error::invalid_request( "cannot connect to serial console of destroyed instance", )), } @@ -2092,7 +2092,7 @@ pub(crate) async fn notify_instance_updated( if result.is_ok() { let propolis_terminated = matches!( new_runtime_state.vmm_state.state, - InstanceState::Destroyed | InstanceState::Failed + VmmState::Destroyed | VmmState::Failed ); if propolis_terminated { diff --git a/nexus/src/app/sagas/instance_common.rs b/nexus/src/app/sagas/instance_common.rs index b941739393..ba9854c146 100644 --- a/nexus/src/app/sagas/instance_common.rs +++ b/nexus/src/app/sagas/instance_common.rs @@ -9,14 +9,12 @@ use std::net::{IpAddr, Ipv6Addr}; use crate::Nexus; use chrono::Utc; use nexus_db_model::{ - ByteCount, ExternalIp, IpAttachState, Ipv4NatEntry, - SledReservationConstraints, SledResource, + ByteCount, ExternalIp, InstanceState, IpAttachState, Ipv4NatEntry, + SledReservationConstraints, SledResource, VmmState, }; use nexus_db_queries::authz; use nexus_db_queries::db::lookup::LookupPath; -use nexus_db_queries::db::queries::external_ip::SAFE_TRANSIENT_INSTANCE_STATES; use nexus_db_queries::{authn, context::OpContext, db, db::DataStore}; -use omicron_common::api::external::InstanceState; use omicron_common::api::external::{Error, NameOrId}; use serde::{Deserialize, Serialize}; use steno::ActionError; @@ -125,7 +123,7 @@ pub async fn destroy_vmm_record( prev_record: &db::model::Vmm, ) -> Result<(), anyhow::Error> { let new_runtime = db::model::VmmRuntimeState { - state: db::model::InstanceState(InstanceState::Destroyed), + state: 
db::model::VmmState::Destroyed, time_state_updated: Utc::now(), gen: prev_record.runtime.gen.next().into(), }; @@ -201,6 +199,16 @@ pub async fn instance_ip_move_state( } } +/// Yields the sled on which an instance is found to be running so that IP +/// attachment and detachment operations can be propagated there. +/// +/// # Preconditions +/// +/// To synchronize correctly with other concurrent operations on an instance, +/// the calling saga must have placed the IP it is attaching or detaching into +/// the Attaching or Detaching state so that concurrent attempts to start the +/// instance will notice that the IP state is in flux and ask the caller to +/// retry. pub async fn instance_ip_get_instance_state( sagactx: &NexusActionContext, serialized_authn: &authn::saga::Serialized, @@ -220,15 +228,20 @@ pub async fn instance_ip_get_instance_state( .await .map_err(ActionError::action_failed)?; - let found_state = inst_and_vmm.instance().runtime_state.nexus_state.0; + let found_vmm_state = + inst_and_vmm.vmm().as_ref().map(|vmm| vmm.runtime.state); + let found_instance_state = + inst_and_vmm.instance().runtime_state.nexus_state; let mut sled_id = inst_and_vmm.sled_id(); + slog::debug!( + osagactx.log(), "evaluating instance state for IP attach/detach"; + "instance_state" => ?found_instance_state, + "vmm_state" => ?found_vmm_state + ); + // Arriving here means we started in a correct state (running/stopped). // We need to consider how we interact with the other sagas/ops: - // - starting: our claim on an IP will block it from moving past - // DPD_ensure and instance_start will undo. If we complete - // before then, it can move past and will fill in routes/opte. - // Act as though we have no sled_id. // - stopping: this is not sagaized, and the propolis/sled-agent might // go away. 
Act as though stopped if we catch it here, // otherwise convert OPTE ensure to 'service unavailable' @@ -236,30 +249,91 @@ pub async fn instance_ip_get_instance_state( // - deleting: can only be called from stopped -- we won't push to dpd // or sled-agent, and IP record might be deleted or forcibly // detached. Catch here just in case. - match found_state { - InstanceState::Stopped - | InstanceState::Starting - | InstanceState::Stopping => { + // - starting: see below. + match (found_instance_state, found_vmm_state) { + // If there's no VMM, the instance is definitely not on any sled. + (InstanceState::NoVmm, _) => { sled_id = None; } - InstanceState::Running => {} - state if SAFE_TRANSIENT_INSTANCE_STATES.contains(&state.into()) => { + + // If the instance is running normally or rebooting, it's resident on + // the sled given by its VMM record. + ( + InstanceState::Vmm, + Some(VmmState::Running) | Some(VmmState::Rebooting), + ) => {} + + // If the VMM is in the Stopping, Migrating, or Starting states, its + // sled assignment is in doubt, so report a transient state error and + // ask the caller to retry. + // + // Although an instance with a Starting VMM has a sled assignment, + // there's no way to tell at this point whether or not there's a + // concurrent instance-start saga that has passed the point where it + // sends IP assignments to the instance's new sled: + // + // - If the start saga is still in progress and hasn't pushed any IP + // information to the instance's new sled yet, then either of two + // things can happen: + // - This function's caller can finish modifying IPs before the start + // saga propagates IP information to the sled. In this case the + // calling saga should do nothing--the start saga will send the + // right IP set to the sled. + // - If the start saga "wins" the race, it will see that the instance + // still has an attaching/detaching IP and bail out. 
+ // - If the start saga is already done, and Nexus is just waiting for + // the VMM to report that it's Running, the calling saga needs to + // send the IP change to the instance's sled. + // + // There's no way to distinguish these cases, so if a VMM is Starting, + // block the attach/detach. + ( + InstanceState::Vmm, + Some(state @ VmmState::Starting) + | Some(state @ VmmState::Migrating) + | Some(state @ VmmState::Stopping) + | Some(state @ VmmState::Stopped), + ) => { return Err(ActionError::action_failed(Error::unavail(&format!( "can't {verb} in transient state {state}" - )))) + )))); } - InstanceState::Destroyed => { + (InstanceState::Destroyed, _) => { return Err(ActionError::action_failed(Error::not_found_by_id( omicron_common::api::external::ResourceType::Instance, &authz_instance.id(), ))) } - // Final cases are repairing/failed. - _ => { + (InstanceState::Creating, _) => { + return Err(ActionError::action_failed(Error::invalid_request( + "cannot modify instance IPs, instance is still being created", + ))) + } + (InstanceState::Failed, _) + | (InstanceState::Vmm, Some(VmmState::Failed)) => { return Err(ActionError::action_failed(Error::invalid_request( "cannot modify instance IPs, instance is in unhealthy state", ))) } + + // This case represents an inconsistency in the database. It should + // never happen, but don't blow up Nexus if it somehow does. 
+ (InstanceState::Vmm, None) => { + return Err(ActionError::action_failed(Error::internal_error( + &format!( + "instance {} is in the 'VMM' state but has no VMM ID", + authz_instance.id(), + ), + ))); + } + (InstanceState::Vmm, Some(VmmState::Destroyed)) => { + return Err(ActionError::action_failed(Error::internal_error( + &format!( + "instance {} points to destroyed VMM", + authz_instance.id(), + ), + ))); + } } Ok(sled_id) diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index a6771f65a0..f336a01f0c 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -18,7 +18,6 @@ use nexus_db_queries::{authn, authz, db}; use nexus_defaults::DEFAULT_PRIMARY_NIC_NAME; use nexus_types::external_api::params::InstanceDiskAttachment; use omicron_common::api::external::IdentityMetadataCreateParams; -use omicron_common::api::external::InstanceState; use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use omicron_common::api::external::{Error, InternalContext}; @@ -994,7 +993,7 @@ async fn sic_delete_instance_record( }; let runtime_state = db::model::InstanceRuntimeState { - nexus_state: db::model::InstanceState::new(InstanceState::Failed), + nexus_state: db::model::InstanceState::Failed, // Must update the generation, or the database query will fail. // // The runtime state of the instance record is only changed as a result @@ -1029,13 +1028,13 @@ async fn sic_move_to_stopped( let instance_record = sagactx.lookup::("instance_record")?; - // Create a new generation of the isntance record with the Stopped state and + // Create a new generation of the instance record with the no-VMM state and // try to write it back to the database. If this node is replayed, or the // instance has already changed state by the time this step is reached, this // update will (correctly) be ignored because its generation number is out // of date. 
let new_state = db::model::InstanceRuntimeState { - nexus_state: db::model::InstanceState::new(InstanceState::Stopped), + nexus_state: db::model::InstanceState::NoVmm, gen: db::model::Generation::from( instance_record.runtime_state.gen.next(), ), diff --git a/nexus/src/app/sagas/instance_ip_attach.rs b/nexus/src/app/sagas/instance_ip_attach.rs index 3332b71274..f8edf37dc4 100644 --- a/nexus/src/app/sagas/instance_ip_attach.rs +++ b/nexus/src/app/sagas/instance_ip_attach.rs @@ -420,6 +420,12 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + for use_float in [false, true] { let params = new_test_params(&opctx, datastore, use_float).await; @@ -509,6 +515,12 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + for use_float in [false, true] { test_helpers::action_failure_can_unwind::( nexus, @@ -535,6 +547,12 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + for use_float in [false, true] { test_helpers::action_failure_can_unwind_idempotently::< SagaInstanceIpAttach, @@ -561,9 +579,15 @@ pub(crate) mod test { let opctx = test_helpers::test_opctx(cptestctx); let datastore = &nexus.db_datastore; let _project_id = ip_manip_test_setup(&client).await; - let _instance = + let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + for use_float in [false, true] { let params = new_test_params(&opctx, datastore, use_float).await; let dag = create_saga_dag::(params).unwrap(); diff --git 
a/nexus/src/app/sagas/instance_ip_detach.rs b/nexus/src/app/sagas/instance_ip_detach.rs index 2f1d76c853..9625d77bf9 100644 --- a/nexus/src/app/sagas/instance_ip_detach.rs +++ b/nexus/src/app/sagas/instance_ip_detach.rs @@ -391,6 +391,12 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + attach_instance_ips(nexus, &opctx).await; for use_float in [false, true] { @@ -484,6 +490,12 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + attach_instance_ips(nexus, &opctx).await; for use_float in [false, true] { @@ -512,6 +524,12 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + attach_instance_ips(nexus, &opctx).await; for use_float in [false, true] { @@ -540,9 +558,15 @@ pub(crate) mod test { let opctx = test_helpers::test_opctx(cptestctx); let datastore = &nexus.db_datastore; let _project_id = ip_manip_test_setup(&client).await; - let _instance = + let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + attach_instance_ips(nexus, &opctx).await; for use_float in [false, true] { diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 1cfd170faf..db1d838014 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -755,8 +755,8 @@ mod tests { let new_instance = new_state.instance(); let new_vmm = new_state.vmm().as_ref(); assert_eq!( - new_instance.runtime().nexus_state.0, - 
omicron_common::api::external::InstanceState::Stopped + new_instance.runtime().nexus_state, + nexus_db_model::InstanceState::NoVmm, ); assert!(new_instance.runtime().propolis_id.is_none()); assert!(new_vmm.is_none()); diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index e7caedfc9c..d67ff02c20 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -16,7 +16,7 @@ use crate::app::sagas::declare_saga_actions; use chrono::Utc; use nexus_db_queries::db::{identity::Resource, lookup::LookupPath}; use nexus_db_queries::{authn, authz, db}; -use omicron_common::api::external::{Error, InstanceState}; +use omicron_common::api::external::Error; use serde::{Deserialize, Serialize}; use slog::info; use steno::{ActionError, Node}; @@ -260,9 +260,7 @@ async fn sis_move_to_starting( // be running before Propolis thinks it has started.) None => { let new_runtime = db::model::InstanceRuntimeState { - nexus_state: db::model::InstanceState::new( - InstanceState::Running, - ), + nexus_state: db::model::InstanceState::Vmm, propolis_id: Some(propolis_id), time_updated: Utc::now(), gen: db_instance.runtime().gen.next().into(), @@ -300,7 +298,7 @@ async fn sis_move_to_starting_undo( "instance_id" => %instance_id); let new_runtime = db::model::InstanceRuntimeState { - nexus_state: db::model::InstanceState::new(InstanceState::Stopped), + nexus_state: db::model::InstanceState::NoVmm, propolis_id: None, gen: db_instance.runtime_state.gen.next().into(), ..db_instance.runtime_state @@ -762,10 +760,9 @@ mod test { .as_ref() .expect("running instance should have a vmm") .runtime - .state - .0; + .state; - assert_eq!(vmm_state, InstanceState::Running); + assert_eq!(vmm_state, nexus_db_model::VmmState::Running); } #[nexus_test(server = crate::Server)] @@ -818,8 +815,8 @@ mod test { assert!(new_db_instance.runtime().propolis_id.is_none()); assert_eq!( - new_db_instance.runtime().nexus_state.0, - InstanceState::Stopped 
+ new_db_instance.runtime().nexus_state, + nexus_db_model::InstanceState::NoVmm ); assert!(test_helpers::no_virtual_provisioning_resource_records_exist(cptestctx).await); @@ -861,10 +858,9 @@ mod test { .as_ref() .expect("running instance should have a vmm") .runtime - .state - .0; + .state; - assert_eq!(vmm_state, InstanceState::Running); + assert_eq!(vmm_state, nexus_db_model::VmmState::Running); } /// Tests that if a start saga unwinds because sled agent returned failure @@ -930,7 +926,7 @@ mod test { assert_eq!( db_instance.instance().runtime_state.nexus_state, - nexus_db_model::InstanceState(InstanceState::Stopped) + nexus_db_model::InstanceState::NoVmm ); assert!(db_instance.vmm().is_none()); diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index f8b56b3522..41e1793fab 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -2041,13 +2041,9 @@ mod test { .as_ref() .expect("running instance should have a sled") .runtime - .state - .0; + .state; - assert_eq!( - new_state, - omicron_common::api::external::InstanceState::Running - ); + assert_eq!(new_state, nexus_db_model::VmmState::Running); instance_state } diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 89d2e274c5..bf73855ea7 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -1021,7 +1021,6 @@ async fn dbinit_equals_sum_of_all_up() { observed_schema.pretty_assert_eq(&expected_schema); assert_eq!(observed_data, expected_data); - crdb.cleanup().await.unwrap(); logctx.cleanup_successful(); } @@ -1054,6 +1053,17 @@ const SLED2: Uuid = Uuid::from_u128(0x2222513d_5c3d_4647_83b0_8f3515da7be1); // "7AC4" -> "Rack" const RACK1: Uuid = Uuid::from_u128(0x11117ac4_5c3d_4647_83b0_8f3515da7be1); +// "6701" -> "Proj"ect +const PROJECT: Uuid = Uuid::from_u128(0x11116701_5c3d_4647_83b0_8f3515da7be1); + +// "1257" -> "Inst"ance +const 
INSTANCE1: Uuid = Uuid::from_u128(0x11111257_5c3d_4647_83b0_8f3515da7be1); +const INSTANCE2: Uuid = Uuid::from_u128(0x22221257_5c3d_4647_83b0_8f3515da7be1); +const INSTANCE3: Uuid = Uuid::from_u128(0x33331257_5c3d_4647_83b0_8f3515da7be1); + +// "6706" -> "Prop"olis +const PROPOLIS: Uuid = Uuid::from_u128(0x11116706_5c3d_4647_83b0_8f3515da7be1); + fn before_23_0_0(client: &Client) -> BoxFuture<'_, ()> { Box::pin(async move { // Create two silos @@ -1219,6 +1229,93 @@ fn after_37_0_1(client: &Client) -> BoxFuture<'_, ()> { }) } +fn before_70_0_0(client: &Client) -> BoxFuture<'_, ()> { + Box::pin(async move { + client + .batch_execute(&format!( + " + INSERT INTO instance (id, name, description, time_created, + time_modified, time_deleted, project_id, user_data, state, + time_state_updated, state_generation, active_propolis_id, + target_propolis_id, migration_id, ncpus, memory, hostname, + boot_on_fault, updater_id, updater_gen) VALUES + + ('{INSTANCE1}', 'inst1', '', now(), now(), NULL, '{PROJECT}', '', + 'stopped', now(), 1, NULL, NULL, NULL, 2, 1073741824, 'inst1', false, + NULL, 1), + ('{INSTANCE2}', 'inst2', '', now(), now(), NULL, '{PROJECT}', '', + 'running', now(), 1, '{PROPOLIS}', NULL, NULL, 2, 1073741824, 'inst2', + false, NULL, 1), + ('{INSTANCE3}', 'inst3', '', now(), now(), NULL, '{PROJECT}', '', + 'failed', now(), 1, NULL, NULL, NULL, 2, 1073741824, 'inst3', false, + NULL, 1); + " + )) + .await + .expect("failed to create instances"); + + client + .batch_execute(&format!( + " + INSERT INTO vmm (id, time_created, time_deleted, instance_id, state, + time_state_updated, state_generation, sled_id, propolis_ip, + propolis_port) VALUES + + ('{PROPOLIS}', now(), NULL, '{INSTANCE2}', 'running', now(), 1, + '{SLED1}', 'fd00:1122:3344:200::1', '12400'); + " + )) + .await + .expect("failed to create VMMs"); + }) +} + +fn after_70_0_0(client: &Client) -> BoxFuture<'_, ()> { + Box::pin(async { + let rows = client + .query("SELECT state FROM instance ORDER BY id",
&[]) + .await + .expect("failed to load instance states"); + let instance_states = process_rows(&rows); + + assert_eq!( + instance_states[0].values, + vec![ColumnValue::new( + "state", + SqlEnum::from(("instance_state_v2", "no_vmm")) + )] + ); + assert_eq!( + instance_states[1].values, + vec![ColumnValue::new( + "state", + SqlEnum::from(("instance_state_v2", "vmm")) + )] + ); + assert_eq!( + instance_states[2].values, + vec![ColumnValue::new( + "state", + SqlEnum::from(("instance_state_v2", "failed")) + )] + ); + + let rows = client + .query("SELECT state FROM vmm ORDER BY id", &[]) + .await + .expect("failed to load VMM states"); + let vmm_states = process_rows(&rows); + + assert_eq!( + vmm_states[0].values, + vec![ColumnValue::new( + "state", + SqlEnum::from(("vmm_state", "running")) + )] + ); + }) +} + // Lazily initializes all migration checks. The combination of Rust function // pointers and async makes defining a static table fairly painful, so we're // using lazy initialization instead. 
@@ -1240,6 +1337,10 @@ fn get_migration_checks() -> BTreeMap { SemverVersion(semver::Version::parse("37.0.1").unwrap()), DataMigrationFns { before: None, after: after_37_0_1 }, ); + map.insert( + SemverVersion(semver::Version::parse("70.0.0").unwrap()), + DataMigrationFns { before: Some(before_70_0_0), after: after_70_0_0 }, + ); map } diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 828378eaba..637334483d 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -3179,81 +3179,6 @@ "time_updated" ] }, - "InstanceState": { - "description": "Running state of an Instance (primarily: booted or stopped)\n\nThis typically reflects whether it's starting, running, stopping, or stopped, but also includes states related to the Instance's lifecycle", - "oneOf": [ - { - "description": "The instance is being created.", - "type": "string", - "enum": [ - "creating" - ] - }, - { - "description": "The instance is currently starting up.", - "type": "string", - "enum": [ - "starting" - ] - }, - { - "description": "The instance is currently running.", - "type": "string", - "enum": [ - "running" - ] - }, - { - "description": "The instance has been requested to stop and a transition to \"Stopped\" is imminent.", - "type": "string", - "enum": [ - "stopping" - ] - }, - { - "description": "The instance is currently stopped.", - "type": "string", - "enum": [ - "stopped" - ] - }, - { - "description": "The instance is in the process of rebooting - it will remain in the \"rebooting\" state until the VM is starting once more.", - "type": "string", - "enum": [ - "rebooting" - ] - }, - { - "description": "The instance is in the process of migrating - it will remain in the \"migrating\" state until the migration process is complete and the destination propolis is ready to continue execution.", - "type": "string", - "enum": [ - "migrating" - ] - }, - { - "description": "The instance is attempting to recover from a failure.", - "type": "string", - 
"enum": [ - "repairing" - ] - }, - { - "description": "The instance has encountered a failure.", - "type": "string", - "enum": [ - "failed" - ] - }, - { - "description": "The instance has been deleted.", - "type": "string", - "enum": [ - "destroyed" - ] - } - ] - }, "IpKind": { "type": "string", "enum": [ @@ -4996,7 +4921,7 @@ "description": "The last state reported by this VMM.", "allOf": [ { - "$ref": "#/components/schemas/InstanceState" + "$ref": "#/components/schemas/VmmState" } ] }, @@ -5012,6 +4937,67 @@ "time_updated" ] }, + "VmmState": { + "description": "One of the states that a VMM can be in.", + "oneOf": [ + { + "description": "The VMM is initializing and has not started running guest CPUs yet.", + "type": "string", + "enum": [ + "starting" + ] + }, + { + "description": "The VMM has finished initializing and may be running guest CPUs.", + "type": "string", + "enum": [ + "running" + ] + }, + { + "description": "The VMM is shutting down.", + "type": "string", + "enum": [ + "stopping" + ] + }, + { + "description": "The VMM's guest has stopped, and the guest will not run again, but the VMM process may not have released all of its resources yet.", + "type": "string", + "enum": [ + "stopped" + ] + }, + { + "description": "The VMM is being restarted or its guest OS is rebooting.", + "type": "string", + "enum": [ + "rebooting" + ] + }, + { + "description": "The VMM is part of a live migration.", + "type": "string", + "enum": [ + "migrating" + ] + }, + { + "description": "The VMM process reported an internal failure.", + "type": "string", + "enum": [ + "failed" + ] + }, + { + "description": "The VMM process has been destroyed and its resources have been released.", + "type": "string", + "enum": [ + "destroyed" + ] + } + ] + }, "Vni": { "description": "A Geneve Virtual Network Identifier", "type": "integer", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index b975f16484..68513345e2 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json 
@@ -3138,81 +3138,6 @@ "time_updated" ] }, - "InstanceState": { - "description": "Running state of an Instance (primarily: booted or stopped)\n\nThis typically reflects whether it's starting, running, stopping, or stopped, but also includes states related to the Instance's lifecycle", - "oneOf": [ - { - "description": "The instance is being created.", - "type": "string", - "enum": [ - "creating" - ] - }, - { - "description": "The instance is currently starting up.", - "type": "string", - "enum": [ - "starting" - ] - }, - { - "description": "The instance is currently running.", - "type": "string", - "enum": [ - "running" - ] - }, - { - "description": "The instance has been requested to stop and a transition to \"Stopped\" is imminent.", - "type": "string", - "enum": [ - "stopping" - ] - }, - { - "description": "The instance is currently stopped.", - "type": "string", - "enum": [ - "stopped" - ] - }, - { - "description": "The instance is in the process of rebooting - it will remain in the \"rebooting\" state until the VM is starting once more.", - "type": "string", - "enum": [ - "rebooting" - ] - }, - { - "description": "The instance is in the process of migrating - it will remain in the \"migrating\" state until the migration process is complete and the destination propolis is ready to continue execution.", - "type": "string", - "enum": [ - "migrating" - ] - }, - { - "description": "The instance is attempting to recover from a failure.", - "type": "string", - "enum": [ - "repairing" - ] - }, - { - "description": "The instance has encountered a failure.", - "type": "string", - "enum": [ - "failed" - ] - }, - { - "description": "The instance has been deleted.", - "type": "string", - "enum": [ - "destroyed" - ] - } - ] - }, "InstanceStateRequested": { "description": "Requestable running state of an Instance.\n\nA subset of [`omicron_common::api::external::InstanceState`].", "oneOf": [ @@ -4560,7 +4485,7 @@ "description": "The last state reported by this VMM.", "allOf": 
[ { - "$ref": "#/components/schemas/InstanceState" + "$ref": "#/components/schemas/VmmState" } ] }, @@ -4576,6 +4501,67 @@ "time_updated" ] }, + "VmmState": { + "description": "One of the states that a VMM can be in.", + "oneOf": [ + { + "description": "The VMM is initializing and has not started running guest CPUs yet.", + "type": "string", + "enum": [ + "starting" + ] + }, + { + "description": "The VMM has finished initializing and may be running guest CPUs.", + "type": "string", + "enum": [ + "running" + ] + }, + { + "description": "The VMM is shutting down.", + "type": "string", + "enum": [ + "stopping" + ] + }, + { + "description": "The VMM's guest has stopped, and the guest will not run again, but the VMM process may not have released all of its resources yet.", + "type": "string", + "enum": [ + "stopped" + ] + }, + { + "description": "The VMM is being restarted or its guest OS is rebooting.", + "type": "string", + "enum": [ + "rebooting" + ] + }, + { + "description": "The VMM is part of a live migration.", + "type": "string", + "enum": [ + "migrating" + ] + }, + { + "description": "The VMM process reported an internal failure.", + "type": "string", + "enum": [ + "failed" + ] + }, + { + "description": "The VMM process has been destroyed and its resources have been released.", + "type": "string", + "enum": [ + "destroyed" + ] + } + ] + }, "Vni": { "description": "A Geneve Virtual Network Identifier", "type": "integer", diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index b759f86f1b..7bda66c5f2 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -954,15 +954,33 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_project_by_silo ON omicron.public.proje * Instances */ -CREATE TYPE IF NOT EXISTS omicron.public.instance_state AS ENUM ( +CREATE TYPE IF NOT EXISTS omicron.public.instance_state_v2 AS ENUM ( + /* The instance exists in the DB but its create saga is still in flight. */ 'creating', + + /* + * The instance has no active VMM. 
Corresponds to the "stopped" external + * state. + */ + 'no_vmm', + + /* The instance's state is derived from its active VMM's state. */ + 'vmm', + + /* Something bad happened while trying to interact with the instance. */ + 'failed', + + /* The instance has been destroyed. */ + 'destroyed' +); + +CREATE TYPE IF NOT EXISTS omicron.public.vmm_state AS ENUM ( 'starting', 'running', 'stopping', 'stopped', 'rebooting', 'migrating', - 'repairing', 'failed', 'destroyed' ); @@ -989,8 +1007,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.instance ( /* user data for instance initialization systems (e.g. cloud-init) */ user_data BYTES NOT NULL, - /* The state of the instance when it has no active VMM. */ - state omicron.public.instance_state NOT NULL, + /* The last-updated time and generation for the instance's state. */ time_state_updated TIMESTAMPTZ NOT NULL, state_generation INT NOT NULL, @@ -1014,8 +1031,21 @@ CREATE TABLE IF NOT EXISTS omicron.public.instance ( updater_id UUID, /* Generation of the instance updater lock */ - updater_gen INT NOT NULL DEFAULT 0 + updater_gen INT NOT NULL DEFAULT 0, + /* + * The internal instance state. If this is 'vmm', the externally-visible + * instance state is derived from its active VMM's state. This column is + * distant from its generation number and update time because it is + * deleted and recreated by the schema upgrade process; see the + * `separate-instance-and-vmm-states` schema change for details. 
+ */ + state omicron.public.instance_state_v2 NOT NULL, + + CONSTRAINT vmm_iff_active_propolis CHECK ( + ((state = 'vmm') AND (active_propolis_id IS NOT NULL)) OR + ((state != 'vmm') AND (active_propolis_id IS NULL)) + ) ); -- Names for instances within a project should be unique @@ -3477,12 +3507,12 @@ CREATE TABLE IF NOT EXISTS omicron.public.vmm ( time_created TIMESTAMPTZ NOT NULL, time_deleted TIMESTAMPTZ, instance_id UUID NOT NULL, - state omicron.public.instance_state NOT NULL, time_state_updated TIMESTAMPTZ NOT NULL, state_generation INT NOT NULL, sled_id UUID NOT NULL, propolis_ip INET NOT NULL, - propolis_port INT4 NOT NULL CHECK (propolis_port BETWEEN 0 AND 65535) DEFAULT 12400 + propolis_port INT4 NOT NULL CHECK (propolis_port BETWEEN 0 AND 65535) DEFAULT 12400, + state omicron.public.vmm_state NOT NULL ); CREATE INDEX IF NOT EXISTS lookup_vmms_by_sled_id ON omicron.public.vmm ( @@ -4045,7 +4075,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '69.0.0', NULL) + (TRUE, NOW(), NOW(), '70.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/separate-instance-and-vmm-states/README.adoc b/schema/crdb/separate-instance-and-vmm-states/README.adoc new file mode 100644 index 0000000000..f005ef0b29 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/README.adoc @@ -0,0 +1,64 @@ +# Overview + +This schema change splits the "instance state" enum that instances and VMMs +share into two enums, one for instance states and one for VMM states. Variants +used by only one of these objects only appear in the corresponding enum. This +upgrade also adds a database-level constraint that requires that an instance's +state reports that it has an active VMM if and only if it has an active Propolis +ID. + +This change is mechanically tricky for two reasons. First, the states instances +and VMMs have after an upgrade depend on the state that they have before the +
(While this upgrade is supposed to take place offline, past experience +with instances that are stuck in a non-Stopped state shows that we can't take +for granted that all instances and VMMs will be Stopped at upgrade time.) +Second, Postgres and/or CRDB don't support all the schema change primitives we +might use to deprecate the old state column. Specifically: + +* CockroachDB doesn't support altering column types without enabling an + experimental flag + (see https://github.com/cockroachdb/cockroach/issues/49329?version=v22.1). +* Postgres doesn't support removing enum variants (adding and renaming are OK), + so we can't shrink and directly reuse the existing instance state enum without + leaving a set of "reserved"/"unused" variants around. +* Even if it did, Postgres doesn't support the `IF EXISTS` qualifier for many + `ALTER TYPE` and `ALTER TABLE` statements, e.g. `ALTER TABLE RENAME COLUMN` + and `ALTER TYPE RENAME TO`. There are ways to work around this (e.g. put the + statement into a user-defined function or code block and catch the relevant + exceptions from it), but CockroachDB v22.1.9 doesn't support UDFs (support + was added in v22.2). + +These limitations make it hard to change the schema idempotently. To get around +this, the change uses the following general procedure to change a column's type +from one enum to another: + +. Create a new enum with the variants of interest. +. Create a new temporary column to hold the old object state. (Adding a column + supports `IF NOT EXISTS`). +. Copy the old object state to the temporary column. +. Drop the old column (this supports `IF EXISTS`). +. Recreate the state column with the new type. +. Populate the column's values using the data saved in the temporary column. +. Add a `NOT NULL` qualifier to the new column. +. Drop the temporary column. + +Note that deleting and recreating columns this way (instead of modfying them in +place) changes their column indices in the affected table. 
These columns need to +be moved to the (current) ends of the table definitions in dbinit.sql, or the +schema upgrade tests will fail. + +# Upgrade steps + +The individual transactions in this upgrade do the following: + +* `up01` and `up02` drop views that depend on the `state` column in the `vmm` + table. +* `up03` through `up10` change the `instance` table's state enum using the + procedure described above. +* `up11` through `up18` upgrade the `vmm` table. +* `up19` and `up21` recreate the views deleted at the beginning of this + procedure. +* `up20` deletes the now-unused `instance_state` enum. +* `up22` adds a constraint to the `instance` table that requires that an + instance be in the `vmm` state if and only if it has a non-NULL active + Propolis ID. diff --git a/schema/crdb/separate-instance-and-vmm-states/up01.sql b/schema/crdb/separate-instance-and-vmm-states/up01.sql new file mode 100644 index 0000000000..1d8fee0b05 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up01.sql @@ -0,0 +1 @@ +DROP VIEW IF EXISTS omicron.public.sled_instance; diff --git a/schema/crdb/separate-instance-and-vmm-states/up02.sql b/schema/crdb/separate-instance-and-vmm-states/up02.sql new file mode 100644 index 0000000000..aebe0119f5 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up02.sql @@ -0,0 +1 @@ +DROP VIEW IF EXISTS omicron.public.v2p_mapping_view; diff --git a/schema/crdb/separate-instance-and-vmm-states/up03.sql b/schema/crdb/separate-instance-and-vmm-states/up03.sql new file mode 100644 index 0000000000..d7f40f8c5c --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up03.sql @@ -0,0 +1,7 @@ +CREATE TYPE IF NOT EXISTS omicron.public.instance_state_v2 AS ENUM ( + 'creating', + 'no_vmm', + 'vmm', + 'failed', + 'destroyed' +); diff --git a/schema/crdb/separate-instance-and-vmm-states/up04.sql b/schema/crdb/separate-instance-and-vmm-states/up04.sql new file mode 100644 index 0000000000..c2e2e59191 --- /dev/null +++ 
b/schema/crdb/separate-instance-and-vmm-states/up04.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.instance ADD COLUMN IF NOT EXISTS downlevel_state omicron.public.instance_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up05.sql b/schema/crdb/separate-instance-and-vmm-states/up05.sql new file mode 100644 index 0000000000..89d57a3260 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up05.sql @@ -0,0 +1,2 @@ +SET LOCAL disallow_full_table_scans = off; +UPDATE omicron.public.instance SET downlevel_state = state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up06.sql b/schema/crdb/separate-instance-and-vmm-states/up06.sql new file mode 100644 index 0000000000..b3d61de712 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up06.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.instance DROP COLUMN IF EXISTS state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up07.sql b/schema/crdb/separate-instance-and-vmm-states/up07.sql new file mode 100644 index 0000000000..8a21a819be --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up07.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.instance ADD COLUMN IF NOT EXISTS state omicron.public.instance_state_v2; diff --git a/schema/crdb/separate-instance-and-vmm-states/up08.sql b/schema/crdb/separate-instance-and-vmm-states/up08.sql new file mode 100644 index 0000000000..82346ee6a4 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up08.sql @@ -0,0 +1,8 @@ +SET LOCAL disallow_full_table_scans = off; +UPDATE omicron.public.instance SET state = CASE + WHEN downlevel_state = 'creating' THEN 'creating' + WHEN downlevel_state = 'failed' THEN 'failed' + WHEN downlevel_state = 'destroyed' THEN 'destroyed' + WHEN active_propolis_id IS NOT NULL THEN 'vmm' + ELSE 'no_vmm' +END; diff --git a/schema/crdb/separate-instance-and-vmm-states/up09.sql b/schema/crdb/separate-instance-and-vmm-states/up09.sql new file mode 100644 index 0000000000..1036c339b8 --- 
/dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up09.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.instance ALTER COLUMN state SET NOT NULL; diff --git a/schema/crdb/separate-instance-and-vmm-states/up10.sql b/schema/crdb/separate-instance-and-vmm-states/up10.sql new file mode 100644 index 0000000000..fbce77e29c --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up10.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.instance DROP COLUMN IF EXISTS downlevel_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up11.sql b/schema/crdb/separate-instance-and-vmm-states/up11.sql new file mode 100644 index 0000000000..39aa085b6d --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up11.sql @@ -0,0 +1,10 @@ +CREATE TYPE IF NOT EXISTS omicron.public.vmm_state AS ENUM ( + 'starting', + 'running', + 'stopping', + 'stopped', + 'rebooting', + 'migrating', + 'failed', + 'destroyed' +); diff --git a/schema/crdb/separate-instance-and-vmm-states/up12.sql b/schema/crdb/separate-instance-and-vmm-states/up12.sql new file mode 100644 index 0000000000..8bfa1e7623 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up12.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.vmm ADD COLUMN IF NOT EXISTS downlevel_state omicron.public.instance_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up13.sql b/schema/crdb/separate-instance-and-vmm-states/up13.sql new file mode 100644 index 0000000000..8f906bcb27 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up13.sql @@ -0,0 +1,2 @@ +SET LOCAL disallow_full_table_scans = off; +UPDATE omicron.public.vmm SET downlevel_state = state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up14.sql b/schema/crdb/separate-instance-and-vmm-states/up14.sql new file mode 100644 index 0000000000..65d2e99f38 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up14.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.vmm DROP COLUMN IF EXISTS state; diff --git 
a/schema/crdb/separate-instance-and-vmm-states/up15.sql b/schema/crdb/separate-instance-and-vmm-states/up15.sql new file mode 100644 index 0000000000..3f3f80d508 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up15.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.vmm ADD COLUMN IF NOT EXISTS state omicron.public.vmm_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up16.sql b/schema/crdb/separate-instance-and-vmm-states/up16.sql new file mode 100644 index 0000000000..0e7b8f8a82 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up16.sql @@ -0,0 +1,2 @@ +SET LOCAL disallow_full_table_scans = off; +UPDATE omicron.public.vmm SET state = downlevel_state::text::vmm_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up17.sql b/schema/crdb/separate-instance-and-vmm-states/up17.sql new file mode 100644 index 0000000000..7cc912ba99 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up17.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.vmm ALTER COLUMN state SET NOT NULL; diff --git a/schema/crdb/separate-instance-and-vmm-states/up18.sql b/schema/crdb/separate-instance-and-vmm-states/up18.sql new file mode 100644 index 0000000000..349f05d7ec --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up18.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.vmm DROP COLUMN IF EXISTS downlevel_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up19.sql b/schema/crdb/separate-instance-and-vmm-states/up19.sql new file mode 100644 index 0000000000..b1a96ece52 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up19.sql @@ -0,0 +1,23 @@ +CREATE OR REPLACE VIEW omicron.public.sled_instance +AS SELECT + instance.id, + instance.name, + silo.name as silo_name, + project.name as project_name, + vmm.sled_id as active_sled_id, + instance.time_created, + instance.time_modified, + instance.migration_id, + instance.ncpus, + instance.memory, + vmm.state +FROM + omicron.public.instance AS instance + 
JOIN omicron.public.project AS project ON + instance.project_id = project.id + JOIN omicron.public.silo AS silo ON + project.silo_id = silo.id + JOIN omicron.public.vmm AS vmm ON + instance.active_propolis_id = vmm.id +WHERE + instance.time_deleted IS NULL AND vmm.time_deleted IS NULL; diff --git a/schema/crdb/separate-instance-and-vmm-states/up20.sql b/schema/crdb/separate-instance-and-vmm-states/up20.sql new file mode 100644 index 0000000000..7575559a2f --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up20.sql @@ -0,0 +1 @@ +DROP TYPE IF EXISTS omicron.public.instance_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up21.sql b/schema/crdb/separate-instance-and-vmm-states/up21.sql new file mode 100644 index 0000000000..a0fe7b7a48 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up21.sql @@ -0,0 +1,42 @@ +CREATE VIEW IF NOT EXISTS omicron.public.v2p_mapping_view +AS +WITH VmV2pMappings AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.vmm vmm ON n.parent_id = vmm.instance_id + JOIN omicron.public.sled s ON vmm.sled_id = s.id + WHERE n.time_deleted IS NULL + AND n.kind = 'instance' + AND (vmm.state = 'running' OR vmm.state = 'starting') + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +), +ProbeV2pMapping AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.probe p ON n.parent_id = p.id + JOIN omicron.public.sled s ON p.sled = s.id + WHERE n.time_deleted IS NULL + AND n.kind = 'probe' + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +) +SELECT nic_id, sled_id, sled_ip, 
vni, mac, ip FROM VmV2pMappings +UNION +SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM ProbeV2pMapping; diff --git a/schema/crdb/separate-instance-and-vmm-states/up22.sql b/schema/crdb/separate-instance-and-vmm-states/up22.sql new file mode 100644 index 0000000000..cf884a9c68 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up22.sql @@ -0,0 +1,5 @@ +ALTER TABLE omicron.public.instance +ADD CONSTRAINT IF NOT EXISTS vmm_iff_active_propolis CHECK ( + ((state = 'vmm') AND (active_propolis_id IS NOT NULL)) OR + ((state != 'vmm') AND (active_propolis_id IS NULL)) +) diff --git a/sled-agent/src/common/instance.rs b/sled-agent/src/common/instance.rs index d7ee8982e0..62af337c4c 100644 --- a/sled-agent/src/common/instance.rs +++ b/sled-agent/src/common/instance.rs @@ -6,9 +6,8 @@ use crate::params::InstanceMigrationSourceParams; use chrono::{DateTime, Utc}; -use omicron_common::api::external::InstanceState as ApiInstanceState; use omicron_common::api::internal::nexus::{ - InstanceRuntimeState, SledInstanceState, VmmRuntimeState, + InstanceRuntimeState, SledInstanceState, VmmRuntimeState, VmmState, }; use propolis_client::types::{ InstanceState as PropolisApiState, InstanceStateMonitorResponse, @@ -35,7 +34,7 @@ impl From for PropolisInstanceState { } } -impl From for ApiInstanceState { +impl From for VmmState { fn from(value: PropolisInstanceState) -> Self { use propolis_client::types::InstanceState as State; match value.0 { @@ -43,25 +42,28 @@ impl From for ApiInstanceState { // when an instance has an active VMM. A Propolis that is "creating" // its virtual machine objects is "starting" from the external API's // perspective. 
- State::Creating | State::Starting => ApiInstanceState::Starting, - State::Running => ApiInstanceState::Running, - State::Stopping => ApiInstanceState::Stopping, + State::Creating | State::Starting => VmmState::Starting, + State::Running => VmmState::Running, + State::Stopping => VmmState::Stopping, // A Propolis that is stopped but not yet destroyed should still // appear to be Stopping from an external API perspective, since // they cannot be restarted yet. Instances become logically Stopped // once Propolis reports that the VM is Destroyed (see below). - State::Stopped => ApiInstanceState::Stopping, - State::Rebooting => ApiInstanceState::Rebooting, - State::Migrating => ApiInstanceState::Migrating, - State::Repairing => ApiInstanceState::Repairing, - State::Failed => ApiInstanceState::Failed, + State::Stopped => VmmState::Stopping, + State::Rebooting => VmmState::Rebooting, + State::Migrating => VmmState::Migrating, + State::Failed => VmmState::Failed, // Nexus needs to learn when a VM has entered the "destroyed" state // so that it can release its resource reservation. When this // happens, this module also clears the active VMM ID from the // instance record, which will accordingly set the Nexus-owned // instance state to Stopped, preventing this state from being used // as an externally-visible instance state. - State::Destroyed => ApiInstanceState::Destroyed, + State::Destroyed => VmmState::Destroyed, + // Propolis never actually uses the Repairing state, so this should + // be unreachable, but since these states come from another process, + // program defensively and convert Repairing to Running. 
+ State::Repairing => VmmState::Running, } } } @@ -170,11 +172,11 @@ pub enum PublishedVmmState { Rebooting, } -impl From for ApiInstanceState { +impl From for VmmState { fn from(value: PublishedVmmState) -> Self { match value { - PublishedVmmState::Stopping => ApiInstanceState::Stopping, - PublishedVmmState::Rebooting => ApiInstanceState::Rebooting, + PublishedVmmState::Stopping => VmmState::Stopping, + PublishedVmmState::Rebooting => VmmState::Rebooting, } } } @@ -525,7 +527,7 @@ mod test { }; let vmm = VmmRuntimeState { - state: ApiInstanceState::Starting, + state: VmmState::Starting, gen: Generation::new(), time_updated: now, }; @@ -535,7 +537,7 @@ mod test { fn make_migration_source_instance() -> InstanceStates { let mut state = make_instance(); - state.vmm.state = ApiInstanceState::Migrating; + state.vmm.state = VmmState::Migrating; state.instance.migration_id = Some(Uuid::new_v4()); state.instance.dst_propolis_id = Some(Uuid::new_v4()); state @@ -543,7 +545,7 @@ mod test { fn make_migration_target_instance() -> InstanceStates { let mut state = make_instance(); - state.vmm.state = ApiInstanceState::Migrating; + state.vmm.state = VmmState::Migrating; state.instance.migration_id = Some(Uuid::new_v4()); state.propolis_id = Uuid::new_v4(); state.instance.dst_propolis_id = Some(state.propolis_id); @@ -661,7 +663,7 @@ mod test { // The Stopped state is translated internally to Stopping to prevent // external viewers from perceiving that the instance is stopped before // the VMM is fully retired. 
- assert_eq!(state.vmm.state, ApiInstanceState::Stopping); + assert_eq!(state.vmm.state, VmmState::Stopping); assert!(state.vmm.gen > prev.vmm.gen); let prev = state.clone(); @@ -672,7 +674,7 @@ mod test { )); assert_state_change_has_gen_change(&prev, &state); assert_eq!(state.instance.gen, prev.instance.gen); - assert_eq!(state.vmm.state, ApiInstanceState::Destroyed); + assert_eq!(state.vmm.state, VmmState::Destroyed); assert!(state.vmm.gen > prev.vmm.gen); } @@ -695,7 +697,7 @@ mod test { )); assert_state_change_has_gen_change(&prev, &state); assert_eq!(state.instance.gen, prev.instance.gen); - assert_eq!(state.vmm.state, ApiInstanceState::Failed); + assert_eq!(state.vmm.state, VmmState::Failed); assert!(state.vmm.gen > prev.vmm.gen); } @@ -734,7 +736,7 @@ mod test { assert!(state.instance.migration_id.is_none()); assert!(state.instance.dst_propolis_id.is_none()); assert!(state.instance.gen > prev.instance.gen); - assert_eq!(state.vmm.state, ApiInstanceState::Running); + assert_eq!(state.vmm.state, VmmState::Running); assert!(state.vmm.gen > prev.vmm.gen); // Pretend Nexus set some new migration IDs. 
@@ -766,7 +768,7 @@ mod test { state.instance.dst_propolis_id.unwrap(), prev.instance.dst_propolis_id.unwrap() ); - assert_eq!(state.vmm.state, ApiInstanceState::Migrating); + assert_eq!(state.vmm.state, VmmState::Migrating); assert!(state.vmm.gen > prev.vmm.gen); assert_eq!(state.instance.gen, prev.instance.gen); @@ -778,7 +780,7 @@ mod test { observed.migration_status = ObservedMigrationStatus::Succeeded; assert!(state.apply_propolis_observation(&observed).is_none()); assert_state_change_has_gen_change(&prev, &state); - assert_eq!(state.vmm.state, ApiInstanceState::Migrating); + assert_eq!(state.vmm.state, VmmState::Migrating); assert!(state.vmm.gen > prev.vmm.gen); assert_eq!(state.instance.migration_id, prev.instance.migration_id); assert_eq!( diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 271eceb556..c6c567595d 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -1539,9 +1539,9 @@ mod tests { use illumos_utils::zone::__mock_MockZones::__id::Context as MockZonesIdContext; use internal_dns::resolver::Resolver; use omicron_common::api::external::{ - ByteCount, Generation, Hostname, InstanceCpuCount, InstanceState, + ByteCount, Generation, Hostname, InstanceCpuCount, }; - use omicron_common::api::internal::nexus::InstanceProperties; + use omicron_common::api::internal::nexus::{InstanceProperties, VmmState}; use omicron_common::FileKv; use sled_storage::manager_test_harness::StorageManagerTestHarness; use std::net::Ipv6Addr; @@ -1781,7 +1781,7 @@ mod tests { time_updated: Default::default(), }, vmm_runtime: VmmRuntimeState { - state: InstanceState::Creating, + state: VmmState::Starting, gen: Generation::new(), time_updated: Default::default(), }, @@ -1880,7 +1880,7 @@ mod tests { TIMEOUT_DURATION, state_rx.wait_for(|maybe_state| match maybe_state { ReceivedInstanceState::InstancePut(sled_inst_state) => { - sled_inst_state.vmm_state.state == InstanceState::Running + sled_inst_state.vmm_state.state == 
VmmState::Running } _ => false, }), @@ -1953,7 +1953,7 @@ mod tests { .expect_err("*should've* timed out waiting for Instance::put_state, but didn't?"); if let ReceivedInstanceState::InstancePut(SledInstanceState { - vmm_state: VmmRuntimeState { state: InstanceState::Running, .. }, + vmm_state: VmmRuntimeState { state: VmmState::Running, .. }, .. }) = state_rx.borrow().to_owned() { @@ -2035,7 +2035,7 @@ mod tests { .expect_err("*should've* timed out waiting for Instance::put_state, but didn't?"); if let ReceivedInstanceState::InstancePut(SledInstanceState { - vmm_state: VmmRuntimeState { state: InstanceState::Running, .. }, + vmm_state: VmmRuntimeState { state: VmmState::Running, .. }, .. }) = state_rx.borrow().to_owned() { @@ -2138,7 +2138,7 @@ mod tests { TIMEOUT_DURATION, state_rx.wait_for(|maybe_state| match maybe_state { ReceivedInstanceState::InstancePut(sled_inst_state) => { - sled_inst_state.vmm_state.state == InstanceState::Running + sled_inst_state.vmm_state.state == VmmState::Running } _ => false, }), diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs index bbc3e440ab..f5be31bd37 100644 --- a/sled-agent/src/sim/collection.rs +++ b/sled-agent/src/sim/collection.rs @@ -421,11 +421,11 @@ mod test { use omicron_common::api::external::DiskState; use omicron_common::api::external::Error; use omicron_common::api::external::Generation; - use omicron_common::api::external::InstanceState; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::VmmRuntimeState; + use omicron_common::api::internal::nexus::VmmState; use omicron_test_utils::dev::test_setup_log; use uuid::Uuid; @@ -442,7 +442,7 @@ mod test { }; let vmm_state = VmmRuntimeState { - state: InstanceState::Starting, + state: VmmState::Starting, gen: Generation::new(), time_updated: Utc::now(), }; @@ -478,7 
+478,7 @@ mod test { let r1 = instance.object.current(); info!(logctx.log, "new instance"; "state" => ?r1); - assert_eq!(r1.vmm_state.state, InstanceState::Starting); + assert_eq!(r1.vmm_state.state, VmmState::Starting); assert_eq!(r1.vmm_state.gen, Generation::new()); // There's no asynchronous transition going on yet so a @@ -508,7 +508,7 @@ mod test { ); assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); assert!(rnext.instance_state.propolis_id.is_none()); - assert_eq!(rnext.vmm_state.state, InstanceState::Destroyed); + assert_eq!(rnext.vmm_state.state, VmmState::Destroyed); assert!(rx.try_next().is_err()); logctx.cleanup_successful(); @@ -524,7 +524,7 @@ mod test { let r1 = instance.object.current(); info!(logctx.log, "new instance"; "state" => ?r1); - assert_eq!(r1.vmm_state.state, InstanceState::Starting); + assert_eq!(r1.vmm_state.state, VmmState::Starting); assert_eq!(r1.vmm_state.gen, Generation::new()); // There's no asynchronous transition going on yet so a @@ -553,7 +553,7 @@ mod test { let rnext = instance.object.current(); assert_eq!(rnext.vmm_state.gen, rprev.vmm_state.gen); assert_eq!(rnext.vmm_state.time_updated, rprev.vmm_state.time_updated); - assert_eq!(rnext.vmm_state.state, InstanceState::Starting); + assert_eq!(rnext.vmm_state.state, VmmState::Starting); rprev = rnext; // Now poke the instance. It should transition to Running. 
@@ -563,8 +563,8 @@ mod test { assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); assert!(instance.object.desired().is_none()); assert!(rx.try_next().is_err()); - assert_eq!(rprev.vmm_state.state, InstanceState::Starting); - assert_eq!(rnext.vmm_state.state, InstanceState::Running); + assert_eq!(rprev.vmm_state.state, VmmState::Starting); + assert_eq!(rnext.vmm_state.state, VmmState::Running); assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); rprev = rnext; @@ -596,7 +596,7 @@ mod test { let rnext = instance.object.current(); assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); - assert_eq!(rnext.vmm_state.state, InstanceState::Stopping); + assert_eq!(rnext.vmm_state.state, VmmState::Stopping); rprev = rnext; // Propolis publishes its own transition to Stopping before it publishes @@ -606,8 +606,8 @@ mod test { assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); assert!(instance.object.desired().is_some()); - assert_eq!(rprev.vmm_state.state, InstanceState::Stopping); - assert_eq!(rnext.vmm_state.state, InstanceState::Stopping); + assert_eq!(rprev.vmm_state.state, VmmState::Stopping); + assert_eq!(rnext.vmm_state.state, VmmState::Stopping); rprev = rnext; // The Stopping-to-Stopped transition is masked from external viewers of @@ -618,8 +618,8 @@ mod test { assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); assert!(instance.object.desired().is_some()); - assert_eq!(rprev.vmm_state.state, InstanceState::Stopping); - assert_eq!(rnext.vmm_state.state, InstanceState::Stopping); + assert_eq!(rprev.vmm_state.state, VmmState::Stopping); + assert_eq!(rnext.vmm_state.state, VmmState::Stopping); rprev = rnext; // ...and Stopped (internally) goes to Destroyed. 
This transition is @@ -629,8 +629,8 @@ mod test { let rnext = instance.object.current(); assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); - assert_eq!(rprev.vmm_state.state, InstanceState::Stopping); - assert_eq!(rnext.vmm_state.state, InstanceState::Destroyed); + assert_eq!(rprev.vmm_state.state, VmmState::Stopping); + assert_eq!(rnext.vmm_state.state, VmmState::Destroyed); assert!(rnext.instance_state.gen > rprev.instance_state.gen); logctx.cleanup_successful(); } @@ -645,7 +645,7 @@ mod test { let r1 = instance.object.current(); info!(logctx.log, "new instance"; "state" => ?r1); - assert_eq!(r1.vmm_state.state, InstanceState::Starting); + assert_eq!(r1.vmm_state.state, VmmState::Starting); assert_eq!(r1.vmm_state.gen, Generation::new()); assert!(instance .transition(InstanceStateRequested::Running) @@ -670,7 +670,7 @@ mod test { let (rprev, rnext) = (rnext, instance.object.current()); assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated > rprev.vmm_state.time_updated); - assert_eq!(rnext.vmm_state.state, InstanceState::Rebooting); + assert_eq!(rnext.vmm_state.state, VmmState::Rebooting); instance.transition_finish(); let (rprev, rnext) = (rnext, instance.object.current()); @@ -681,7 +681,7 @@ mod test { assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated > rprev.vmm_state.time_updated); - assert_eq!(rnext.vmm_state.state, InstanceState::Rebooting); + assert_eq!(rnext.vmm_state.state, VmmState::Rebooting); assert!(instance.object.desired().is_some()); instance.transition_finish(); let (rprev, rnext) = (rnext, instance.object.current()); @@ -693,7 +693,7 @@ mod test { assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated > rprev.vmm_state.time_updated); - assert_eq!(rnext.vmm_state.state, InstanceState::Running); + assert_eq!(rnext.vmm_state.state, VmmState::Running); 
logctx.cleanup_successful(); } diff --git a/sled-agent/src/sim/instance.rs b/sled-agent/src/sim/instance.rs index 8b00adce60..ed88dbcc6f 100644 --- a/sled-agent/src/sim/instance.rs +++ b/sled-agent/src/sim/instance.rs @@ -14,10 +14,9 @@ use chrono::Utc; use nexus_client; use omicron_common::api::external::Error; use omicron_common::api::external::Generation; -use omicron_common::api::external::InstanceState as ApiInstanceState; use omicron_common::api::external::ResourceType; use omicron_common::api::internal::nexus::{ - InstanceRuntimeState, SledInstanceState, + InstanceRuntimeState, SledInstanceState, VmmState, }; use propolis_client::types::{ InstanceMigrateStatusResponse as PropolisMigrateStatus, @@ -32,7 +31,7 @@ use crate::common::instance::{Action as InstanceAction, InstanceStates}; #[derive(Clone, Debug)] enum MonitorChange { - InstanceState(PropolisInstanceState), + PropolisState(PropolisInstanceState), MigrateStatus(PropolisMigrateStatus), } @@ -68,7 +67,7 @@ struct SimInstanceInner { impl SimInstanceInner { /// Pushes a Propolis instance state transition to the state change queue. fn queue_propolis_state(&mut self, propolis_state: PropolisInstanceState) { - self.queue.push_back(MonitorChange::InstanceState(propolis_state)); + self.queue.push_back(MonitorChange::PropolisState(propolis_state)); } /// Pushes a Propolis migration status to the state change queue. 
@@ -85,7 +84,7 @@ impl SimInstanceInner { self.queue .iter() .filter_map(|entry| match entry { - MonitorChange::InstanceState(state) => Some(state), + MonitorChange::PropolisState(state) => Some(state), _ => None, }) .last() @@ -111,7 +110,7 @@ impl SimInstanceInner { self ))); } - if self.state.vmm().state != ApiInstanceState::Migrating { + if self.state.vmm().state != VmmState::Migrating { return Err(Error::invalid_request(&format!( "can't request migration in for a vmm that wasn't \ created in the migrating state (current state: {:?})", @@ -142,32 +141,25 @@ impl SimInstanceInner { } InstanceStateRequested::Running => { match self.next_resting_state() { - // It's only valid to request the Running state after - // successfully registering a VMM, and a registered VMM - // should never be in the Creating state. - ApiInstanceState::Creating => unreachable!( - "VMMs should never try to reach the Creating state" - ), - ApiInstanceState::Starting => { + VmmState::Starting => { self.queue_propolis_state( PropolisInstanceState::Running, ); } - ApiInstanceState::Running - | ApiInstanceState::Rebooting - | ApiInstanceState::Migrating => {} + VmmState::Running + | VmmState::Rebooting + | VmmState::Migrating => {} // Propolis forbids direct transitions from a stopped state // back to a running state. Callers who want to restart a // stopped instance must recreate it. 
- ApiInstanceState::Stopping - | ApiInstanceState::Stopped - | ApiInstanceState::Repairing - | ApiInstanceState::Failed - | ApiInstanceState::Destroyed => { + VmmState::Stopping + | VmmState::Stopped + | VmmState::Failed + | VmmState::Destroyed => { return Err(Error::invalid_request(&format!( "can't request state Running with pending resting \ - state {} (current state: {:?})", + state {:?} (current state: {:?})", self.next_resting_state(), self ))) @@ -176,13 +168,10 @@ impl SimInstanceInner { } InstanceStateRequested::Stopped => { match self.next_resting_state() { - ApiInstanceState::Creating => unreachable!( - "VMMs should never try to reach the Creating state" - ), - ApiInstanceState::Starting => { + VmmState::Starting => { self.state.terminate_rudely(); } - ApiInstanceState::Running => { + VmmState::Running => { self.state.transition_vmm( PublishedVmmState::Stopping, Utc::now(), @@ -199,13 +188,13 @@ impl SimInstanceInner { } // Idempotently allow requests to stop an instance that is // already stopping. - ApiInstanceState::Stopping - | ApiInstanceState::Stopped - | ApiInstanceState::Destroyed => {} + VmmState::Stopping + | VmmState::Stopped + | VmmState::Destroyed => {} _ => { return Err(Error::invalid_request(&format!( "can't request state Stopped with pending resting \ - state {} (current state: {:?})", + state {:?} (current state: {:?})", self.next_resting_state(), self ))) @@ -213,10 +202,10 @@ impl SimInstanceInner { } } InstanceStateRequested::Reboot => match self.next_resting_state() { - ApiInstanceState::Running => { + VmmState::Running => { // Further requests to reboot are ignored if the instance // is currently rebooting or about to reboot. 
- if self.state.vmm().state != ApiInstanceState::Rebooting + if self.state.vmm().state != VmmState::Rebooting && !self.reboot_pending() { self.state.transition_vmm( @@ -233,7 +222,7 @@ impl SimInstanceInner { } _ => { return Err(Error::invalid_request(&format!( - "can't request Reboot with pending resting state {} \ + "can't request Reboot with pending resting state {:?} \ (current state: {:?})", self.next_resting_state(), self @@ -249,7 +238,7 @@ impl SimInstanceInner { fn execute_desired_transition(&mut self) -> Option { if let Some(change) = self.queue.pop_front() { match change { - MonitorChange::InstanceState(state) => { + MonitorChange::PropolisState(state) => { if matches!(state, PropolisInstanceState::Destroyed) { self.destroyed = true; } @@ -305,25 +294,26 @@ impl SimInstanceInner { /// Returns the "resting" state the simulated instance will reach if its /// queue is drained. - fn next_resting_state(&self) -> ApiInstanceState { + fn next_resting_state(&self) -> VmmState { if self.queue.is_empty() { self.state.vmm().state } else { if let Some(last_state) = self.last_queued_instance_state() { - use ApiInstanceState as ApiState; use PropolisInstanceState as PropolisState; match last_state { PropolisState::Creating | PropolisState::Starting => { - ApiState::Starting + VmmState::Starting + } + PropolisState::Running => VmmState::Running, + PropolisState::Stopping => VmmState::Stopping, + PropolisState::Stopped => VmmState::Stopped, + PropolisState::Rebooting => VmmState::Rebooting, + PropolisState::Migrating => VmmState::Migrating, + PropolisState::Failed => VmmState::Failed, + PropolisState::Destroyed => VmmState::Destroyed, + PropolisState::Repairing => { + unreachable!("Propolis doesn't use the Repairing state") } - PropolisState::Running => ApiState::Running, - PropolisState::Stopping => ApiState::Stopping, - PropolisState::Stopped => ApiState::Stopped, - PropolisState::Rebooting => ApiState::Rebooting, - PropolisState::Migrating => ApiState::Migrating, - 
PropolisState::Repairing => ApiState::Repairing, - PropolisState::Failed => ApiState::Failed, - PropolisState::Destroyed => ApiState::Destroyed, } } else { self.state.vmm().state @@ -337,7 +327,7 @@ impl SimInstanceInner { self.queue.iter().any(|s| { matches!( s, - MonitorChange::InstanceState(PropolisInstanceState::Rebooting) + MonitorChange::PropolisState(PropolisInstanceState::Rebooting) ) }) } @@ -419,7 +409,7 @@ impl Simulatable for SimInstance { fn new(current: SledInstanceState) -> Self { assert!(matches!( current.vmm_state.state, - ApiInstanceState::Starting | ApiInstanceState::Migrating), + VmmState::Starting | VmmState::Migrating), "new VMMs should always be registered in the Starting or Migrating \ state (supplied state: {:?})", current.vmm_state.state From 07e9ab9cfa39e639380cf1e1881e7f9fcbf1c924 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 5 Jun 2024 23:17:52 +0100 Subject: [PATCH 14/16] Update OPTE to 0.31.262 (417f74e) (#5852) This PR includes one soundness fix over the current OPTE version, as well as dependency updates. 
* Correctly scope lock use in xde_detach (oxidecomputer/opte#545) * Lock file maintenance (oxidecomputer/opte#544) * Lock file maintenance (oxidecomputer/opte#541) --- Cargo.lock | 12 ++++++------ Cargo.toml | 4 ++-- tools/opte_version | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8b73743ef3..b684da1dda 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3450,7 +3450,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2" +source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" [[package]] name = "illumos-utils" @@ -3864,7 +3864,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2" +source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" dependencies = [ "quote", "syn 2.0.64", @@ -6075,7 +6075,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2" +source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" dependencies = [ "cfg-if", "dyn-clone", @@ -6092,7 +6092,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2" +source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" 
dependencies = [ "illumos-sys-hdrs", "ipnetwork", @@ -6104,7 +6104,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2" +source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" dependencies = [ "libc", "libnet 0.1.0 (git+https://github.com/oxidecomputer/netadm-sys)", @@ -6178,7 +6178,7 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=d6177ca84f23e60a661461bb4cece475689502d2#d6177ca84f23e60a661461bb4cece475689502d2" +source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" dependencies = [ "cfg-if", "illumos-sys-hdrs", diff --git a/Cargo.toml b/Cargo.toml index f9b4906779..489c7a1552 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -357,14 +357,14 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.11.0" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "d6177ca84f23e60a661461bb4cece475689502d2", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "417f74e94978c23f3892ac328c3387f3ecd9bb29", features = [ "api", "std" ] } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0" # must match samael's crate! 
openssl = "0.10" openssl-sys = "0.9" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "d6177ca84f23e60a661461bb4cece475689502d2" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "417f74e94978c23f3892ac328c3387f3ecd9bb29" } oso = "0.27" owo-colors = "4.0.0" oximeter = { path = "oximeter/oximeter" } diff --git a/tools/opte_version b/tools/opte_version index 6126a52eb4..529b93110f 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.31.259 +0.31.262 From 9d67e427c2073c7dffe65758f43fe10fc9888d35 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Wed, 5 Jun 2024 16:49:40 -0700 Subject: [PATCH 15/16] Add `VmmState::SagaUnwound` (#5855) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This branch is part of ongoing work on the `instance-update` saga (see PR #5749); I've factored it out into a separate PR. This is largely because this branch makes mechanical changes to a bunch of different files that aren't directly related to the core change of #5749, and I'd like to avoid the additional merge conflicts that are likely when these changes remain un-merged for a long time. --- Depends on #5854 and should merge only after that PR. As part of #5749, it is desirable to distinguish between *why* a VMM record was marked as `Destroyed`, in order to determine which saga is responsible for cleaning up that VMM's resources. The `instance-update` saga must be the only entity that can set an instance's VMM IDs (active and target) to NULL. Presently, however, the `instance-start` and `instance-migrate` sagas may also do this when they unwind. This is a requirement to avoid situations where a failed `instance-update` saga cannot unwind, because the instance's generation number has changed while the saga was executing. We want to ensure that if a start or migrate request fails, the instance will appear to be in the correct post-state as soon as the relevant API call returns. 
In order to do this, without having to also guarantee that an instance update has occurred, we introduce a new VMM state, `SagaUnwound`, with the following rules: - It is legal to start or migrate an instance if the `active_propolis_id` or `destination_propolis_id` (respectively) is either `NULL` or refers to a VMM that’s in the `SagaUnwound` state (the new VMM ID directly replaces the old ID). - If an instance has an `active_propolis_id` in the `SagaUnwound` state, then the instance appears to be `Stopped`. - If an instance has a `destination_propolis_id` in the `SagaUnwound` state, nothing special happens–the instance’s state is still derived from its active VMM’s state. - The update saga treats `SagaUnwound` VMMs as identical to `Destroyed` VMMs for purposes of deciding whether to remove a VMM ID from an instance. This branch adds a new `VmmState::SagaUnwound` variant. The `SagaUnwound` state is an internal implementation detail that shouldn't be exposed to an operator or in the external API. Sled-agents will never report that a VMM is in this state. Instead, this state is set by the `instance-start` and `instance-migrate` sagas when they unwind. When determining the API instance state from an instance and active VMM query so that the `SagaUnwound` state is mapped to `Destroyed`. Closes #5848, which this replaces. 
--- nexus/db-model/src/schema_versions.rs | 3 ++- nexus/db-model/src/vmm_state.rs | 13 ++++++++++--- nexus/src/app/instance.rs | 4 ++-- nexus/src/app/sagas/instance_common.rs | 13 ++++++++----- nexus/src/app/sagas/instance_migrate.rs | 2 +- nexus/src/app/sagas/instance_start.rs | 2 +- schema/crdb/add-saga-unwound-vmm-state/up.sql | 2 ++ schema/crdb/dbinit.sql | 5 +++-- 8 files changed, 29 insertions(+), 15 deletions(-) create mode 100644 schema/crdb/add-saga-unwound-vmm-state/up.sql diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 09039c952b..b598288c4d 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(70, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(71, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(71, "add-saga-unwound-vmm-state"), KnownVersion::new(70, "separate-instance-and-vmm-states"), KnownVersion::new(69, "expose-stage0"), KnownVersion::new(68, "filter-v2p-mapping-by-instance-state"), diff --git a/nexus/db-model/src/vmm_state.rs b/nexus/db-model/src/vmm_state.rs index f737f48f69..121daaf7dd 100644 --- a/nexus/db-model/src/vmm_state.rs +++ b/nexus/db-model/src/vmm_state.rs @@ -24,6 +24,7 @@ impl_enum_type!( Migrating => b"migrating" Failed => b"failed" Destroyed => b"destroyed" + SagaUnwound => b"saga_unwound" ); impl VmmState { @@ -37,6 +38,7 @@ impl VmmState { VmmState::Migrating => "migrating", VmmState::Failed => "failed", VmmState::Destroyed => "destroyed", + VmmState::SagaUnwound => "saga_unwound", } } } @@ -58,7 +60,7 @@ impl From for omicron_common::api::internal::nexus::VmmState { VmmState::Rebooting => Output::Rebooting, VmmState::Migrating => Output::Migrating, VmmState::Failed => Output::Failed, - VmmState::Destroyed => Output::Destroyed, + VmmState::Destroyed | VmmState::SagaUnwound => Output::Destroyed, } } } @@ -74,7 +76,7 @@ impl From for sled_agent_client::types::VmmState { VmmState::Rebooting => Output::Rebooting, VmmState::Migrating => Output::Migrating, VmmState::Failed => Output::Failed, - VmmState::Destroyed => Output::Destroyed, + VmmState::Destroyed | VmmState::SagaUnwound => Output::Destroyed, } } } @@ -104,7 +106,12 @@ impl From for omicron_common::api::external::InstanceState { VmmState::Starting => Output::Starting, VmmState::Running => Output::Running, VmmState::Stopping => Output::Stopping, - VmmState::Stopped => Output::Stopped, + // `SagaUnwound` should map to `Stopped` so that an `instance_view` + // API call that produces an instance with an unwound VMM will appear to + // be `Stopped`. 
This is because instances with unwound VMMs can + // be started by a subsequent instance-start saga, just like + // instances whose internal state actually is `Stopped`. + VmmState::Stopped | VmmState::SagaUnwound => Output::Stopped, VmmState::Rebooting => Output::Rebooting, VmmState::Migrating => Output::Migrating, VmmState::Failed => Output::Failed, diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 1132f1f5b8..27f62036b1 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1664,8 +1664,8 @@ impl super::Nexus { vmm.runtime.state, ))) } - DbVmmState::Destroyed => Err(Error::invalid_request( - "cannot connect to serial console of destroyed instance", + DbVmmState::Destroyed | DbVmmState::SagaUnwound => Err(Error::invalid_request( + "cannot connect to serial console of instance in state \"Stopped\"", )), } } else { diff --git a/nexus/src/app/sagas/instance_common.rs b/nexus/src/app/sagas/instance_common.rs index ba9854c146..14263df0ff 100644 --- a/nexus/src/app/sagas/instance_common.rs +++ b/nexus/src/app/sagas/instance_common.rs @@ -111,19 +111,22 @@ pub async fn create_and_insert_vmm_record( Ok(vmm) } -/// Given a previously-inserted VMM record, set its state to Destroyed and then -/// delete it. +/// Given a previously-inserted VMM record, set its state to `SagaUnwound` and +/// then delete it. /// /// This function succeeds idempotently if called with the same parameters, /// provided that the VMM record was not changed by some other actor after the /// calling saga inserted it. -pub async fn destroy_vmm_record( +/// +/// This function is intended to be called when a saga which created a VMM +/// record unwinds. 
+pub async fn unwind_vmm_record( datastore: &DataStore, opctx: &OpContext, prev_record: &db::model::Vmm, ) -> Result<(), anyhow::Error> { let new_runtime = db::model::VmmRuntimeState { - state: db::model::VmmState::Destroyed, + state: db::model::VmmState::SagaUnwound, time_state_updated: Utc::now(), gen: prev_record.runtime.gen.next().into(), }; @@ -252,7 +255,7 @@ pub async fn instance_ip_get_instance_state( // - starting: see below. match (found_instance_state, found_vmm_state) { // If there's no VMM, the instance is definitely not on any sled. - (InstanceState::NoVmm, _) => { + (InstanceState::NoVmm, _) | (_, Some(VmmState::SagaUnwound)) => { sled_id = None; } diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index db1d838014..1434064666 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -235,7 +235,7 @@ async fn sim_destroy_vmm_record( info!(osagactx.log(), "destroying vmm record for migration unwind"; "propolis_id" => %vmm.id); - super::instance_common::destroy_vmm_record( + super::instance_common::unwind_vmm_record( osagactx.datastore(), &opctx, &vmm, diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index d67ff02c20..0013a63d1a 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -200,7 +200,7 @@ async fn sis_destroy_vmm_record( ); let vmm = sagactx.lookup::("vmm_record")?; - super::instance_common::destroy_vmm_record( + super::instance_common::unwind_vmm_record( osagactx.datastore(), &opctx, &vmm, diff --git a/schema/crdb/add-saga-unwound-vmm-state/up.sql b/schema/crdb/add-saga-unwound-vmm-state/up.sql new file mode 100644 index 0000000000..65ab5b5c85 --- /dev/null +++ b/schema/crdb/add-saga-unwound-vmm-state/up.sql @@ -0,0 +1,2 @@ +ALTER TYPE omicron.public.vmm_state + ADD VALUE IF NOT EXISTS 'saga_unwound' AFTER 'destroyed'; diff --git a/schema/crdb/dbinit.sql 
b/schema/crdb/dbinit.sql index 7bda66c5f2..9dfad4f393 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -982,7 +982,8 @@ CREATE TYPE IF NOT EXISTS omicron.public.vmm_state AS ENUM ( 'rebooting', 'migrating', 'failed', - 'destroyed' + 'destroyed', + 'saga_unwound' ); /* @@ -4075,7 +4076,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '70.0.0', NULL) + (TRUE, NOW(), NOW(), '71.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 01bc9ad754eb87df6b55a58541559e2754ac7a8c Mon Sep 17 00:00:00 2001 From: iliana etaoin Date: Wed, 5 Jun 2024 17:30:24 -0700 Subject: [PATCH 16/16] honor `XTASK_BIN` in `tools/install_*_prerequisites.sh` (#5860) Merging #5482 introduced breakage I didn't account for: being able to run `install_runner_prerequisites.sh` without having Cargo available, which is used by the a4x2 testbed. This change introduces the `XTASK_BIN` environment variable which will be used instead of `cargo xtask` if set. The companion PR in a4x2 is https://github.com/oxidecomputer/testbed/pull/49/files. Tested that all these work: - `./tools/install_builder_prerequisites.sh -y` - `XTASK_BIN=target/debug/xtask ./tools/install_builder_prerequisites.sh -y` (after building xtask) - `XTASK_BIN=target/debug/nonexistent-file ./tools/install_builder_prerequisites.sh -y` (correctly fails with ENOENT) --- tools/install_builder_prerequisites.sh | 11 ++++++++++- tools/install_runner_prerequisites.sh | 11 ++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/tools/install_builder_prerequisites.sh b/tools/install_builder_prerequisites.sh index ead36ca2a9..d2d67f4cdf 100755 --- a/tools/install_builder_prerequisites.sh +++ b/tools/install_builder_prerequisites.sh @@ -85,6 +85,15 @@ function retry exit $retry_rc } +function xtask +{ + if [ -z ${XTASK_BIN+x} ]; then + cargo xtask "$@" + else + "$XTASK_BIN" "$@" + fi +} + # Packages to be installed on all OSes: # # - libpq, the PostgreSQL client lib. 
@@ -191,7 +200,7 @@ retry install_packages # - Packaging: When constructing packages on Helios, these utilities # are packaged into zones which may be launched by the sled agent. -retry cargo xtask download \ +retry xtask download \ cockroach \ clickhouse \ console \ diff --git a/tools/install_runner_prerequisites.sh b/tools/install_runner_prerequisites.sh index c863afcbd4..a53d339b69 100755 --- a/tools/install_runner_prerequisites.sh +++ b/tools/install_runner_prerequisites.sh @@ -84,6 +84,15 @@ function retry exit $retry_rc } +function xtask +{ + if [ -z ${XTASK_BIN+x} ]; then + cargo xtask "$@" + else + "$XTASK_BIN" "$@" + fi +} + # Packages to be installed Helios: # # - libpq, the PostgreSQL client lib. @@ -154,7 +163,7 @@ if [[ "${HOST_OS}" == "SunOS" ]]; then # Grab the SoftNPU machinery (ASIC simulator, scadm, P4 program, etc.) # # "cargo xtask virtual-hardware create" will use those to setup the softnpu zone - retry cargo xtask download softnpu + retry xtask download softnpu fi echo "All runner prerequisites installed successfully"