From 77a5f469c99663763a576332fa892a444d17a899 Mon Sep 17 00:00:00 2001
From: David Crespo
Date: Wed, 17 Jan 2024 22:38:59 -0600
Subject: [PATCH 01/91] Bump web console (#4830)

https://github.com/oxidecomputer/console/compare/367142c5...644a45b8

* [644a45b8](https://github.com/oxidecomputer/console/commit/644a45b8) oxidecomputer/console#1882
* [69927db1](https://github.com/oxidecomputer/console/commit/69927db1) oxidecomputer/console#1879
* [a3165dff](https://github.com/oxidecomputer/console/commit/a3165dff) oxidecomputer/console#1883
* [4f706c2b](https://github.com/oxidecomputer/console/commit/4f706c2b) oxidecomputer/console#1877
* [7999343d](https://github.com/oxidecomputer/console/commit/7999343d) oxidecomputer/console#1880
---
 tools/console_version | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/console_version b/tools/console_version
index f9347b6dbf..e76c29c9a0 100644
--- a/tools/console_version
+++ b/tools/console_version
@@ -1,2 +1,2 @@
-COMMIT="367142c5ed711e6dcfc59586277775020625bd6a"
-SHA2="7e061165950fc064811cc5f26d7b7bd102c0df63797ef05cf73d737c2fdceb87"
+COMMIT="644a45b8e4ab673ad51754e372277abc3ddfd036"
+SHA2="a059917d826081df04efd44186f6dfeef0099fc53f1e8618796ea990a510f4b0"

From 078c85fc73ee1758e4b9595db52f1eccb88177ad Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Thu, 18 Jan 2024 05:29:53 +0000
Subject: [PATCH 02/91] Update taiki-e/install-action digest to e7dd06a (#4837)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`4f8e324` -> `e7dd06a`](https://togithub.com/taiki-e/install-action/compare/4f8e324...e7dd06a) |

---

### Configuration

📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles.

🚦 **Automerge**: Enabled.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update again.

---

 - [ ] If you want to rebase/retry this PR, check this box

---

This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate).
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com>
---
 .github/workflows/hakari.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml
index 8711af8f9d..9dd17c985d 100644
--- a/.github/workflows/hakari.yml
+++ b/.github/workflows/hakari.yml
@@ -24,7 +24,7 @@ jobs:
         with:
           toolchain: stable
       - name: Install cargo-hakari
-        uses: taiki-e/install-action@4f8e32492b3baed061f7836e6a4d40eb19e49b71 # v2
+        uses: taiki-e/install-action@e7dd06a5731075458d8bbd3465396374ad0d20cb # v2
         with:
           tool: cargo-hakari
      - name: Check workspace-hack Cargo.toml is up-to-date

From d25af209c53c74db404e129083e37ecbb19bf7ea Mon Sep 17 00:00:00 2001
From: Ryan Goodfellow
Date: Thu, 18 Jan 2024 07:15:29 -0800
Subject: [PATCH 03/91] more specific address reservation errors (#4834)

---
 nexus/db-model/src/ipv4net.rs    | 37 +++++++++++++++++++----------
 nexus/db-model/src/ipv6net.rs    | 26 ++++++++++++++++-----
 nexus/db-model/src/lib.rs        | 40 +++++++++++++++++++++++-------
 nexus/db-model/src/vpc_subnet.rs | 37 +++++++++++++++--------------
 4 files changed, 95 insertions(+), 45 deletions(-)

diff --git a/nexus/db-model/src/ipv4net.rs b/nexus/db-model/src/ipv4net.rs
index cc4af0461e..abd5d6a3aa 100644
--- a/nexus/db-model/src/ipv4net.rs
+++ b/nexus/db-model/src/ipv4net.rs
@@ -2,6 +2,7 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
+use crate::vpc_subnet::RequestAddressError;
 use diesel::backend::Backend;
 use diesel::deserialize::{self, FromSql};
 use diesel::pg::Pg;
@@ -32,18 +33,30 @@ NewtypeDeref! { () pub struct Ipv4Net(external::Ipv4Net); }
 
 impl Ipv4Net {
     /// Check if an address is a valid user-requestable address for this subnet
-    pub fn check_requestable_addr(&self, addr: Ipv4Addr) -> bool {
-        self.contains(addr)
-            && (
-                // First N addresses are reserved
-                self.iter()
-                    .take(NUM_INITIAL_RESERVED_IP_ADDRESSES)
-                    .all(|this| this != addr)
-            )
-            && (
-                // Last address in the subnet is reserved
-                addr != self.broadcast()
-            )
+    pub fn check_requestable_addr(
+        &self,
+        addr: Ipv4Addr,
+    ) -> Result<(), RequestAddressError> {
+        if !self.contains(addr) {
+            return Err(RequestAddressError::OutsideSubnet(
+                addr.into(),
+                self.0 .0.into(),
+            ));
+        }
+        // Only the first N addresses are reserved
+        if self
+            .iter()
+            .take(NUM_INITIAL_RESERVED_IP_ADDRESSES)
+            .any(|this| this == addr)
+        {
+            return Err(RequestAddressError::Reserved);
+        }
+        // Last address in the subnet is reserved
+        if addr == self.broadcast() {
+            return Err(RequestAddressError::Broadcast);
+        }
+
+        Ok(())
     }
 }

diff --git a/nexus/db-model/src/ipv6net.rs b/nexus/db-model/src/ipv6net.rs
index 1297844761..3954b4145e 100644
--- a/nexus/db-model/src/ipv6net.rs
+++ b/nexus/db-model/src/ipv6net.rs
@@ -15,6 +15,8 @@
 use serde::Deserialize;
 use serde::Serialize;
 use std::net::Ipv6Addr;
 
+use crate::RequestAddressError;
+
 #[derive(
     Clone,
     Copy,
@@ -83,13 +85,25 @@ impl Ipv6Net {
     }
 
     /// Check if an address is a valid user-requestable address for this subnet
-    pub fn check_requestable_addr(&self, addr: Ipv6Addr) -> bool {
+    pub fn check_requestable_addr(
+        &self,
+        addr: Ipv6Addr,
+    ) -> Result<(), RequestAddressError> {
+        if !self.contains(addr) {
+            return Err(RequestAddressError::OutsideSubnet(
+                addr.into(),
+                self.0 .0.into(),
+            ));
+        }
         // Only the first N addresses are reserved
-        self.contains(addr)
-            && self
-                .iter()
-                .take(NUM_INITIAL_RESERVED_IP_ADDRESSES)
-                .all(|this| this != addr)
+        if self
+            .iter()
+            .take(NUM_INITIAL_RESERVED_IP_ADDRESSES)
+            .any(|this| this == addr)
+        {
+            return Err(RequestAddressError::Reserved);
+        }
+        Ok(())
     }
 }

diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs
index 6b89e5a270..8fdf05e876 100644
--- a/nexus/db-model/src/lib.rs
+++ b/nexus/db-model/src/lib.rs
@@ -407,10 +407,13 @@ impl DatabaseString for ProjectRole {
 
 #[cfg(test)]
 mod tests {
+    use crate::RequestAddressError;
+
     use super::VpcSubnet;
     use ipnetwork::Ipv4Network;
     use ipnetwork::Ipv6Network;
     use omicron_common::api::external::IdentityMetadataCreateParams;
+    use omicron_common::api::external::IpNet;
     use omicron_common::api::external::Ipv4Net;
     use omicron_common::api::external::Ipv6Net;
     use std::net::IpAddr;
@@ -515,18 +518,37 @@ mod tests {
     #[test]
     fn test_ip_subnet_check_requestable_address() {
         let subnet = super::Ipv4Net(Ipv4Net("192.168.0.0/16".parse().unwrap()));
-        assert!(subnet.check_requestable_addr("192.168.0.10".parse().unwrap()));
-        assert!(subnet.check_requestable_addr("192.168.1.0".parse().unwrap()));
-        assert!(!subnet.check_requestable_addr("192.168.0.0".parse().unwrap()));
-        assert!(subnet.check_requestable_addr("192.168.0.255".parse().unwrap()));
-        assert!(
-            !subnet.check_requestable_addr("192.168.255.255".parse().unwrap())
+        subnet.check_requestable_addr("192.168.0.10".parse().unwrap()).unwrap();
+        subnet.check_requestable_addr("192.168.1.0".parse().unwrap()).unwrap();
+        let addr = "192.178.0.10".parse().unwrap();
+        assert_eq!(
+            subnet.check_requestable_addr(addr),
+            Err(RequestAddressError::OutsideSubnet(
+                addr.into(),
+                IpNet::from(subnet.0).into()
+            ))
+        );
+        assert_eq!(
+            subnet.check_requestable_addr("192.168.0.0".parse().unwrap()),
+            Err(RequestAddressError::Reserved)
+        );
+
+        subnet
+            .check_requestable_addr("192.168.0.255".parse().unwrap())
+            .unwrap();
+
+        assert_eq!(
+            subnet.check_requestable_addr("192.168.255.255".parse().unwrap()),
+            Err(RequestAddressError::Broadcast)
         );
 
         let subnet = super::Ipv6Net(Ipv6Net("fd00::/64".parse().unwrap()));
-        assert!(subnet.check_requestable_addr("fd00::a".parse().unwrap()));
-        assert!(!subnet.check_requestable_addr("fd00::1".parse().unwrap()));
-        assert!(subnet.check_requestable_addr("fd00::1:1".parse().unwrap()));
+        subnet.check_requestable_addr("fd00::a".parse().unwrap()).unwrap();
+        assert_eq!(
+            subnet.check_requestable_addr("fd00::1".parse().unwrap()),
+            Err(RequestAddressError::Reserved)
+        );
+        subnet.check_requestable_addr("fd00::1:1".parse().unwrap()).unwrap();
     }
 
     /// Does some basic smoke checks on an impl of `DatabaseString`

diff --git a/nexus/db-model/src/vpc_subnet.rs b/nexus/db-model/src/vpc_subnet.rs
index 2cc74c177b..99f2c5e3ac 100644
--- a/nexus/db-model/src/vpc_subnet.rs
+++ b/nexus/db-model/src/vpc_subnet.rs
@@ -14,6 +14,7 @@
 use nexus_types::external_api::params;
 use nexus_types::external_api::views;
 use nexus_types::identity::Resource;
 use omicron_common::api::external;
+use omicron_common::nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES;
 use serde::Deserialize;
 use serde::Serialize;
 use std::net::IpAddr;
@@ -73,27 +74,27 @@
         &self,
         addr: IpAddr,
     ) -> Result<(), external::Error> {
-        let subnet = match addr {
-            IpAddr::V4(addr) => {
-                if self.ipv4_block.check_requestable_addr(addr) {
-                    return Ok(());
-                }
-                ipnetwork::IpNetwork::V4(self.ipv4_block.0 .0)
-            }
-            IpAddr::V6(addr) => {
-                if self.ipv6_block.check_requestable_addr(addr) {
-                    return Ok(());
-                }
-                ipnetwork::IpNetwork::V6(self.ipv6_block.0 .0)
-            }
-        };
-        Err(external::Error::invalid_request(&format!(
-            "Address '{}' not in subnet '{}' or is reserved for rack services",
-            addr, subnet,
-        )))
+        match addr {
+            IpAddr::V4(addr) => self.ipv4_block.check_requestable_addr(addr),
+            IpAddr::V6(addr) => self.ipv6_block.check_requestable_addr(addr),
+        }
+        .map_err(|e| external::Error::invalid_request(e.to_string()))
     }
 }
 
+#[derive(thiserror::Error, Debug, PartialEq)]
+pub enum RequestAddressError {
+    #[error("{} is outside subnet {}", .0, .1)]
+    OutsideSubnet(IpAddr, ipnetwork::IpNetwork),
+    #[error(
+        "The first {} addresses of a subnet are reserved",
+        NUM_INITIAL_RESERVED_IP_ADDRESSES
+    )]
+    Reserved,
+    #[error("Cannot request a broadcast address")]
+    Broadcast,
+}
+
 impl From<VpcSubnet> for views::VpcSubnet {
     fn from(subnet: VpcSubnet) -> Self {
         Self {
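The patch above turns a bare `bool` into a typed `RequestAddressError`, so callers can tell why an address was rejected. A minimal sketch of what a caller looks like after this change (the subnet literal and import paths here are illustrative, not part of the patch):

    use nexus_db_model::{Ipv4Net, RequestAddressError};
    use omicron_common::api::external;

    fn demo() {
        let subnet =
            Ipv4Net(external::Ipv4Net("192.168.0.0/24".parse().unwrap()));
        // "192.168.0.0" is inside the subnet but is one of the first
        // NUM_INITIAL_RESERVED_IP_ADDRESSES, so it is reported as Reserved.
        match subnet.check_requestable_addr("192.168.0.0".parse().unwrap()) {
            Ok(()) => println!("requestable"),
            Err(RequestAddressError::Reserved) => println!("reserved address"),
            Err(e) => println!("not requestable: {}", e),
        }
    }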
From 6b20cd25cae650c35f0a5c1d02f8ab5d86a6223d Mon Sep 17 00:00:00 2001
From: David Pacheco
Date: Thu, 18 Jan 2024 08:06:16 -0800
Subject: [PATCH 04/91] initial draft of blueprints (#4804)

---
 Cargo.lock                                    |  30 +-
 Cargo.toml                                    |   4 +
 clients/nexus-client/Cargo.toml               |   1 +
 clients/nexus-client/src/lib.rs               |  21 +-
 clients/sled-agent-client/src/lib.rs          |  60 +-
 common/src/address.rs                         |  22 +
 common/src/api/external/mod.rs                |   1 +
 dev-tools/omdb/Cargo.toml                     |   1 +
 dev-tools/omdb/src/bin/omdb/nexus.rs          | 289 ++++++
 dev-tools/omdb/tests/usage_errors.out         |   1 +
 internal-dns/src/config.rs                    |  26 +-
 nexus/Cargo.toml                              |   3 +
 nexus/db-model/src/external_ip.rs             |   4 +-
 nexus/db-model/src/inventory.rs               |  21 +-
 nexus/db-model/src/schema.rs                  |   5 +
 nexus/db-queries/src/authz/api_resources.rs   |  59 ++
 nexus/db-queries/src/authz/omicron.polar      |  18 +
 nexus/db-queries/src/authz/oso_generic.rs     |   2 +
 .../src/authz/policy_test/resource_builder.rs |   1 +
 .../src/authz/policy_test/resources.rs        |   8 +
 .../db-queries/src/db/datastore/inventory.rs  |   2 +-
 .../src/db/datastore/network_interface.rs     |   4 +-
 nexus/db-queries/src/db/datastore/zpool.rs    |  33 +
 nexus/db-queries/tests/output/authz-roles.out |  28 +
 nexus/deployment/Cargo.toml                   |  23 +
 nexus/deployment/src/blueprint_builder.rs     | 683 +++++++++++++
 nexus/deployment/src/ip_allocator.rs          | 120 +++
 nexus/deployment/src/lib.rs                   | 120 +++
 nexus/deployment/src/planner.rs               | 230 +++++
 nexus/inventory/src/collector.rs              |   9 +-
 nexus/src/app/deployment.rs                   | 364 +++++++
 nexus/src/app/instance.rs                     |   5 +-
 nexus/src/app/instance_network.rs             |   8 +-
 nexus/src/app/mod.rs                          |   6 +
 nexus/src/app/vpc.rs                          |  11 +-
 nexus/src/internal_api/http_entrypoints.rs    | 205 ++++
 nexus/test-utils/src/lib.rs                   |   1 +
 nexus/tests/integration_tests/instances.rs    |  31 +-
 nexus/tests/integration_tests/sleds.rs        |  22 +-
 nexus/types/Cargo.toml                        |   1 +
 nexus/types/src/deployment.rs                 | 564 +++++++++++
 nexus/types/src/external_api/params.rs        |   3 +
 nexus/types/src/external_api/views.rs         |   2 +-
 nexus/types/src/identity.rs                   |   2 +-
 nexus/types/src/inventory.rs                  |  10 +-
 nexus/types/src/lib.rs                        |   1 +
 openapi/nexus-internal.json                   | 922 +++++++++++++++++-
 sled-agent/src/bin/sled-agent-sim.rs          |   1 +
 sled-agent/src/params.rs                      |   2 +-
 sled-agent/src/rack_setup/plan/service.rs     |   6 +-
 sled-agent/src/sim/config.rs                  |   7 +-
 51 files changed, 3853 insertions(+), 150 deletions(-)
 create mode 100644 nexus/deployment/Cargo.toml
 create mode 100644 nexus/deployment/src/blueprint_builder.rs
 create mode 100644 nexus/deployment/src/ip_allocator.rs
 create mode 100644 nexus/deployment/src/lib.rs
 create mode 100644 nexus/deployment/src/planner.rs
 create mode 100644 nexus/src/app/deployment.rs
 create mode 100644 nexus/types/src/deployment.rs

diff --git a/Cargo.lock b/Cargo.lock
index 5fd9103488..bf05be9eba 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3477,9 +3477,9 @@ dependencies = [
 
 [[package]]
 name = "ipnet"
-version = "2.8.0"
+version = "2.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6"
+checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"
 
 [[package]]
 name = "ipnetwork"
@@ -4069,6 +4069,7 @@ dependencies = [
  "chrono",
  "futures",
  "ipnetwork",
+ "nexus-types",
  "omicron-common",
  "omicron-passwords",
  "omicron-workspace-hack",
@@ -4203,6 +4204,26 @@ dependencies = [
  "serde_json",
 ]
 
+[[package]]
+name = "nexus-deployment"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "chrono",
+ "internal-dns",
+ "ipnet",
+ "ipnetwork",
+ "nexus-inventory",
+ "nexus-types",
+ "omicron-common",
+ "omicron-test-utils",
+ "omicron-workspace-hack",
+ "sled-agent-client",
+ "slog",
+ "thiserror",
+ "uuid",
+]
+
 [[package]]
 name = "nexus-inventory"
 version = "0.1.0"
@@ -4313,6 +4334,7 @@ dependencies = [
  "sled-agent-client",
  "steno",
  "strum",
+ "thiserror",
  "uuid",
 ]
 
@@ -4763,6 +4785,7 @@ dependencies = [
  "hubtools",
  "hyper",
  "hyper-rustls",
+ "illumos-utils",
  "internal-dns",
  "ipnetwork",
  "itertools 0.12.0",
@@ -4772,6 +4795,7 @@ dependencies = [
  "nexus-db-model",
  "nexus-db-queries",
  "nexus-defaults",
+ "nexus-deployment",
  "nexus-inventory",
  "nexus-test-interface",
  "nexus-test-utils",
@@ -4821,6 +4845,7 @@ dependencies = [
  "slog",
  "slog-async",
  "slog-dtrace",
+ "slog-error-chain",
  "slog-term",
  "sp-sim",
  "steno",
@@ -4873,6 +4898,7 @@ dependencies = [
  "serde_json",
  "sled-agent-client",
  "slog",
+ "slog-error-chain",
  "strum",
  "subprocess",
  "tabled",

diff --git a/Cargo.toml b/Cargo.toml
index d45eb7ef70..0ca70097cd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -41,6 +41,7 @@ members = [
     "nexus/db-model",
     "nexus/db-queries",
     "nexus/defaults",
+    "nexus/deployment",
     "nexus/inventory",
     "nexus/test-interface",
     "nexus/test-utils-macros",
@@ -114,6 +115,7 @@ default-members = [
     "nexus/db-model",
     "nexus/db-queries",
     "nexus/defaults",
+    "nexus/deployment",
     "nexus/inventory",
     "nexus/types",
     "oximeter/collector",
@@ -228,6 +230,7 @@ installinator-artifact-client = { path = "clients/installinator-artifact-client" }
 installinator-common = { path = "installinator-common" }
 internal-dns = { path = "internal-dns" }
 ipcc = { path = "ipcc" }
+ipnet = "2.9"
 ipnetwork = { version = "0.20", features = ["schemars"] }
 itertools = "0.12.0"
 key-manager = { path = "key-manager" }
@@ -244,6 +247,7 @@ nexus-client = { path = "clients/nexus-client" }
 nexus-db-model = { path = "nexus/db-model" }
 nexus-db-queries = { path = "nexus/db-queries" }
 nexus-defaults = { path = "nexus/defaults" }
+nexus-deployment = { path = "nexus/deployment" }
 nexus-inventory = { path = "nexus/inventory" }
 omicron-certificates = { path = "certificates" }
 omicron-passwords = { path = "passwords" }

diff --git a/clients/nexus-client/Cargo.toml b/clients/nexus-client/Cargo.toml
index 2734142f9f..965e2a7dfb 100644
--- a/clients/nexus-client/Cargo.toml
+++ b/clients/nexus-client/Cargo.toml
@@ -8,6 +8,7 @@ license = "MPL-2.0"
 chrono.workspace = true
 futures.workspace = true
 ipnetwork.workspace = true
+nexus-types.workspace = true
 omicron-common.workspace = true
 omicron-passwords.workspace = true
 progenitor.workspace = true

diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs
index 3ecba7e710..1e1cbc31e7 100644
--- a/clients/nexus-client/src/lib.rs
+++ b/clients/nexus-client/src/lib.rs
@@ -22,6 +22,11 @@ progenitor::generate_api!(
         slog::debug!(log, "client response"; "result" => ?result);
     }),
     replace = {
+        // It's kind of unfortunate to pull in such a complex and unstable type
+        // as "blueprint" this way, but we have really useful functionality
+        // (e.g., diff'ing) that's implemented on our local type.
+        Blueprint = nexus_types::deployment::Blueprint,
+        Generation = omicron_common::api::external::Generation,
         Ipv4Network = ipnetwork::Ipv4Network,
         Ipv6Network = ipnetwork::Ipv6Network,
         IpNetwork = ipnetwork::IpNetwork,
@@ -91,7 +96,7 @@ impl From
     ) -> Self {
         Self {
             dst_propolis_id: s.dst_propolis_id,
-            gen: s.gen.into(),
+            gen: s.gen,
             migration_id: s.migration_id,
             propolis_id: s.propolis_id,
             time_updated: s.time_updated,
@@ -103,11 +108,7 @@ impl From
     for types::VmmRuntimeState
 {
     fn from(s: omicron_common::api::internal::nexus::VmmRuntimeState) -> Self {
-        Self {
-            gen: s.gen.into(),
-            state: s.state.into(),
-            time_updated: s.time_updated,
-        }
+        Self { gen: s.gen, state: s.state.into(), time_updated: s.time_updated }
     }
 }
@@ -145,19 +146,13 @@ impl From
     }
 }
 
-impl From<omicron_common::api::external::Generation> for types::Generation {
-    fn from(s: omicron_common::api::external::Generation) -> Self {
-        Self(i64::from(&s) as u64)
-    }
-}
-
 impl From<omicron_common::api::internal::nexus::DiskRuntimeState>
     for types::DiskRuntimeState
 {
     fn from(s: omicron_common::api::internal::nexus::DiskRuntimeState) -> Self {
         Self {
             disk_state: s.disk_state.into(),
-            gen: s.gen.into(),
+            gen: s.gen,
             time_updated: s.time_updated,
         }
     }

diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs
index ee2214c3c2..39de64ec62 100644
--- a/clients/sled-agent-client/src/lib.rs
+++ b/clients/sled-agent-client/src/lib.rs
@@ -27,12 +27,15 @@ progenitor::generate_api!(
     replace = {
         ByteCount = omicron_common::api::external::ByteCount,
         Generation = omicron_common::api::external::Generation,
+        MacAddr = omicron_common::api::external::MacAddr,
         Name = omicron_common::api::external::Name,
         SwitchLocation = omicron_common::api::external::SwitchLocation,
         Ipv6Network = ipnetwork::Ipv6Network,
         IpNetwork = ipnetwork::IpNetwork,
         PortFec = omicron_common::api::internal::shared::PortFec,
         PortSpeed = omicron_common::api::internal::shared::PortSpeed,
+        SourceNatConfig = omicron_common::api::internal::shared::SourceNatConfig,
+        Vni = omicron_common::api::external::Vni,
     }
 );
@@ -65,6 +68,24 @@ impl types::OmicronZoneType {
             types::OmicronZoneType::Oximeter { .. } => "oximeter",
         }
     }
+
+    /// Identifies whether this is an NTP zone
+    pub fn is_ntp(&self) -> bool {
+        match self {
+            types::OmicronZoneType::BoundaryNtp { .. }
+            | types::OmicronZoneType::InternalNtp { .. } => true,
+
+            types::OmicronZoneType::Clickhouse { .. }
+            | types::OmicronZoneType::ClickhouseKeeper { .. }
+            | types::OmicronZoneType::CockroachDb { .. }
+            | types::OmicronZoneType::Crucible { .. }
+            | types::OmicronZoneType::CruciblePantry { .. }
+            | types::OmicronZoneType::ExternalDns { .. }
+            | types::OmicronZoneType::InternalDns { .. }
+            | types::OmicronZoneType::Nexus { .. }
+            | types::OmicronZoneType::Oximeter { .. } => false,
+        }
+    }
 }
 
 impl omicron_common::api::external::ClientError for types::Error {
@@ -243,31 +264,6 @@ impl From
     }
 }
 
-impl From<omicron_common::api::external::Vni> for types::Vni {
-    fn from(v: omicron_common::api::external::Vni) -> Self {
-        Self(u32::from(v))
-    }
-}
-
-impl From<types::Vni> for omicron_common::api::external::Vni {
-    fn from(s: types::Vni) -> Self {
-        Self::try_from(s.0).unwrap()
-    }
-}
-
-impl From<omicron_common::api::external::MacAddr> for types::MacAddr {
-    fn from(s: omicron_common::api::external::MacAddr) -> Self {
-        Self::try_from(s.0.to_string())
-            .unwrap_or_else(|e| panic!("{}: {}", s.0, e))
-    }
-}
-
-impl From<types::MacAddr> for omicron_common::api::external::MacAddr {
-    fn from(s: types::MacAddr) -> Self {
-        s.parse().unwrap()
-    }
-}
-
 impl From<omicron_common::api::external::Ipv4Net> for types::Ipv4Net {
     fn from(n: omicron_common::api::external::Ipv4Net) -> Self {
         Self::try_from(n.to_string()).unwrap_or_else(|e| panic!("{}: {}", n, e))
@@ -424,7 +420,7 @@ impl From
         use omicron_common::api::internal::nexus::HostIdentifier::*;
         match s {
             Ip(net) => Self::Ip(net.into()),
-            Vpc(vni) => Self::Vpc(vni.into()),
+            Vpc(vni) => Self::Vpc(vni),
         }
     }
 }
@@ -505,23 +501,15 @@ impl From
             kind: s.kind.into(),
             name: s.name,
             ip: s.ip,
-            mac: s.mac.into(),
+            mac: s.mac,
             subnet: s.subnet.into(),
-            vni: s.vni.into(),
+            vni: s.vni,
             primary: s.primary,
             slot: s.slot,
         }
     }
 }
 
-impl From<omicron_common::api::internal::shared::SourceNatConfig>
-    for types::SourceNatConfig
-{
-    fn from(s: omicron_common::api::internal::shared::SourceNatConfig) -> Self {
-        Self { ip: s.ip, first_port: s.first_port, last_port: s.last_port }
-    }
-}
-
 /// Exposes additional [`Client`] interfaces for use by the test suite. These
 /// are bonus endpoints, not generated in the real client.
 #[async_trait]

diff --git a/common/src/address.rs b/common/src/address.rs
index 78eaee0bb4..0c8df33868 100644
--- a/common/src/address.rs
+++ b/common/src/address.rs
@@ -168,6 +168,15 @@ pub const RSS_RESERVED_ADDRESSES: u16 = 32;
 // The maximum number of addresses per sled reserved for control plane services.
 pub const CP_SERVICES_RESERVED_ADDRESSES: u16 = 0xFFFF;
 
+// Number of addresses reserved (by the Nexus deployment planner) for allocation
+// by the sled itself.  This is currently used for the first two addresses of
+// the sled subnet, which are used for the sled global zone and the switch zone,
+// if any.  Note that RSS does not honor this yet (in fact, per the above
+// RSS_RESERVED_ADDRESSES, it will _only_ choose from this range).  And
+// historically, systems did not have this reservation at all.  So it's not safe
+// to assume that addresses in this subnet are available.
+pub const SLED_RESERVED_ADDRESSES: u16 = 32;
+
 /// Wraps an [`Ipv6Network`] with a compile-time prefix length.
 #[derive(Debug, Clone, Copy, JsonSchema, Serialize, Hash, PartialEq, Eq)]
 #[schemars(rename = "Ipv6Subnet")]
@@ -279,6 +288,19 @@ impl ReservedRackSubnet {
     }
 }
 
+/// Return the list of DNS servers for the rack, given any address in the AZ
+/// subnet
+pub fn get_internal_dns_server_addresses(addr: Ipv6Addr) -> Vec<IpAddr> {
+    let az_subnet = Ipv6Subnet::<AZ_PREFIX>::new(addr);
+    let reserved_rack_subnet = ReservedRackSubnet::new(az_subnet);
+    let dns_subnets =
+        &reserved_rack_subnet.get_dns_subnets()[0..DNS_REDUNDANCY];
+    dns_subnets
+        .iter()
+        .map(|dns_subnet| IpAddr::from(dns_subnet.dns_address().ip()))
+        .collect()
+}
+
 const SLED_AGENT_ADDRESS_INDEX: usize = 1;
 const SWITCH_ZONE_ADDRESS_INDEX: usize = 2;
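The helper added above derives the rack's internal DNS server addresses purely from subnet arithmetic, so they can be computed without consulting any stored state. A small sketch of how a caller might use it (the AZ-subnet address below is an arbitrary example value, not from the patch):

    use omicron_common::address::get_internal_dns_server_addresses;
    use std::net::Ipv6Addr;

    fn demo() {
        // Any address within the AZ subnet will do; the DNS subnets sit at
        // fixed offsets within the reserved rack subnet.
        let any_az_addr: Ipv6Addr = "fd00:1122:3344:0100::1".parse().unwrap();
        for dns_addr in get_internal_dns_server_addresses(any_az_addr) {
            println!("internal DNS server: {}", dns_addr);
        }
    }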
diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs
index 899f15a04b..68fcb0f9fa 100644
--- a/common/src/api/external/mod.rs
+++ b/common/src/api/external/mod.rs
@@ -717,6 +717,7 @@ pub enum ResourceType {
     BackgroundTask,
     BgpConfig,
     BgpAnnounceSet,
+    Blueprint,
     Fleet,
     Silo,
     SiloUser,

diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml
index 7544374906..e08d5f9477 100644
--- a/dev-tools/omdb/Cargo.toml
+++ b/dev-tools/omdb/Cargo.toml
@@ -33,6 +33,7 @@ serde.workspace = true
 serde_json.workspace = true
 sled-agent-client.workspace = true
 slog.workspace = true
+slog-error-chain.workspace = true
 strum.workspace = true
 tabled.workspace = true
 textwrap.workspace = true

diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs
index df5248b52d..fef069d536 100644
--- a/dev-tools/omdb/src/bin/omdb/nexus.rs
+++ b/dev-tools/omdb/src/bin/omdb/nexus.rs
@@ -11,11 +11,13 @@ use chrono::SecondsFormat;
 use chrono::Utc;
 use clap::Args;
 use clap::Subcommand;
+use futures::TryStreamExt;
 use nexus_client::types::ActivationReason;
 use nexus_client::types::BackgroundTask;
 use nexus_client::types::CurrentStatus;
 use nexus_client::types::LastResult;
 use serde::Deserialize;
+use slog_error_chain::InlineErrorChain;
 use std::collections::BTreeMap;
 use tabled::Tabled;
 use uuid::Uuid;
@@ -36,6 +38,8 @@ pub struct NexusArgs {
 enum NexusCommands {
     /// print information about background tasks
     BackgroundTasks(BackgroundTasksArgs),
+    /// print information about blueprints
+    Blueprints(BlueprintsArgs),
 }
 
 #[derive(Debug, Args)]
@@ -54,6 +58,64 @@ enum BackgroundTasksCommands {
     Show,
 }
 
+#[derive(Debug, Args)]
+struct BlueprintsArgs {
+    #[command(subcommand)]
+    command: BlueprintsCommands,
+}
+
+#[derive(Debug, Subcommand)]
+enum BlueprintsCommands {
+    /// List all blueprints
+    List,
+    /// Show a blueprint
+    Show(BlueprintIdArgs),
+    /// Diff two blueprints
+    Diff(BlueprintIdsArgs),
+    /// Delete a blueprint
+    Delete(BlueprintIdArgs),
+    /// Set the current target blueprint
+    Target(BlueprintsTargetArgs),
+    /// Generate an initial blueprint from a specific inventory collection
+    GenerateFromCollection(CollectionIdArgs),
+    /// Generate a new blueprint
+    Regenerate,
+}
+
+#[derive(Debug, Args)]
+struct BlueprintIdArgs {
+    /// id of a blueprint
+    blueprint_id: Uuid,
+}
+
+#[derive(Debug, Args)]
+struct BlueprintIdsArgs {
+    /// id of first blueprint
+    blueprint1_id: Uuid,
+    /// id of second blueprint
+    blueprint2_id: Uuid,
+}
+
+#[derive(Debug, Args)]
+struct CollectionIdArgs {
+    /// id of an inventory collection
+    collection_id: Uuid,
+}
+
+#[derive(Debug, Args)]
+struct BlueprintsTargetArgs {
+    #[command(subcommand)]
+    command: BlueprintTargetCommands,
+}
+
+#[derive(Debug, Subcommand)]
+enum BlueprintTargetCommands {
+    /// Show the current target blueprint
+    Show,
+    /// Change the current target blueprint
+    Set(BlueprintIdArgs),
+}
+
 impl NexusArgs {
     /// Run a `omdb nexus` subcommand.
     pub(crate) async fn run_cmd(
@@ -93,6 +155,40 @@ impl NexusArgs {
             NexusCommands::BackgroundTasks(BackgroundTasksArgs {
                 command: BackgroundTasksCommands::Show,
             }) => cmd_nexus_background_tasks_show(&client).await,
+
+            NexusCommands::Blueprints(BlueprintsArgs {
+                command: BlueprintsCommands::List,
+            }) => cmd_nexus_blueprints_list(&client).await,
+            NexusCommands::Blueprints(BlueprintsArgs {
+                command: BlueprintsCommands::Show(args),
+            }) => cmd_nexus_blueprints_show(&client, args).await,
+            NexusCommands::Blueprints(BlueprintsArgs {
+                command: BlueprintsCommands::Diff(args),
+            }) => cmd_nexus_blueprints_diff(&client, args).await,
+            NexusCommands::Blueprints(BlueprintsArgs {
+                command: BlueprintsCommands::Delete(args),
+            }) => cmd_nexus_blueprints_delete(&client, args).await,
+            NexusCommands::Blueprints(BlueprintsArgs {
+                command:
+                    BlueprintsCommands::Target(BlueprintsTargetArgs {
+                        command: BlueprintTargetCommands::Show,
+                    }),
+            }) => cmd_nexus_blueprints_target_show(&client).await,
+            NexusCommands::Blueprints(BlueprintsArgs {
+                command:
+                    BlueprintsCommands::Target(BlueprintsTargetArgs {
+                        command: BlueprintTargetCommands::Set(args),
+                    }),
+            }) => cmd_nexus_blueprints_target_set(&client, args).await,
+            NexusCommands::Blueprints(BlueprintsArgs {
+                command: BlueprintsCommands::Regenerate,
+            }) => cmd_nexus_blueprints_regenerate(&client).await,
+            NexusCommands::Blueprints(BlueprintsArgs {
+                command: BlueprintsCommands::GenerateFromCollection(args),
+            }) => {
+                cmd_nexus_blueprints_generate_from_collection(&client, args)
+                    .await
+            }
         }
     }
 }
@@ -629,3 +725,196 @@ fn reason_code(reason: ActivationReason) -> char {
         ActivationReason::Timeout => 'T',
     }
 }
+
+async fn cmd_nexus_blueprints_list(
+    client: &nexus_client::Client,
+) -> Result<(), anyhow::Error> {
+    #[derive(Tabled)]
+    #[tabled(rename_all = "SCREAMING_SNAKE_CASE")]
+    struct BlueprintRow {
+        #[tabled(rename = "T")]
+        is_target: &'static str,
+        id: String,
+        parent: String,
+        time_created: String,
+    }
+
+    let target_id = match client.blueprint_target_view().await {
+        Ok(result) => Some(result.into_inner().target_id),
+        Err(error) => {
+            // This request will fail if there's no target configured, so it's
+            // not necessarily a big deal.
+            eprintln!(
+                "warn: failed to fetch current target: {}",
+                InlineErrorChain::new(&error),
+            );
+            None
+        }
+    };
+
+    let rows: Vec<BlueprintRow> = client
+        .blueprint_list_stream(None, None)
+        .try_collect::<Vec<_>>()
+        .await
+        .context("listing blueprints")?
+        .into_iter()
+        .map(|blueprint| {
+            let is_target = match target_id {
+                Some(target_id) if target_id == blueprint.id => "*",
+                _ => "",
+            };
+
+            BlueprintRow {
+                is_target,
+                id: blueprint.id.to_string(),
+                parent: blueprint
+                    .parent_blueprint_id
+                    .map(|s| s.to_string())
+                    .unwrap_or_else(|| String::from("<none>")),
+                time_created: humantime::format_rfc3339_millis(
+                    blueprint.time_created.into(),
+                )
+                .to_string(),
+            }
+        })
+        .collect();
+
+    let table = tabled::Table::new(rows)
+        .with(tabled::settings::Style::empty())
+        .with(tabled::settings::Padding::new(0, 1, 0, 0))
+        .to_string();
+
+    println!("{}", table);
+    Ok(())
+}
+
+async fn cmd_nexus_blueprints_show(
+    client: &nexus_client::Client,
+    args: &BlueprintIdArgs,
+) -> Result<(), anyhow::Error> {
+    let blueprint = client
+        .blueprint_view(&args.blueprint_id)
+        .await
+        .with_context(|| format!("fetching blueprint {}", args.blueprint_id))?;
+    println!("blueprint {}", blueprint.id);
+    println!(
+        "parent: {}",
+        blueprint
+            .parent_blueprint_id
+            .map(|u| u.to_string())
+            .unwrap_or_else(|| String::from("<none>"))
+    );
+    println!(
+        "created by {}{}",
+        blueprint.creator,
+        if blueprint.creator.parse::<Uuid>().is_ok() {
+            " (likely a Nexus instance)"
+        } else {
+            ""
+        }
+    );
+    println!(
+        "created at {}",
+        humantime::format_rfc3339_millis(blueprint.time_created.into(),)
+    );
+    println!("comment: {}", blueprint.comment);
+    println!("zones:\n");
+    for (sled_id, sled_zones) in &blueprint.omicron_zones {
+        println!(
+            "  sled {}: Omicron zones at generation {}",
+            sled_id, sled_zones.generation
+        );
+        for z in &sled_zones.zones {
+            println!("    {} {}", z.id, z.zone_type.label());
+        }
+    }
+
+    Ok(())
+}
+
+async fn cmd_nexus_blueprints_diff(
+    client: &nexus_client::Client,
+    args: &BlueprintIdsArgs,
+) -> Result<(), anyhow::Error> {
+    let b1 = client.blueprint_view(&args.blueprint1_id).await.with_context(
+        || format!("fetching blueprint {}", args.blueprint1_id),
+    )?;
+    let b2 = client.blueprint_view(&args.blueprint2_id).await.with_context(
+        || format!("fetching blueprint {}", args.blueprint2_id),
+    )?;
+    println!("{}", b1.diff(&b2));
+    Ok(())
+}
+
+async fn cmd_nexus_blueprints_delete(
+    client: &nexus_client::Client,
+    args: &BlueprintIdArgs,
+) -> Result<(), anyhow::Error> {
+    let _ = client
+        .blueprint_delete(&args.blueprint_id)
+        .await
+        .with_context(|| format!("deleting blueprint {}", args.blueprint_id))?;
+    println!("blueprint {} deleted", args.blueprint_id);
+    Ok(())
+}
+
+async fn cmd_nexus_blueprints_target_show(
+    client: &nexus_client::Client,
+) -> Result<(), anyhow::Error> {
+    let target = client
+        .blueprint_target_view()
+        .await
+        .context("fetching target blueprint")?;
+    println!("target blueprint: {}", target.target_id);
+    println!("set at: {}", target.time_set);
+    println!("enabled: {}", target.enabled);
+    Ok(())
+}
+
+async fn cmd_nexus_blueprints_target_set(
+    client: &nexus_client::Client,
+    args: &BlueprintIdArgs,
+) -> Result<(), anyhow::Error> {
+    // Try to preserve the value of "enabled", if possible.
+    let enabled = client
+        .blueprint_target_view()
+        .await
+        .map(|current| current.into_inner().enabled)
+        .unwrap_or(true);
+    client
+        .blueprint_target_set(&nexus_client::types::BlueprintTargetSet {
+            target_id: args.blueprint_id,
+            enabled,
+        })
+        .await
+        .with_context(|| {
+            format!("setting target to blueprint {}", args.blueprint_id)
+        })?;
+    eprintln!("set target blueprint to {}", args.blueprint_id);
+    Ok(())
+}
+
+async fn cmd_nexus_blueprints_generate_from_collection(
+    client: &nexus_client::Client,
+    args: &CollectionIdArgs,
+) -> Result<(), anyhow::Error> {
+    let blueprint = client
+        .blueprint_generate_from_collection(
+            &nexus_client::types::CollectionId {
+                collection_id: args.collection_id,
+            },
+        )
+        .await
+        .context("creating blueprint from collection id")?;
+    eprintln!("created blueprint {} from collection id", blueprint.id);
+    Ok(())
+}
+
+async fn cmd_nexus_blueprints_regenerate(
+    client: &nexus_client::Client,
+) -> Result<(), anyhow::Error> {
+    let blueprint =
+        client.blueprint_regenerate().await.context("generating blueprint")?;
+    eprintln!("generated new blueprint {}", blueprint.id);
+    Ok(())
+}

diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out
index 3c5f099c61..2790b0ef83 100644
--- a/dev-tools/omdb/tests/usage_errors.out
+++ b/dev-tools/omdb/tests/usage_errors.out
@@ -289,6 +289,7 @@ Usage: omdb nexus [OPTIONS] <COMMAND>
 
 Commands:
   background-tasks  print information about background tasks
+  blueprints        print information about blueprints
   help              Print this message or the help of the given subcommand(s)
 
 Options:

diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs
index 92f37f6124..bf1d9b763b 100644
--- a/internal-dns/src/config.rs
+++ b/internal-dns/src/config.rs
@@ -83,7 +83,7 @@ pub enum ZoneVariant {
 
 /// Used to construct the DNS name for a control plane host
 #[derive(Clone, Debug, PartialEq, PartialOrd)]
-enum Host {
+pub enum Host {
     /// Used to construct an AAAA record for a sled.
     Sled(Uuid),
@@ -92,6 +92,10 @@ enum Host {
 }
 
 impl Host {
+    pub fn for_zone(id: Uuid, variant: ZoneVariant) -> Host {
+        Host::Zone { id, variant }
+    }
+
     /// Returns the DNS name for this host, ignoring the zone part of the DNS
     /// name
     pub(crate) fn dns_name(&self) -> String {
@@ -105,6 +109,12 @@ impl Host {
             }
         }
     }
+
+    /// Returns the full-qualified DNS name, including the zone name of the
+    /// control plane's internal DNS zone
+    pub fn fqdn(&self) -> String {
+        format!("{}.{}", self.dns_name(), DNS_ZONE)
+    }
 }
 
 /// Builder for assembling DNS data for the control plane's DNS zone
@@ -168,8 +178,12 @@ pub struct Zone {
 }
 
 impl Zone {
+    pub(crate) fn to_host(&self) -> Host {
+        Host::Zone { id: self.id, variant: self.variant }
+    }
+
     pub(crate) fn dns_name(&self) -> String {
-        Host::Zone { id: self.id, variant: self.variant }.dns_name()
+        self.to_host().dns_name()
     }
 }
@@ -393,7 +407,7 @@ impl DnsConfigBuilder {
                         prio: 0,
                         weight: 0,
                         port,
-                        target: format!("{}.{}", zone.dns_name(), DNS_ZONE),
+                        target: zone.to_host().fqdn(),
                     })
                 })
                 .collect();
@@ -412,11 +426,7 @@ impl DnsConfigBuilder {
                         prio: 0,
                         weight: 0,
                         port,
-                        target: format!(
-                            "{}.{}",
-                            Host::Sled(sled.0).dns_name(),
-                            DNS_ZONE
-                        ),
+                        target: Host::Sled(sled.0).fqdn(),
                     })
                 })
                 .collect();
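With `Host` made public and the new `for_zone`/`fqdn` helpers above, code outside this module can construct fully-qualified control plane DNS names directly. A brief sketch (the zone id is random; the exact name produced depends on the zone variant and the internal DNS zone constant):

    use internal_dns::config::{Host, ZoneVariant};
    use uuid::Uuid;

    fn demo() {
        let zone_id = Uuid::new_v4();
        let host = Host::for_zone(zone_id, ZoneVariant::Other);
        // e.g. "<zone-id>.host.control-plane.oxide.internal"
        println!("{}", host.fqdn());
    }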
diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml
index c50f482be4..3feb333ee3 100644
--- a/nexus/Cargo.toml
+++ b/nexus/Cargo.toml
@@ -30,6 +30,7 @@ headers.workspace = true
 hex.workspace = true
 http.workspace = true
 hyper.workspace = true
+illumos-utils.workspace = true
 internal-dns.workspace = true
 ipnetwork.workspace = true
 macaddr.workspace = true
@@ -63,6 +64,7 @@ sled-agent-client.workspace = true
 slog.workspace = true
 slog-async.workspace = true
 slog-dtrace.workspace = true
+slog-error-chain.workspace = true
 slog-term.workspace = true
 steno.workspace = true
 tempfile.workspace = true
@@ -76,6 +78,7 @@ uuid.workspace = true
 nexus-defaults.workspace = true
 nexus-db-model.workspace = true
 nexus-db-queries.workspace = true
+nexus-deployment.workspace = true
 nexus-inventory.workspace = true
 nexus-types.workspace = true
 omicron-common.workspace = true

diff --git a/nexus/db-model/src/external_ip.rs b/nexus/db-model/src/external_ip.rs
index 6b3f8d5110..e95185658f 100644
--- a/nexus/db-model/src/external_ip.rs
+++ b/nexus/db-model/src/external_ip.rs
@@ -100,7 +100,9 @@ pub struct FloatingIp {
     pub project_id: Uuid,
 }
 
-impl From<ExternalIp> for sled_agent_client::types::SourceNatConfig {
+impl From<ExternalIp>
+    for omicron_common::api::internal::shared::SourceNatConfig
+{
     fn from(eip: ExternalIp) -> Self {
         Self {
             ip: eip.ip.ip(),

diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs
index 4e3e5fad56..17d74be0aa 100644
--- a/nexus/db-model/src/inventory.rs
+++ b/nexus/db-model/src/inventory.rs
@@ -942,7 +942,7 @@ impl InvOmicronZone {
         let nic = match (self.nic_id, nic_row) {
             (Some(expected_id), Some(nic_row)) => {
                 ensure!(expected_id == nic_row.id, "caller provided wrong NIC");
-                Ok(nic_row.into_network_interface_for_zone(self.id))
+                Ok(nic_row.into_network_interface_for_zone(self.id)?)
             }
             (None, None) => Err(anyhow!(
                 "expected zone to have an associated NIC, but it doesn't"
@@ -1125,13 +1125,9 @@ impl InvOmicronZoneNic {
                 id: nic.id,
                 name: Name::from(nic.name.clone()),
                 ip: IpNetwork::from(nic.ip),
-                mac: MacAddr::from(
-                    omicron_common::api::external::MacAddr::from(
-                        nic.mac.clone(),
-                    ),
-                ),
+                mac: MacAddr::from(nic.mac),
                 subnet: IpNetwork::from(nic.subnet.clone()),
-                vni: SqlU32::from(nic.vni.0),
+                vni: SqlU32::from(u32::from(nic.vni)),
                 is_primary: nic.primary,
                 slot: SqlU8::from(nic.slot),
             }))
@@ -1143,19 +1139,20 @@ impl InvOmicronZoneNic {
     pub fn into_network_interface_for_zone(
         self,
         zone_id: Uuid,
-    ) -> nexus_types::inventory::NetworkInterface {
-        nexus_types::inventory::NetworkInterface {
+    ) -> Result<nexus_types::inventory::NetworkInterface> {
+        Ok(nexus_types::inventory::NetworkInterface {
             id: self.id,
             ip: self.ip.ip(),
             kind: nexus_types::inventory::NetworkInterfaceKind::Service(
                 zone_id,
             ),
-            mac: (*self.mac).into(),
+            mac: *self.mac,
             name: self.name.into(),
             primary: self.is_primary,
             slot: *self.slot,
-            vni: nexus_types::inventory::Vni::from(*self.vni),
+            vni: omicron_common::api::external::Vni::try_from(*self.vni)
+                .context("parsing VNI")?,
             subnet: self.subnet.into(),
-        }
+        })
     }
 }

diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index ed819cba80..68991f1d75 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -938,6 +938,11 @@ table! {
     }
 }
 
+allow_tables_to_appear_in_same_query! {
+    zpool,
+    physical_disk
+}
+
 table! {
     dataset (id) {
         id -> Uuid,

diff --git a/nexus/db-queries/src/authz/api_resources.rs b/nexus/db-queries/src/authz/api_resources.rs
index 8485b8f11f..444a00d5ad 100644
--- a/nexus/db-queries/src/authz/api_resources.rs
+++ b/nexus/db-queries/src/authz/api_resources.rs
@@ -250,6 +250,57 @@ impl ApiResourceWithRolesType for Fleet {
 
 // TODO: refactor synthetic resources below
 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct BlueprintConfig;
+
+pub const BLUEPRINT_CONFIG: BlueprintConfig = BlueprintConfig;
+
+impl oso::PolarClass for BlueprintConfig {
+    fn get_polar_class_builder() -> oso::ClassBuilder<Self> {
+        oso::Class::builder()
+            .with_equality_check()
+            .add_attribute_getter("fleet", |_: &BlueprintConfig| FLEET)
+    }
+}
+
+impl AuthorizedResource for BlueprintConfig {
+    fn load_roles<'a, 'b, 'c, 'd, 'e, 'f>(
+        &'a self,
+        opctx: &'b OpContext,
+        datastore: &'c DataStore,
+        authn: &'d authn::Context,
+        roleset: &'e mut RoleSet,
+    ) -> futures::future::BoxFuture<'f, Result<(), Error>>
+    where
+        'a: 'f,
+        'b: 'f,
+        'c: 'f,
+        'd: 'f,
+        'e: 'f,
+    {
+        // There are no roles on the BlueprintConfig, only permissions.  But we
+        // still need to load the Fleet-related roles to verify that the actor
+        // has the "admin" role on the Fleet (possibly conferred from a Silo
+        // role).
+        load_roles_for_resource_tree(&FLEET, opctx, datastore, authn, roleset)
+            .boxed()
+    }
+
+    fn on_unauthorized(
+        &self,
+        _: &Authz,
+        error: Error,
+        _: AnyActor,
+        _: Action,
+    ) -> Error {
+        error
+    }
+
+    fn polar_class(&self) -> oso::Class {
+        Self::get_polar_class()
+    }
+}
+
 /// ConsoleSessionList is a synthetic resource used for modeling who has access
 /// to create sessions.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
@@ -842,6 +893,14 @@
 
 // Miscellaneous resources nested directly below "Fleet"
 
+authz_resource! {
+    name = "Blueprint",
+    parent = "Fleet",
+    primary_key = Uuid,
+    roles_allowed = false,
+    polar_snippet = FleetChild,
+}
+
 authz_resource! {
     name = "ConsoleSession",
     parent = "Fleet",

diff --git a/nexus/db-queries/src/authz/omicron.polar b/nexus/db-queries/src/authz/omicron.polar
index 87fdf72f6a..f9382401fd 100644
--- a/nexus/db-queries/src/authz/omicron.polar
+++ b/nexus/db-queries/src/authz/omicron.polar
@@ -365,6 +365,24 @@ resource DnsConfig {
 has_relation(fleet: Fleet, "parent_fleet", dns_config: DnsConfig)
     if dns_config.fleet = fleet;
 
+# Describes the policy for accessing blueprints
+resource BlueprintConfig {
+    permissions = [
+        "list_children",  # list blueprints
+        "create_child",   # create blueprint
+        "read",           # read the current target
+        "modify",         # change the current target
+    ];
+
+    relations = { parent_fleet: Fleet };
+    "create_child" if "admin" on "parent_fleet";
+    "modify" if "admin" on "parent_fleet";
+    "list_children" if "viewer" on "parent_fleet";
+    "read" if "viewer" on "parent_fleet";
+}
+has_relation(fleet: Fleet, "parent_fleet", list: BlueprintConfig)
+    if list.fleet = fleet;
+
 # Describes the policy for reading and modifying low-level inventory
 resource Inventory {
     permissions = [ "read", "modify" ];

diff --git a/nexus/db-queries/src/authz/oso_generic.rs b/nexus/db-queries/src/authz/oso_generic.rs
index 6098379287..9b842216b4 100644
--- a/nexus/db-queries/src/authz/oso_generic.rs
+++ b/nexus/db-queries/src/authz/oso_generic.rs
@@ -103,6 +103,7 @@
         Action::get_polar_class(),
         AnyActor::get_polar_class(),
         AuthenticatedActor::get_polar_class(),
+        BlueprintConfig::get_polar_class(),
         Database::get_polar_class(),
         DnsConfig::get_polar_class(),
         Fleet::get_polar_class(),
@@ -137,6 +138,7 @@
         SiloImage::init(),
         // Fleet-level resources
         AddressLot::init(),
+        Blueprint::init(),
        LoopbackAddress::init(),
         Certificate::init(),
         ConsoleSession::init(),

diff --git a/nexus/db-queries/src/authz/policy_test/resource_builder.rs b/nexus/db-queries/src/authz/policy_test/resource_builder.rs
index f10c969038..dc18b2e47f 100644
--- a/nexus/db-queries/src/authz/policy_test/resource_builder.rs
+++ b/nexus/db-queries/src/authz/policy_test/resource_builder.rs
@@ -243,6 +243,7 @@ macro_rules! impl_dyn_authorized_resource_for_global {
 }
 
 impl_dyn_authorized_resource_for_global!(authz::oso_generic::Database);
+impl_dyn_authorized_resource_for_global!(authz::BlueprintConfig);
 impl_dyn_authorized_resource_for_global!(authz::ConsoleSessionList);
 impl_dyn_authorized_resource_for_global!(authz::DeviceAuthRequestList);
 impl_dyn_authorized_resource_for_global!(authz::DnsConfig);

diff --git a/nexus/db-queries/src/authz/policy_test/resources.rs b/nexus/db-queries/src/authz/policy_test/resources.rs
index 8bdd97923b..9cc4e28790 100644
--- a/nexus/db-queries/src/authz/policy_test/resources.rs
+++ b/nexus/db-queries/src/authz/policy_test/resources.rs
@@ -64,6 +64,7 @@ pub async fn make_resources(
     // Global resources
     builder.new_resource(authz::DATABASE);
     builder.new_resource_with_users(authz::FLEET).await;
+    builder.new_resource(authz::BLUEPRINT_CONFIG);
     builder.new_resource(authz::CONSOLE_SESSION_LIST);
     builder.new_resource(authz::DNS_CONFIG);
     builder.new_resource(authz::DEVICE_AUTH_REQUEST_LIST);
@@ -118,6 +119,13 @@
         LookupType::ByName(device_access_token),
     ));
 
+    let blueprint_id = "b9e923f6-caf3-4c83-96f9-8ffe8c627dd2".parse().unwrap();
+    builder.new_resource(authz::Blueprint::new(
+        authz::FLEET,
+        blueprint_id,
+        LookupType::ById(blueprint_id),
+    ));
+
     let system_update_id =
         "9c86d713-1bc2-4927-9892-ada3eb6f5f62".parse().unwrap();
     builder.new_resource(authz::SystemUpdate::new(

diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs
index b7ff058234..bdacb0e7b9 100644
--- a/nexus/db-queries/src/db/datastore/inventory.rs
+++ b/nexus/db-queries/src/db/datastore/inventory.rs
@@ -1231,7 +1231,7 @@ impl DataStore {
 
     /// Attempt to read the given collection while limiting queries to `limit`
     /// records and returning nothing if `limit` is not large enough.
-    async fn inventory_collection_read_all_or_nothing(
+    pub async fn inventory_collection_read_all_or_nothing(
         &self,
         opctx: &OpContext,
         id: Uuid,

diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs
index be12ea5231..d715bf3889 100644
--- a/nexus/db-queries/src/db/datastore/network_interface.rs
+++ b/nexus/db-queries/src/db/datastore/network_interface.rs
@@ -78,9 +78,9 @@
             kind,
             name: nic.name.into(),
             ip: nic.ip.ip(),
-            mac: sled_client_types::MacAddr::from(nic.mac.0),
+            mac: nic.mac.0,
             subnet: sled_client_types::IpNet::from(ip_subnet),
-            vni: sled_client_types::Vni::from(nic.vni.0),
+            vni: nic.vni.0,
             primary: nic.primary,
             slot: u8::try_from(nic.slot).unwrap(),
         }

diff --git a/nexus/db-queries/src/db/datastore/zpool.rs b/nexus/db-queries/src/db/datastore/zpool.rs
index 5d6c0844ef..79e5f5a55a 100644
--- a/nexus/db-queries/src/db/datastore/zpool.rs
+++ b/nexus/db-queries/src/db/datastore/zpool.rs
@@ -5,21 +5,29 @@
 //! [`DataStore`] methods on [`Zpool`]s.
 
 use super::DataStore;
+use crate::authz;
 use crate::db;
 use crate::db::collection_insert::AsyncInsertError;
 use crate::db::collection_insert::DatastoreCollection;
+use crate::db::datastore::OpContext;
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
 use crate::db::identity::Asset;
 use crate::db::model::Sled;
 use crate::db::model::Zpool;
+use crate::db::pagination::paginated;
+use async_bb8_diesel::AsyncRunQueryDsl;
 use chrono::Utc;
 use diesel::prelude::*;
 use diesel::upsert::excluded;
+use nexus_db_model::PhysicalDiskKind;
 use omicron_common::api::external::CreateResult;
+use omicron_common::api::external::DataPageParams;
 use omicron_common::api::external::Error;
+use omicron_common::api::external::ListResultVec;
 use omicron_common::api::external::LookupType;
 use omicron_common::api::external::ResourceType;
+use uuid::Uuid;
 
 impl DataStore {
     /// Stores a new zpool in the database.
@@ -57,4 +65,29 @@ impl DataStore {
             ),
         })
     }
+
+    /// Paginates through all zpools on U.2 disks in all sleds
+    pub async fn zpool_list_all_external(
+        &self,
+        opctx: &OpContext,
+        pagparams: &DataPageParams<'_, Uuid>,
+    ) -> ListResultVec<Zpool> {
+        opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?;
+
+        use db::schema::physical_disk::dsl as dsl_physical_disk;
+        use db::schema::zpool::dsl as dsl_zpool;
+        paginated(dsl_zpool::zpool, dsl_zpool::id, pagparams)
+            .filter(dsl_zpool::time_deleted.is_null())
+            .inner_join(
+                db::schema::physical_disk::table.on(
+                    dsl_zpool::physical_disk_id.eq(dsl_physical_disk::id).and(
+                        dsl_physical_disk::variant.eq(PhysicalDiskKind::U2),
+                    ),
+                ),
+            )
+            .select(Zpool::as_select())
+            .load_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
 }
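`zpool_list_all_external` follows the usual datastore pagination pattern. A rough sketch of fetching one page, assuming a Nexus `opctx` and `datastore` are in scope (the page size and field names for `DataPageParams` are my reading of the common API types, not part of the patch):

    use omicron_common::api::external::DataPageParams;
    use std::num::NonZeroU32;

    async fn demo(
        opctx: &nexus_db_queries::context::OpContext,
        datastore: &nexus_db_queries::db::DataStore,
    ) -> Result<(), omicron_common::api::external::Error> {
        let pagparams = DataPageParams {
            marker: None,
            direction: dropshot::PaginationOrder::Ascending,
            limit: NonZeroU32::new(100).unwrap(),
        };
        let zpools =
            datastore.zpool_list_all_external(opctx, &pagparams).await?;
        for pool in &zpools {
            // Each entry is a zpool whose backing physical disk is a U.2.
            println!("zpool {}", pool.id());
        }
        Ok(())
    }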
diff --git a/nexus/db-queries/tests/output/authz-roles.out b/nexus/db-queries/tests/output/authz-roles.out
index 54fb6481a9..26cc13fc6a 100644
--- a/nexus/db-queries/tests/output/authz-roles.out
+++ b/nexus/db-queries/tests/output/authz-roles.out
@@ -26,6 +26,20 @@ resource: Fleet id "001de000-1334-4000-8000-000000000000"
     silo1-proj1-viewer        ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
     unauthenticated           !  !  !  !  !  !  !  !
 
+resource: authz::BlueprintConfig
+
+    USER                      Q  R  LC RP M  MP CC D
+    fleet-admin               ✘  ✔  ✔  ✔  ✔  ✔  ✔  ✔
+    fleet-collaborator        ✘  ✔  ✔  ✔  ✘  ✘  ✘  ✘
+    fleet-viewer              ✘  ✔  ✔  ✔  ✘  ✘  ✘  ✘
+    silo1-admin               ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
+    silo1-collaborator        ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
+    silo1-viewer              ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
+    silo1-proj1-admin         ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
+    silo1-proj1-collaborator  ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
+    silo1-proj1-viewer        ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
+    unauthenticated           !  !  !  !  !  !  !  !
+
 resource: authz::ConsoleSessionList
 
     USER                      Q  R  LC RP M  MP CC D
@@ -922,6 +936,20 @@ resource: DeviceAccessToken "a-device-access-token"
     silo1-proj1-viewer        ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
     unauthenticated           !  !  !  !  !  !  !  !
 
+resource: Blueprint id "b9e923f6-caf3-4c83-96f9-8ffe8c627dd2"
+
+    USER                      Q  R  LC RP M  MP CC D
+    fleet-admin               ✘  ✔  ✔  ✔  ✔  ✔  ✔  ✔
+    fleet-collaborator        ✘  ✔  ✔  ✔  ✘  ✘  ✘  ✘
+    fleet-viewer              ✘  ✔  ✔  ✔  ✘  ✘  ✘  ✘
+    silo1-admin               ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
+    silo1-collaborator        ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
+    silo1-viewer              ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
+    silo1-proj1-admin         ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
+    silo1-proj1-collaborator  ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
+    silo1-proj1-viewer        ✘  ✘  ✘  ✘  ✘  ✘  ✘  ✘
+    unauthenticated           !  !  !  !  !  !  !  !
+
 resource: SystemUpdate id "9c86d713-1bc2-4927-9892-ada3eb6f5f62"
 
     USER                      Q  R  LC RP M  MP CC D

diff --git a/nexus/deployment/Cargo.toml b/nexus/deployment/Cargo.toml
new file mode 100644
index 0000000000..b166f947bf
--- /dev/null
+++ b/nexus/deployment/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "nexus-deployment"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+anyhow.workspace = true
+chrono.workspace = true
+internal-dns.workspace = true
+ipnet.workspace = true
+ipnetwork.workspace = true
+nexus-types.workspace = true
+omicron-common.workspace = true
+slog.workspace = true
+thiserror.workspace = true
+uuid.workspace = true
+
+omicron-workspace-hack.workspace = true
+
+[dev-dependencies]
+nexus-inventory.workspace = true
+omicron-test-utils.workspace = true
+sled-agent-client.workspace = true

diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs
new file mode 100644
index 0000000000..689e2d8e2c
--- /dev/null
+++ b/nexus/deployment/src/blueprint_builder.rs
@@ -0,0 +1,683 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Low-level facility for generating Blueprints
+
+use crate::ip_allocator::IpAllocator;
+use anyhow::anyhow;
+use internal_dns::config::Host;
+use internal_dns::config::ZoneVariant;
+use ipnet::IpAdd;
+use nexus_types::deployment::Blueprint;
+use nexus_types::deployment::OmicronZoneConfig;
+use nexus_types::deployment::OmicronZoneDataset;
+use nexus_types::deployment::OmicronZoneType;
+use nexus_types::deployment::OmicronZonesConfig;
+use nexus_types::deployment::Policy;
+use nexus_types::deployment::SledResources;
+use nexus_types::deployment::ZpoolName;
+use nexus_types::inventory::Collection;
+use omicron_common::address::get_internal_dns_server_addresses;
+use omicron_common::address::get_sled_address;
+use omicron_common::address::get_switch_zone_address;
+use omicron_common::address::CP_SERVICES_RESERVED_ADDRESSES;
+use omicron_common::address::NTP_PORT;
+use omicron_common::address::SLED_RESERVED_ADDRESSES;
+use omicron_common::api::external::Generation;
+use std::collections::BTreeMap;
+use std::collections::BTreeSet;
+use std::net::Ipv6Addr;
+use std::net::SocketAddrV6;
+use thiserror::Error;
+use uuid::Uuid;
+
+/// Errors encountered while assembling blueprints
+#[derive(Debug, Error)]
+pub enum Error {
+    #[error("sled {sled_id}: ran out of available addresses for sled")]
+    OutOfAddresses { sled_id: Uuid },
+    #[error("programming error in planner")]
+    Planner(#[from] anyhow::Error),
+}
+
+/// Describes whether an idempotent "ensure" operation resulted in action taken
+/// or no action was necessary
+#[derive(Debug, Clone, Copy, Eq, PartialEq)]
+pub enum Ensure {
+    /// action was taken
+    Added,
+    /// no action was necessary
+    NotNeeded,
+}
+
+/// Helper for assembling a blueprint
+///
+/// There are two basic ways to assemble a new blueprint:
+///
+/// 1. Build one directly from a collection.  Such blueprints have no parent
+///    blueprint.  They are not customizable.  Use
+///    [`BlueprintBuilder::build_initial_from_collection`] for this.  This would
+///    generally only be used once in the lifetime of a rack, to assemble the
+///    first blueprint.
+///
+/// 2. Build one _from_ another blueprint, called the "parent", making changes
+///    as desired.  Use [`BlueprintBuilder::new_based_on`] for this.  Once the
+///    new blueprint is created, there is no dependency on the parent one.
+///    However, the new blueprint can only be made the system's target if its
+///    parent is the current target.
+pub struct BlueprintBuilder<'a> {
+    /// previous blueprint, on which this one will be based
+    parent_blueprint: &'a Blueprint,
+
+    // These fields are used to allocate resources from sleds.
+    policy: &'a Policy,
+    sled_ip_allocators: BTreeMap<Uuid, IpAllocator>,
+
+    // These fields will become part of the final blueprint.  See the
+    // corresponding fields in `Blueprint`.
+    omicron_zones: BTreeMap<Uuid, OmicronZonesConfig>,
+    zones_in_service: BTreeSet<Uuid>,
+    creator: String,
+    comments: Vec<String>,
+}
+
+impl<'a> BlueprintBuilder<'a> {
+    /// Directly construct a `Blueprint` from the contents of a particular
+    /// collection (representing no changes from the collection state)
+    pub fn build_initial_from_collection(
+        collection: &'a Collection,
+        policy: &'a Policy,
+        creator: &str,
+    ) -> Result<Blueprint, Error> {
+        let omicron_zones = policy
+            .sleds
+            .keys()
+            .map(|sled_id| {
+                let zones = collection
+                    .omicron_zones
+                    .get(sled_id)
+                    .map(|z| z.zones.clone())
+                    .ok_or_else(|| {
+                        // We should not find a sled that's supposed to be
+                        // in-service but is not part of the inventory.  It's
+                        // not that that can't ever happen.  This could happen
+                        // when a sled is first being added to the system.  Of
+                        // course it could also happen if this sled agent failed
+                        // our inventory request.  But this is the initial
+                        // blueprint (so this shouldn't be the "add sled" case)
+                        // and we want to get it right (so we don't want to
+                        // leave out sleds whose sled agent happened to be down
+                        // when we tried to do this).  The operator (or, more
+                        // likely, a support person) will have to sort out
+                        // what's going on if this happens.
+                        Error::Planner(anyhow!(
+                            "building initial blueprint: sled {:?} is \
+                            supposed to be in service but has no zones \
+                            in inventory",
+                            sled_id
+                        ))
+                    })?;
+                Ok((*sled_id, zones))
+            })
+            .collect::<Result<BTreeMap<_, _>, Error>>()?;
+        let zones_in_service =
+            collection.all_omicron_zones().map(|z| z.id).collect();
+        Ok(Blueprint {
+            id: Uuid::new_v4(),
+            omicron_zones: omicron_zones,
+            zones_in_service,
+            parent_blueprint_id: None,
+            time_created: chrono::Utc::now(),
+            creator: creator.to_owned(),
+            comment: format!("from collection {}", collection.id),
+        })
+    }
+
+    /// Construct a new `BlueprintBuilder` based on a previous blueprint,
+    /// starting with no changes from that state
+    pub fn new_based_on(
+        parent_blueprint: &'a Blueprint,
+        policy: &'a Policy,
+        creator: &str,
+    ) -> BlueprintBuilder<'a> {
+        BlueprintBuilder {
+            parent_blueprint,
+            policy,
+            sled_ip_allocators: BTreeMap::new(),
+            omicron_zones: BTreeMap::new(),
+            zones_in_service: parent_blueprint.zones_in_service.clone(),
+            creator: creator.to_owned(),
+            comments: Vec::new(),
+        }
+    }
+
+    /// Assemble a final [`Blueprint`] based on the contents of the builder
+    pub fn build(mut self) -> Blueprint {
+        // Collect the Omicron zones config for each in-service sled.
+        let omicron_zones = self
+            .policy
+            .sleds
+            .keys()
+            .map(|sled_id| {
+                // Start with self.omicron_zones, which contains entries for any
+                // sled whose zones config is changing in this blueprint.
+                let zones = self
+                    .omicron_zones
+                    .remove(sled_id)
+                    // If it's not there, use the config from the parent
+                    // blueprint.
+                    .or_else(|| {
+                        self.parent_blueprint
+                            .omicron_zones
+                            .get(sled_id)
+                            .cloned()
+                    })
+                    // If it's not there either, then this must be a new sled
+                    // and we haven't added any zones to it yet.  Use the
+                    // standard initial config.
+                    .unwrap_or_else(|| OmicronZonesConfig {
+                        generation: Generation::new(),
+                        zones: vec![],
+                    });
+                (*sled_id, zones)
+            })
+            .collect();
+        Blueprint {
+            id: Uuid::new_v4(),
+            omicron_zones: omicron_zones,
+            zones_in_service: self.zones_in_service,
+            parent_blueprint_id: Some(self.parent_blueprint.id),
+            time_created: chrono::Utc::now(),
+            creator: self.creator,
+            comment: self.comments.join(", "),
+        }
+    }
+
+    /// Sets the blueprints "comment"
+    ///
+    /// This is a short human-readable string summarizing the changes reflected
+    /// in the blueprint.  This is only intended for debugging.
+    pub fn comment<S>(&mut self, comment: S)
+    where
+        String: From<S>,
+    {
+        self.comments.push(String::from(comment));
+    }
+
+    pub fn sled_ensure_zone_ntp(
+        &mut self,
+        sled_id: Uuid,
+    ) -> Result<Ensure, Error> {
+        // If there's already an NTP zone on this sled, do nothing.
+        let has_ntp = self
+            .parent_blueprint
+            .omicron_zones
+            .get(&sled_id)
+            .map(|found_zones| {
+                found_zones.zones.iter().any(|z| z.zone_type.is_ntp())
+            })
+            .unwrap_or(false);
+        if has_ntp {
+            return Ok(Ensure::NotNeeded);
+        }
+
+        let sled_info = self.sled_resources(sled_id)?;
+        let sled_subnet = sled_info.subnet;
+        let ip = self.sled_alloc_ip(sled_id)?;
+        let ntp_address = SocketAddrV6::new(ip, NTP_PORT, 0, 0);
+
+        // Construct the list of internal DNS servers.
+        //
+        // It'd be tempting to get this list from the other internal NTP
+        // servers but there may not be any of those.  We could also
+        // construct this list manually from the set of internal DNS servers
+        // actually deployed.  Instead, we take the same approach as RSS:
+        // these are at known, fixed addresses relative to the AZ subnet
+        // (which itself is a known-prefix parent subnet of the sled subnet).
+        let dns_servers =
+            get_internal_dns_server_addresses(sled_subnet.net().network());
+
+        // The list of boundary NTP servers is not necessarily stored
+        // anywhere (unless there happens to be another internal NTP zone
+        // lying around).  Recompute it based on what boundary servers
+        // currently exist.
+        let ntp_servers = self
+            .parent_blueprint
+            .all_omicron_zones()
+            .filter_map(|(_, z)| {
+                if matches!(z.zone_type, OmicronZoneType::BoundaryNtp { .. }) {
+                    Some(Host::for_zone(z.id, ZoneVariant::Other).fqdn())
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        let zone = OmicronZoneConfig {
+            id: Uuid::new_v4(),
+            underlay_address: ip,
+            zone_type: OmicronZoneType::InternalNtp {
+                address: ntp_address.to_string(),
+                ntp_servers,
+                dns_servers,
+                domain: None,
+            },
+        };
+
+        self.sled_add_zone(sled_id, zone)?;
+        Ok(Ensure::Added)
+    }
+
+    pub fn sled_ensure_zone_crucible(
+        &mut self,
+        sled_id: Uuid,
+        pool_name: ZpoolName,
+    ) -> Result<Ensure, Error> {
+        // If this sled already has a Crucible zone on this pool, do nothing.
+        let has_crucible_on_this_pool = self
+            .parent_blueprint
+            .omicron_zones
+            .get(&sled_id)
+            .map(|found_zones| {
+                found_zones.zones.iter().any(|z| {
+                    matches!(
+                        &z.zone_type,
+                        OmicronZoneType::Crucible { dataset, .. }
} + if dataset.pool_name == pool_name + ) + }) + }) + .unwrap_or(false); + if has_crucible_on_this_pool { + return Ok(Ensure::NotNeeded); + } + + let sled_info = self.sled_resources(sled_id)?; + if !sled_info.zpools.contains(&pool_name) { + return Err(Error::Planner(anyhow!( + "adding crucible zone for sled {:?}: \ + attempted to use unknown zpool {:?}", + sled_id, + pool_name + ))); + } + + let ip = self.sled_alloc_ip(sled_id)?; + let port = omicron_common::address::CRUCIBLE_PORT; + let address = SocketAddrV6::new(ip, port, 0, 0).to_string(); + let zone = OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: ip, + zone_type: OmicronZoneType::Crucible { + address, + dataset: OmicronZoneDataset { pool_name }, + }, + }; + self.sled_add_zone(sled_id, zone)?; + Ok(Ensure::Added) + } + + fn sled_add_zone( + &mut self, + sled_id: Uuid, + zone: OmicronZoneConfig, + ) -> Result<(), Error> { + // Check the sled id and return an appropriate error if it's invalid. + let _ = self.sled_resources(sled_id)?; + + if !self.zones_in_service.insert(zone.id) { + return Err(Error::Planner(anyhow!( + "attempted to add zone that already exists: {}", + zone.id + ))); + } + + let sled_zones = + self.omicron_zones.entry(sled_id).or_insert_with(|| { + if let Some(old_sled_zones) = + self.parent_blueprint.omicron_zones.get(&sled_id) + { + OmicronZonesConfig { + generation: old_sled_zones.generation.next(), + zones: old_sled_zones.zones.clone(), + } + } else { + // The first generation is reserved to mean the one + // containing no zones. See + // OMICRON_ZONES_CONFIG_INITIAL_GENERATION. So we start + // with the next one. + OmicronZonesConfig { + generation: Generation::new().next(), + zones: vec![], + } + } + }); + + sled_zones.zones.push(zone); + Ok(()) + } + + /// Returns a newly-allocated underlay address suitable for use by Omicron + /// zones + fn sled_alloc_ip(&mut self, sled_id: Uuid) -> Result { + let sled_subnet = self.sled_resources(sled_id)?.subnet; + let allocator = + self.sled_ip_allocators.entry(sled_id).or_insert_with(|| { + let sled_subnet_addr = sled_subnet.net().network(); + let minimum = sled_subnet_addr + .saturating_add(u128::from(SLED_RESERVED_ADDRESSES)); + let maximum = sled_subnet_addr + .saturating_add(u128::from(CP_SERVICES_RESERVED_ADDRESSES)); + assert!(sled_subnet.net().contains(minimum)); + assert!(sled_subnet.net().contains(maximum)); + let mut allocator = IpAllocator::new(minimum, maximum); + + // We shouldn't need to explicitly reserve the sled's global + // zone and switch addresses because they should be out of our + // range, but we do so just to be sure. + let sled_gz_addr = *get_sled_address(sled_subnet).ip(); + assert!(sled_subnet.net().contains(sled_gz_addr)); + assert!(minimum > sled_gz_addr); + assert!(maximum > sled_gz_addr); + let switch_zone_addr = get_switch_zone_address(sled_subnet); + assert!(sled_subnet.net().contains(switch_zone_addr)); + assert!(minimum > switch_zone_addr); + assert!(maximum > switch_zone_addr); + + // Record each of the sled's zones' underlay addresses as + // allocated. 
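The generation rule in `sled_add_zone` above is subtle enough to be worth distilling. This is a minimal sketch: the `Generation` type here is an illustrative stand-in for `omicron_common::api::external::Generation` (assumed to start at 1 and increment monotonically); only the branch structure mirrors the `or_insert_with` logic above.

```rust
// Stand-in Generation type; the real one lives in omicron_common.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Generation(u64);

impl Generation {
    fn new() -> Self {
        Generation(1)
    }
    fn next(self) -> Self {
        Generation(self.0 + 1)
    }
}

// Mirrors the two or_insert_with() branches in sled_add_zone().
fn next_config_generation(parent: Option<Generation>) -> Generation {
    match parent {
        // The sled already had a config in the parent blueprint: bump it.
        Some(g) => g.next(),
        // First config for this sled: generation 1 is reserved to mean
        // "no zones", so the first real config starts at 2.
        None => Generation::new().next(),
    }
}

fn main() {
    assert_eq!(next_config_generation(None), Generation(2));
    assert_eq!(next_config_generation(Some(Generation(5))), Generation(6));
}
```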
+ if let Some(sled_zones) = self.omicron_zones.get(&sled_id) { + for z in &sled_zones.zones { + allocator.reserve(z.underlay_address); + } + } + + allocator + }); + + allocator.alloc().ok_or_else(|| Error::OutOfAddresses { sled_id }) + } + + fn sled_resources(&self, sled_id: Uuid) -> Result<&SledResources, Error> { + self.policy.sleds.get(&sled_id).ok_or_else(|| { + Error::Planner(anyhow!( + "attempted to use sled that is not in service: {}", + sled_id + )) + }) + } +} + +#[cfg(test)] +pub mod test { + use super::BlueprintBuilder; + use ipnet::IpAdd; + use nexus_types::deployment::Policy; + use nexus_types::deployment::SledResources; + use nexus_types::deployment::ZpoolName; + use nexus_types::inventory::Collection; + use omicron_common::address::Ipv6Subnet; + use omicron_common::address::SLED_PREFIX; + use omicron_common::api::external::ByteCount; + use omicron_common::api::external::Generation; + use sled_agent_client::types::{ + Baseboard, Inventory, OmicronZoneConfig, OmicronZoneDataset, + OmicronZoneType, OmicronZonesConfig, SledRole, + }; + use std::collections::BTreeMap; + use std::collections::BTreeSet; + use std::net::Ipv6Addr; + use std::net::SocketAddrV6; + use std::str::FromStr; + use uuid::Uuid; + + /// Returns a collection and policy describing a pretty simple system + pub fn example() -> (Collection, Policy) { + let mut builder = nexus_inventory::CollectionBuilder::new("test-suite"); + + let sled_ids = [ + "72443b6c-b8bb-4ffa-ab3a-aeaa428ed79b", + "a5f3db3a-61aa-4f90-ad3e-02833c253bf5", + "0d168386-2551-44e8-98dd-ae7a7570f8a0", + ]; + let mut policy = Policy { sleds: BTreeMap::new() }; + for sled_id_str in sled_ids.iter() { + let sled_id: Uuid = sled_id_str.parse().unwrap(); + let sled_ip = policy_add_sled(&mut policy, sled_id); + let serial_number = format!("s{}", policy.sleds.len()); + builder + .found_sled_inventory( + "test-suite", + Inventory { + baseboard: Baseboard::Gimlet { + identifier: serial_number, + model: String::from("model1"), + revision: 0, + }, + reservoir_size: ByteCount::from(1024), + sled_role: SledRole::Gimlet, + sled_agent_address: SocketAddrV6::new( + sled_ip, 12345, 0, 0, + ) + .to_string(), + sled_id, + usable_hardware_threads: 10, + usable_physical_ram: ByteCount::from(1024 * 1024), + }, + ) + .unwrap(); + + let zpools = &policy.sleds.get(&sled_id).unwrap().zpools; + let ip1 = sled_ip.saturating_add(1); + let zones: Vec<_> = std::iter::once(OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: sled_ip.saturating_add(1), + zone_type: OmicronZoneType::InternalNtp { + address: SocketAddrV6::new(ip1, 12345, 0, 0).to_string(), + dns_servers: vec![], + domain: None, + ntp_servers: vec![], + }, + }) + .chain(zpools.iter().enumerate().map(|(i, zpool_name)| { + let ip = sled_ip.saturating_add(u128::try_from(i + 2).unwrap()); + OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: ip, + zone_type: OmicronZoneType::Crucible { + address: String::from("[::1]:12345"), + dataset: OmicronZoneDataset { + pool_name: zpool_name.clone(), + }, + }, + } + })) + .collect(); + + builder + .found_sled_omicron_zones( + "test-suite", + sled_id, + OmicronZonesConfig { + generation: Generation::new().next(), + zones, + }, + ) + .unwrap(); + } + + let collection = builder.build(); + + (collection, policy) + } + + pub fn policy_add_sled(policy: &mut Policy, sled_id: Uuid) -> Ipv6Addr { + let i = policy.sleds.len() + 1; + let sled_ip: Ipv6Addr = + format!("fd00:1122:3344:{}::1", i + 1).parse().unwrap(); + + let zpools: BTreeSet = [ + 
"oxp_be776cf5-4cba-4b7d-8109-3dfd020f22ee", + "oxp_aee23a17-b2ce-43f2-9302-c738d92cca28", + "oxp_f7940a6b-c865-41cf-ad61-1b831d594286", + ] + .iter() + .map(|name_str| { + ZpoolName::from_str(name_str).expect("not a valid zpool name") + }) + .collect(); + + let subnet = Ipv6Subnet::::new(sled_ip); + policy.sleds.insert(sled_id, SledResources { zpools, subnet }); + sled_ip + } + + #[test] + fn test_initial() { + // Test creating a blueprint from a collection and verifying that it + // describes no changes. + let (collection, policy) = example(); + let blueprint_initial = + BlueprintBuilder::build_initial_from_collection( + &collection, + &policy, + "the_test", + ) + .expect("failed to create initial blueprint"); + + // Since collections don't include what was in service, we have to + // provide that ourselves. For our purposes though we don't care. + let zones_in_service = blueprint_initial.zones_in_service.clone(); + let diff = blueprint_initial + .diff_from_collection(&collection, &zones_in_service); + println!( + "collection -> initial blueprint (expected no changes):\n{}", + diff + ); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + assert_eq!(diff.sleds_changed().count(), 0); + + // Test a no-op blueprint. + let builder = BlueprintBuilder::new_based_on( + &blueprint_initial, + &policy, + "test_basic", + ); + let blueprint = builder.build(); + let diff = blueprint_initial.diff(&blueprint); + println!( + "initial blueprint -> next blueprint (expected no changes):\n{}", + diff + ); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + assert_eq!(diff.sleds_changed().count(), 0); + } + + #[test] + fn test_basic() { + let (collection, mut policy) = example(); + let blueprint1 = BlueprintBuilder::build_initial_from_collection( + &collection, + &policy, + "the_test", + ) + .expect("failed to create initial blueprint"); + + let mut builder = + BlueprintBuilder::new_based_on(&blueprint1, &policy, "test_basic"); + + // The initial blueprint should have internal NTP zones on all the + // existing sleds, plus Crucible zones on all pools. So if we ensure + // all these zones exist, we should see no change. + for (sled_id, sled_resources) in &policy.sleds { + builder.sled_ensure_zone_ntp(*sled_id).unwrap(); + for pool_name in &sled_resources.zpools { + builder + .sled_ensure_zone_crucible(*sled_id, pool_name.clone()) + .unwrap(); + } + } + + let blueprint2 = builder.build(); + let diff = blueprint1.diff(&blueprint2); + println!( + "initial blueprint -> next blueprint (expected no changes):\n{}", + diff + ); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + assert_eq!(diff.sleds_changed().count(), 0); + + // The next step is adding these zones to a new sled. + let new_sled_id = Uuid::new_v4(); + let _ = policy_add_sled(&mut policy, new_sled_id); + let mut builder = + BlueprintBuilder::new_based_on(&blueprint2, &policy, "test_basic"); + builder.sled_ensure_zone_ntp(new_sled_id).unwrap(); + let new_sled_resources = policy.sleds.get(&new_sled_id).unwrap(); + for pool_name in &new_sled_resources.zpools { + builder + .sled_ensure_zone_crucible(new_sled_id, pool_name.clone()) + .unwrap(); + } + + let blueprint3 = builder.build(); + let diff = blueprint2.diff(&blueprint3); + println!("expecting new NTP and Crucible zones:\n{}", diff); + + // No sleds were changed or removed. 
+ assert_eq!(diff.sleds_changed().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + + // One sled was added. + let sleds: Vec<_> = diff.sleds_added().collect(); + assert_eq!(sleds.len(), 1); + let (sled_id, new_sled_zones) = sleds[0]; + assert_eq!(sled_id, new_sled_id); + // The generation number should be newer than the initial default. + assert!(new_sled_zones.generation > Generation::new()); + + // All zones' underlay addresses ought to be on the sled's subnet. + for z in &new_sled_zones.zones { + assert!(new_sled_resources + .subnet + .net() + .contains(z.underlay_address)); + } + + // Check for an NTP zone. Its sockaddr's IP should also be on the + // sled's subnet. + assert!(new_sled_zones.zones.iter().any(|z| { + if let OmicronZoneType::InternalNtp { address, .. } = &z.zone_type { + let sockaddr = address.parse::().unwrap(); + assert!(new_sled_resources + .subnet + .net() + .contains(*sockaddr.ip())); + true + } else { + false + } + })); + let crucible_pool_names = new_sled_zones + .zones + .iter() + .filter_map(|z| { + if let OmicronZoneType::Crucible { address, dataset } = + &z.zone_type + { + let sockaddr = address.parse::().unwrap(); + let ip = sockaddr.ip(); + assert!(new_sled_resources.subnet.net().contains(*ip)); + Some(dataset.pool_name.clone()) + } else { + None + } + }) + .collect::>(); + assert_eq!(crucible_pool_names, new_sled_resources.zpools); + } +} diff --git a/nexus/deployment/src/ip_allocator.rs b/nexus/deployment/src/ip_allocator.rs new file mode 100644 index 0000000000..a32fe936af --- /dev/null +++ b/nexus/deployment/src/ip_allocator.rs @@ -0,0 +1,120 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Very simple allocator for picking addresses from a sled's subnet + +use ipnet::IpAdd; +use std::net::Ipv6Addr; + +/// Very simple allocator for picking addresses from a sled's subnet +/// +/// The current implementation takes the max address seen so far and uses the +/// next one. This will never reuse old IPs. That avoids a bunch of +/// operational issues. It does mean we will eventually run out of IPs. But we +/// do have a big space right now (2^16). +// This overlaps with the bump allocator that's used in RSS. That one is not +// general enough to use here, though this one could potentially be used there. +pub struct IpAllocator { + last: Ipv6Addr, + maximum: Ipv6Addr, +} + +impl IpAllocator { + /// Make an allocator that allocates addresses from the range `(minimum, + /// maximum)` (exclusive). + pub fn new(minimum: Ipv6Addr, maximum: Ipv6Addr) -> IpAllocator { + IpAllocator { last: minimum, maximum } + } + + /// Mark the given address reserved so that it will never be returned by + /// `alloc()`. + /// + /// The given address can be outside the range provided to + /// `IpAllocator::new()`, in which case this reservation will be ignored. + pub fn reserve(&mut self, addr: Ipv6Addr) { + if addr < self.maximum && addr > self.last { + self.last = addr; + } + } + + /// Allocate an unused address from this allocator's range + pub fn alloc(&mut self) -> Option { + let next = self.last.saturating_add(1); + if next == self.last { + // We ran out of the entire IPv6 address space. + return None; + } + + if next >= self.maximum { + // We ran out of our allotted range. 
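+            // (Both endpoints are exclusive: `last` starts at `minimum`,
+            // which is therefore never returned, and this check rejects
+            // `maximum` itself.)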
+            return None;
+        }
+
+        self.last = next;
+        Some(next)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::IpAllocator;
+    use std::collections::BTreeSet;
+    use std::net::Ipv6Addr;
+
+    #[test]
+    fn test_basic() {
+        let range_start: Ipv6Addr = "fd00::d0".parse().unwrap();
+        let range_end: Ipv6Addr = "fd00::e8".parse().unwrap();
+        let reserved: BTreeSet<Ipv6Addr> = [
+            // These first two are deliberately out of range.
+            "fd00::ff".parse().unwrap(),
+            "fd00::c0".parse().unwrap(),
+            "fd00::d3".parse().unwrap(),
+            "fd00::d7".parse().unwrap(),
+        ]
+        .iter()
+        .copied()
+        .collect();
+
+        let mut allocator = IpAllocator::new(range_start, range_end);
+        for r in &reserved {
+            allocator.reserve(*r);
+        }
+
+        let mut allocated = BTreeSet::new();
+        while let Some(addr) = allocator.alloc() {
+            println!("allocated: {}", addr);
+            assert!(!reserved.contains(&addr));
+            assert!(!allocated.contains(&addr));
+            allocated.insert(addr);
+        }
+
+        assert_eq!(
+            allocated,
+            [
+                // Because d7 was reserved, everything up to it is also
+                // skipped. It doesn't have to work that way, but it
+                // currently does.
+                "fd00::d8".parse().unwrap(),
+                "fd00::d9".parse().unwrap(),
+                "fd00::da".parse().unwrap(),
+                "fd00::db".parse().unwrap(),
+                "fd00::dc".parse().unwrap(),
+                "fd00::dd".parse().unwrap(),
+                "fd00::de".parse().unwrap(),
+                "fd00::df".parse().unwrap(),
+                "fd00::e0".parse().unwrap(),
+                "fd00::e1".parse().unwrap(),
+                "fd00::e2".parse().unwrap(),
+                "fd00::e3".parse().unwrap(),
+                "fd00::e4".parse().unwrap(),
+                "fd00::e5".parse().unwrap(),
+                "fd00::e6".parse().unwrap(),
+                "fd00::e7".parse().unwrap(),
+            ]
+            .iter()
+            .copied()
+            .collect()
+        );
+    }
+}
diff --git a/nexus/deployment/src/lib.rs b/nexus/deployment/src/lib.rs
new file mode 100644
index 0000000000..fd182ae613
--- /dev/null
+++ b/nexus/deployment/src/lib.rs
@@ -0,0 +1,120 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! # Omicron deployment management
+//!
+//! **This system is still under development. Some of what's below is
+//! more aspirational than real.**
+//!
+//! ## Overview
+//!
+//! "Deployment management" here refers broadly to managing the lifecycle of
+//! software components. That includes deployment, undeployment, upgrade,
+//! bringing into service, and removing from service. It includes
+//! dynamically-deployed components (like most Omicron zones, such as Nexus
+//! and CockroachDB) as well as components that are tied to fixed physical
+//! hardware (like the host operating system and device firmware). This
+//! system will potentially manage configuration, too. See RFD 418 for
+//! background and a survey of considerations here.
+//!
+//! The basic idea is that you have:
+//!
+//! * **fleet policy**: describes things like how many CockroachDB nodes there
+//! should be, how many Nexus nodes there should be, the target system version
+//! that all software should be running, which sleds are currently in service,
+//! etc.
+//!
+//! * **inventory \[collections\]**: describe what software is currently
+//! running on which hardware components, including versions and
+//! configuration. This includes all control-plane-managed software,
+//! including device firmware, host operating system, Omicron zones, etc.
+//!
+//! * **\[deployment\] blueprints**: describe what software _should_ be running
+//! on which hardware components, including versions and configuration. Like
+//! inventory collections, blueprints cover all control-plane-managed software
+//! and configuration. Blueprints must be specific enough that multiple Nexus
+//! instances can attempt to realize the same blueprint concurrently without
+//! stomping on each other. (For example, it's not specific enough to say
+//! "there should be one more CockroachDB node" or even "there should be six
+//! CockroachDB nodes" because two Nexus instances might _both_ decide to
+//! provision a new node and then we'd have too many.) Blueprints must also be
+//! incremental enough that any execution of them should not break the system.
+//! For example, if between two consecutive blueprints the version of every
+//! CockroachDB node changed, then concurrent blueprint execution might try to
+//! update them all at once, bringing the whole Cockroach cluster down. In
+//! this case, we need to use a sequence of blueprints that each only updates
+//! one node at a time to ensure that the system keeps working.
+//!
+//! At any given time, the system has exactly one _target_ blueprint. The
+//! deployment system is always attempting to make reality match this
+//! blueprint. The system can be aware of more than one deployment blueprint,
+//! including past ones, later ones, those generated by Oxide support, etc.
+//!
+//! In terms of carrying it out, here's the basic idea:
+//!
+//! ```text
+//!          The Planner
+//!
+//! fleet policy       (latest inventory)     (latest blueprint)
+//!          \                 |                 /
+//!           \                |                /
+//!            +----------+    |    +----------/
+//!                       |    |    |
+//!                       v    v    v
+//!
+//!                        "planner"
+//!              (eventually a background task)
+//!                            |
+//!                            v                 no
+//!              is a new blueprint necessary? ------> done
+//!                            |
+//!                            | yes
+//!                            v
+//!                  generate a new blueprint
+//!                            |
+//!                            |
+//!                            v
+//!                commit blueprint to database
+//!                            |
+//!                            |
+//!                            v
+//!                           done
+//!
+//!
+//!          The Executor (better name?)
+//!
+//! latest committed blueprint        latest inventory
+//!                 |                        |
+//!                 |                        |
+//!                 +----+              +----+
+//!                      |              |
+//!                      v              v
+//!
+//!                        "executor"
+//!                     (background task)
+//!                            |
+//!                            v
+//!                  determine actions needed
+//!                        take actions
+//! ```
+//!
+//! The **planner** evaluates whether the current (target) blueprint is
+//! consistent with the current policy. If not, the task generates a new
+//! blueprint that _is_ consistent with the current policy and attempts to make
+//! that the new target. (Multiple Nexus instances could try to do this
+//! concurrently. CockroachDB's strong consistency ensures that only one can
+//! win. The other Nexus instances must go back to evaluating the winning
+//! blueprint before trying to change it again -- otherwise two Nexus instances
+//! might fight over two equivalent blueprints.)
+//!
+//! An **execution** task periodically evaluates whether the state reflected in
+//! the latest inventory collection is consistent with the current target
+//! blueprint. If not, it executes operations to bring reality into line with
+//! the blueprint. This means provisioning new zones, removing old zones,
+//! adding instances to DNS, removing instances from DNS, carrying out firmware
+//! updates, etc.

+pub mod blueprint_builder;
+mod ip_allocator;
+pub mod planner;
diff --git a/nexus/deployment/src/planner.rs b/nexus/deployment/src/planner.rs
new file mode 100644
index 0000000000..f228a7a150
--- /dev/null
+++ b/nexus/deployment/src/planner.rs
@@ -0,0 +1,230 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//!
High-level facilities for generating Blueprints +//! +//! See crate-level documentation for details. + +use crate::blueprint_builder::BlueprintBuilder; +use crate::blueprint_builder::Ensure; +use crate::blueprint_builder::Error; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::Policy; +use slog::{info, Logger}; + +pub struct Planner<'a> { + log: Logger, + policy: &'a Policy, + blueprint: BlueprintBuilder<'a>, +} + +impl<'a> Planner<'a> { + pub fn new_based_on( + log: Logger, + parent_blueprint: &'a Blueprint, + policy: &'a Policy, + creator: &str, + ) -> Planner<'a> { + let blueprint = + BlueprintBuilder::new_based_on(parent_blueprint, policy, creator); + Planner { log, policy, blueprint } + } + + pub fn plan(mut self) -> Result { + self.do_plan()?; + Ok(self.blueprint.build()) + } + + fn do_plan(&mut self) -> Result<(), Error> { + // The only thing this planner currently knows how to do is add services + // to a sled that's missing them. So let's see if we're in that case. + + // Internal DNS is a prerequisite for bringing up all other zones. At + // this point, we assume that internal DNS (as a service) is already + // functioning. At some point, this function will have to grow the + // ability to determine whether more internal DNS zones need to be + // added and where they should go. And the blueprint builder will need + // to grow the ability to provision one. + + for (sled_id, sled_info) in &self.policy.sleds { + // Check for an NTP zone. Every sled should have one. If it's not + // there, all we can do is provision that one zone. We have to wait + // for that to succeed and synchronize the clock before we can + // provision anything else. + if self.blueprint.sled_ensure_zone_ntp(*sled_id)? == Ensure::Added { + info!( + &self.log, + "found sled missing NTP zone (will add one)"; + "sled_id" => ?sled_id + ); + self.blueprint + .comment(&format!("sled {}: add NTP zone", sled_id)); + // Don't make any other changes to this sled. However, this + // change is compatible with any other changes to other sleds, + // so we can "continue" here rather than "break". + continue; + } + + // Every zpool on the sled should have a Crucible zone on it. + let mut ncrucibles_added = 0; + for zpool_name in &sled_info.zpools { + if self + .blueprint + .sled_ensure_zone_crucible(*sled_id, zpool_name.clone())? + == Ensure::Added + { + info!( + &self.log, + "found sled zpool missing Crucible zone (will add one)"; + "sled_id" => ?sled_id, + "zpool_name" => ?zpool_name, + ); + ncrucibles_added += 1; + } + } + + if ncrucibles_added > 0 { + // Don't make any other changes to this sled. However, this + // change is compatible with any other changes to other sleds, + // so we can "continue" here rather than "break". + // (Yes, it's currently the last thing in the loop, but being + // explicit here means we won't forget to do this when more code + // is added below.) 
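+                // (The per-sled comments recorded here are joined by
+                // `BlueprintBuilder::build()` into the new blueprint's
+                // `comment` field.)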
+ self.blueprint.comment(&format!("sled {}: add zones", sled_id)); + continue; + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::Planner; + use crate::blueprint_builder::test::example; + use crate::blueprint_builder::test::policy_add_sled; + use crate::blueprint_builder::BlueprintBuilder; + use omicron_common::api::external::Generation; + use omicron_test_utils::dev::test_setup_log; + use sled_agent_client::types::OmicronZoneType; + + /// Runs through a basic sequence of blueprints for adding a sled + #[test] + fn test_basic_add_sled() { + let logctx = test_setup_log("planner_basic_add_sled"); + + // Use our example inventory collection. + let (collection, mut policy) = example(); + + // Build the initial blueprint. We don't bother verifying it here + // because there's a separate test for that. + let blueprint1 = BlueprintBuilder::build_initial_from_collection( + &collection, + &policy, + "the_test", + ) + .expect("failed to create initial blueprint"); + + // Now run the planner. It should do nothing because our initial + // system didn't have any issues that the planner currently knows how to + // fix. + let blueprint2 = Planner::new_based_on( + logctx.log.clone(), + &blueprint1, + &policy, + "no-op?", + ) + .plan() + .expect("failed to plan"); + + let diff = blueprint1.diff(&blueprint2); + println!("1 -> 2 (expected no changes):\n{}", diff); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + assert_eq!(diff.sleds_changed().count(), 0); + + // Now add a new sled. + let new_sled_id = + "7097f5b3-5896-4fff-bd97-63a9a69563a9".parse().unwrap(); + let _ = policy_add_sled(&mut policy, new_sled_id); + + // Check that the first step is to add an NTP zone + let blueprint3 = Planner::new_based_on( + logctx.log.clone(), + &blueprint2, + &policy, + "test: add NTP?", + ) + .plan() + .expect("failed to plan"); + + let diff = blueprint2.diff(&blueprint3); + println!("2 -> 3 (expect new NTP zone on new sled):\n{}", diff); + let sleds = diff.sleds_added().collect::>(); + let (sled_id, sled_zones) = sleds[0]; + // We have defined elsewhere that the first generation contains no + // zones. So the first one with zones must be newer. See + // OMICRON_ZONES_CONFIG_INITIAL_GENERATION. + assert!(sled_zones.generation > Generation::new()); + assert_eq!(sled_id, new_sled_id); + assert_eq!(sled_zones.zones.len(), 1); + assert!(matches!( + sled_zones.zones[0].zone_type, + OmicronZoneType::InternalNtp { .. } + )); + assert_eq!(diff.sleds_removed().count(), 0); + assert_eq!(diff.sleds_changed().count(), 0); + + // Check that the next step is to add Crucible zones + let blueprint4 = Planner::new_based_on( + logctx.log.clone(), + &blueprint3, + &policy, + "test: add Crucible zones?", + ) + .plan() + .expect("failed to plan"); + + let diff = blueprint3.diff(&blueprint4); + println!("3 -> 4 (expect Crucible zones):\n{}", diff); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + let sleds = diff.sleds_changed().collect::>(); + assert_eq!(sleds.len(), 1); + let (sled_id, sled_changes) = &sleds[0]; + assert_eq!( + sled_changes.generation_after, + sled_changes.generation_before.next() + ); + assert_eq!(*sled_id, new_sled_id); + assert_eq!(sled_changes.zones_removed().count(), 0); + assert_eq!(sled_changes.zones_changed().count(), 0); + let zones = sled_changes.zones_added().collect::>(); + assert_eq!(zones.len(), 3); + for zone in &zones { + let OmicronZoneType::Crucible { .. 
} = zone.zone_type else { + panic!("unexpectedly added a non-Crucible zone"); + }; + } + + // Check that there are no more steps + let blueprint5 = Planner::new_based_on( + logctx.log.clone(), + &blueprint4, + &policy, + "test: no-op?", + ) + .plan() + .expect("failed to plan"); + + let diff = blueprint4.diff(&blueprint5); + println!("4 -> 5 (expect no changes):\n{}", diff); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + assert_eq!(diff.sleds_changed().count(), 0); + + logctx.cleanup_successful(); + } +} diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index ab9af3f9e0..ad5ae7d024 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -526,8 +526,13 @@ mod test { zone_id: Uuid, ) -> sim::Server { // Start a simulated sled agent. - let config = - sim::Config::for_testing(sled_id, sim::SimMode::Auto, None, None); + let config = sim::Config::for_testing( + sled_id, + sim::SimMode::Auto, + None, + None, + Some(vec![]), + ); let agent = sim::Server::start(&config, &log, false).await.unwrap(); // Pretend to put some zones onto this sled. We don't need to test this diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs new file mode 100644 index 0000000000..9439cdc6d5 --- /dev/null +++ b/nexus/src/app/deployment.rs @@ -0,0 +1,364 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Configuration of the deployment system + +use nexus_db_queries::authz; +use nexus_db_queries::authz::Action; +use nexus_db_queries::authz::ApiResource; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::pagination::Paginator; +use nexus_deployment::blueprint_builder::BlueprintBuilder; +use nexus_deployment::planner::Planner; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintTarget; +use nexus_types::deployment::BlueprintTargetSet; +use nexus_types::deployment::Policy; +use nexus_types::deployment::SledResources; +use nexus_types::deployment::ZpoolName; +use nexus_types::identity::Asset; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::SLED_PREFIX; +use omicron_common::api::external::CreateResult; +use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::DeleteResult; +use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::LookupResult; +use omicron_common::api::external::LookupType; +use omicron_common::api::external::ResourceType; +use slog_error_chain::InlineErrorChain; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::num::NonZeroU32; +use std::str::FromStr; +use uuid::Uuid; + +/// "limit" used in SQL queries that paginate through all sleds, zpools, etc. +// unsafe: `new_unchecked` is only unsound if the argument is 0. +const SQL_BATCH_SIZE: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) }; + +/// "limit" used in SQL queries that fetch inventory data. Unlike the batch +/// size above, this is a limit on the *total* number of records returned. If +/// it's too small, the whole operation will fail. See +/// oxidecomputer/omicron#4629. +// unsafe: `new_unchecked` is only unsound if the argument is 0. 
+const SQL_LIMIT_INVENTORY: NonZeroU32 = + unsafe { NonZeroU32::new_unchecked(1000) }; + +/// Temporary in-memory store of blueprints +/// +/// Blueprints eventually need to be stored in the database. That will obviate +/// the need for this structure. +pub struct Blueprints { + all_blueprints: BTreeMap, + target: BlueprintTarget, +} + +impl Blueprints { + pub fn new() -> Blueprints { + Blueprints { + all_blueprints: BTreeMap::new(), + target: BlueprintTarget { + target_id: None, + enabled: false, + time_set: chrono::Utc::now(), + }, + } + } +} + +/// Common structure for collecting information that the planner needs +struct PlanningContext { + policy: Policy, + creator: String, +} + +impl super::Nexus { + // Once we store blueprints in the database, this function will likely just + // delegate to a corresponding datastore function. + pub async fn blueprint_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + opctx.authorize(Action::ListChildren, &authz::BLUEPRINT_CONFIG).await?; + Ok(self + .blueprints + .lock() + .unwrap() + .all_blueprints + .values() + .filter_map(|f| match pagparams.marker { + None => Some(f.clone()), + Some(marker) if f.id > *marker => Some(f.clone()), + _ => None, + }) + .collect()) + } + + // Once we store blueprints in the database, this function will likely just + // delegate to a corresponding datastore function. + pub async fn blueprint_view( + &self, + opctx: &OpContext, + blueprint_id: Uuid, + ) -> LookupResult { + let blueprint = authz::Blueprint::new( + authz::FLEET, + blueprint_id, + LookupType::ById(blueprint_id), + ); + opctx.authorize(Action::Read, &blueprint).await?; + self.blueprints + .lock() + .unwrap() + .all_blueprints + .get(&blueprint_id) + .cloned() + .ok_or_else(|| blueprint.not_found()) + } + + // Once we store blueprints in the database, this function will likely just + // delegate to a corresponding datastore function. + pub async fn blueprint_delete( + &self, + opctx: &OpContext, + blueprint_id: Uuid, + ) -> DeleteResult { + let blueprint = authz::Blueprint::new( + authz::FLEET, + blueprint_id, + LookupType::ById(blueprint_id), + ); + opctx.authorize(Action::Delete, &blueprint).await?; + + let mut blueprints = self.blueprints.lock().unwrap(); + if let Some(target_id) = blueprints.target.target_id { + if target_id == blueprint_id { + return Err(Error::conflict(format!( + "blueprint {} is the current target and cannot be deleted", + blueprint_id + ))); + } + } + + if blueprints.all_blueprints.remove(&blueprint_id).is_none() { + return Err(blueprint.not_found()); + } + + Ok(()) + } + + pub async fn blueprint_target_view( + &self, + opctx: &OpContext, + ) -> Result { + self.blueprint_target(opctx).await.map(|(target, _)| target) + } + + // This is a stand-in for a datastore function that fetches the current + // target information and the target blueprint's contents. This helper + // exists to combine the authz check with the lookup, which is what the + // datastore function will eventually do. 
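The helper below combines the authz check with the target lookup. The setter that follows it enforces a succession rule worth seeing in isolation: a blueprint may only become the target if its parent is the current target. A minimal sketch of that rule, with illustrative stand-in types rather than the real ones (requires only the `uuid` crate, already a workspace dependency):

```rust
use uuid::Uuid;

// Illustrative stand-in for the current-target state.
struct CurrentTarget {
    target_id: Option<Uuid>,
}

// A candidate may become the target only if its parent is the current
// target. An initial blueprint (no parent) can only be the first target.
fn may_become_target(
    current: &CurrentTarget,
    candidate_parent_id: Option<Uuid>,
) -> bool {
    candidate_parent_id == current.target_id
}

fn main() {
    let no_target_yet = CurrentTarget { target_id: None };
    let b1 = Uuid::new_v4();

    // An initial blueprint can become the first target...
    assert!(may_become_target(&no_target_yet, None));
    // ...but a derived blueprint cannot, until its parent is the target.
    assert!(!may_become_target(&no_target_yet, Some(b1)));
    assert!(may_become_target(
        &CurrentTarget { target_id: Some(b1) },
        Some(b1)
    ));
}
```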
+ async fn blueprint_target( + &self, + opctx: &OpContext, + ) -> Result<(BlueprintTarget, Option), Error> { + opctx.authorize(Action::Read, &authz::BLUEPRINT_CONFIG).await?; + let blueprints = self.blueprints.lock().unwrap(); + Ok(( + blueprints.target.clone(), + blueprints.target.target_id.and_then(|target_id| { + blueprints.all_blueprints.get(&target_id).cloned() + }), + )) + } + + // Once we store blueprints in the database, this function will likely just + // delegate to a corresponding datastore function. + pub async fn blueprint_target_set( + &self, + opctx: &OpContext, + params: BlueprintTargetSet, + ) -> Result { + opctx.authorize(Action::Modify, &authz::BLUEPRINT_CONFIG).await?; + let new_target_id = params.target_id; + let enabled = params.enabled; + let mut blueprints = self.blueprints.lock().unwrap(); + if let Some(blueprint) = blueprints.all_blueprints.get(&new_target_id) { + if blueprint.parent_blueprint_id != blueprints.target.target_id { + return Err(Error::conflict(&format!( + "blueprint {:?}: parent is {:?}, which is not the current \ + target {:?}", + new_target_id, + blueprint + .parent_blueprint_id + .map(|p| p.to_string()) + .unwrap_or_else(|| String::from("")), + blueprints + .target + .target_id + .map(|p| p.to_string()) + .unwrap_or_else(|| String::from("")), + ))); + } + blueprints.target = BlueprintTarget { + target_id: Some(new_target_id), + enabled, + time_set: chrono::Utc::now(), + }; + + // When we add a background task executing the target blueprint, + // this is the point where we'd signal it to update its target. + Ok(blueprints.target.clone()) + } else { + Err(Error::not_found_by_id(ResourceType::Blueprint, &new_target_id)) + } + } + + async fn blueprint_planning_context( + &self, + opctx: &OpContext, + ) -> Result { + let creator = self.id.to_string(); + let datastore = self.datastore(); + + let sled_rows = { + let mut all_sleds = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = + datastore.sled_list(opctx, &p.current_pagparams()).await?; + paginator = + p.found_batch(&batch, &|s: &nexus_db_model::Sled| s.id()); + all_sleds.extend(batch); + } + all_sleds + }; + + let mut zpools_by_sled_id = { + let mut zpools = BTreeMap::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = datastore + .zpool_list_all_external(opctx, &p.current_pagparams()) + .await?; + paginator = + p.found_batch(&batch, &|z: &nexus_db_model::Zpool| z.id()); + for z in batch { + let sled_zpool_names = + zpools.entry(z.sled_id).or_insert_with(BTreeSet::new); + // It's unfortunate that Nexus knows how Sled Agent + // constructs zpool names, but there's not currently an + // alternative. 
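+                    // (The generated names have the form "oxp_<zpool-uuid>",
+                    // matching the zpool names in the test data elsewhere in
+                    // this change.)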
+ let zpool_name_generated = + illumos_utils::zpool::ZpoolName::new_external(z.id()) + .to_string(); + let zpool_name = ZpoolName::from_str(&zpool_name_generated) + .map_err(|e| { + Error::internal_error(&format!( + "unexpectedly failed to parse generated \ + zpool name: {}: {}", + zpool_name_generated, e + )) + })?; + sled_zpool_names.insert(zpool_name); + } + } + zpools + }; + + let sleds = sled_rows + .into_iter() + .map(|sled_row| { + let sled_id = sled_row.id(); + let subnet = Ipv6Subnet::::new(sled_row.ip()); + let zpools = zpools_by_sled_id + .remove(&sled_id) + .unwrap_or_else(BTreeSet::new); + let sled_info = SledResources { subnet, zpools }; + (sled_id, sled_info) + }) + .collect(); + + Ok(PlanningContext { creator, policy: Policy { sleds } }) + } + + // Once we store blueprints in the database, this function will likely just + // delegate to a corresponding datastore function. + async fn blueprint_add( + &self, + opctx: &OpContext, + blueprint: Blueprint, + ) -> Result<(), Error> { + opctx.authorize(Action::Modify, &authz::BLUEPRINT_CONFIG).await?; + let mut blueprints = self.blueprints.lock().unwrap(); + assert!(blueprints + .all_blueprints + .insert(blueprint.id, blueprint) + .is_none()); + Ok(()) + } + + pub async fn blueprint_generate_from_collection( + &self, + opctx: &OpContext, + collection_id: Uuid, + ) -> CreateResult { + let collection = self + .datastore() + .inventory_collection_read_all_or_nothing( + opctx, + collection_id, + SQL_LIMIT_INVENTORY, + ) + .await?; + let planning_context = self.blueprint_planning_context(opctx).await?; + let blueprint = BlueprintBuilder::build_initial_from_collection( + &collection, + &planning_context.policy, + &planning_context.creator, + ) + .map_err(|error| { + Error::internal_error(&format!( + "error generating initial blueprint from collection {}: {}", + collection_id, + InlineErrorChain::new(&error) + )) + })?; + + self.blueprint_add(&opctx, blueprint.clone()).await?; + Ok(blueprint) + } + + pub async fn blueprint_create_regenerate( + &self, + opctx: &OpContext, + ) -> CreateResult { + let (_, maybe_parent) = self.blueprint_target(opctx).await?; + let Some(parent_blueprint) = maybe_parent else { + return Err(Error::conflict( + "cannot regenerate blueprint without existing target", + )); + }; + + let planning_context = self.blueprint_planning_context(opctx).await?; + let planner = Planner::new_based_on( + opctx.log.clone(), + &parent_blueprint, + &planning_context.policy, + &planning_context.creator, + ); + let blueprint = planner.plan().map_err(|error| { + Error::internal_error(&format!( + "error generating blueprint: {}", + InlineErrorChain::new(&error) + )) + })?; + + self.blueprint_add(&opctx, blueprint.clone()).await?; + Ok(blueprint) + } +} diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 4045269878..778c5e2fe1 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -39,8 +39,8 @@ use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; -use omicron_common::api::external::Vni; use omicron_common::api::internal::nexus; +use omicron_common::api::internal::shared::SourceNatConfig; use propolis_client::support::tungstenite::protocol::frame::coding::CloseCode; use propolis_client::support::tungstenite::protocol::CloseFrame; use propolis_client::support::tungstenite::Message as WebSocketMessage; @@ -52,7 +52,6 @@ use 
sled_agent_client::types::InstanceMigrationTargetParams; use sled_agent_client::types::InstanceProperties; use sled_agent_client::types::InstancePutMigrationIdsBody; use sled_agent_client::types::InstancePutStateBody; -use sled_agent_client::types::SourceNatConfig; use std::matches; use std::net::SocketAddr; use std::sync::Arc; @@ -1089,7 +1088,7 @@ impl super::Nexus { // matter which one we use because all NICs must be in the // same VPC; see the check in project_create_instance.) let firewall_rules = if let Some(nic) = nics.first() { - let vni = Vni::try_from(nic.vni.0)?; + let vni = nic.vni; let vpc = self .db_datastore .resolve_vni_to_vpc(opctx, db::model::Vni(vni)) diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs index 3db749f43b..8f97642c88 100644 --- a/nexus/src/app/instance_network.rs +++ b/nexus/src/app/instance_network.rs @@ -136,9 +136,9 @@ impl super::Nexus { let nic_id = nic.id; let mapping = SetVirtualNetworkInterfaceHost { virtual_ip: nic.ip, - virtual_mac: nic.mac.clone(), + virtual_mac: nic.mac, physical_host_ip, - vni: nic.vni.clone(), + vni: nic.vni, }; let log = self.log.clone(); @@ -225,7 +225,7 @@ impl super::Nexus { let nic_id = nic.id; let mapping = DeleteVirtualNetworkInterfaceHost { virtual_ip: nic.ip, - vni: nic.vni.clone(), + vni: nic.vni, }; let log = self.log.clone(); @@ -404,7 +404,7 @@ impl super::Nexus { first_port: target_ip.first_port, last_port: target_ip.last_port, sled_address: sled_address.into(), - vni: DbVni(network_interface.vni.clone().into()), + vni: DbVni(network_interface.vni), mac: nexus_db_model::MacAddr( omicron_common::api::external::MacAddr(mac_address), ), diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 5af45985db..defc4a05ea 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -37,6 +37,7 @@ mod address_lot; pub(crate) mod background; mod bgp; mod certificate; +mod deployment; mod device_auth; mod disk; mod external_dns; @@ -179,6 +180,10 @@ pub struct Nexus { /// Default Crucible region allocation strategy default_region_allocation_strategy: RegionAllocationStrategy, + + /// information about blueprints (deployment configurations) + // This will go away once these are stored in the database. 
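+    // (A std::sync::Mutex rather than an async lock: it is only ever held
+    // for short synchronous sections, never across an .await.)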
+ blueprints: std::sync::Mutex, } impl Nexus { @@ -411,6 +416,7 @@ impl Nexus { .pkg .default_region_allocation_strategy .clone(), + blueprints: std::sync::Mutex::new(deployment::Blueprints::new()), }; // TODO-cleanup all the extra Arcs here seems wrong diff --git a/nexus/src/app/vpc.rs b/nexus/src/app/vpc.rs index c47f499c41..3a6278053a 100644 --- a/nexus/src/app/vpc.rs +++ b/nexus/src/app/vpc.rs @@ -28,7 +28,6 @@ use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; -use omicron_common::api::external::Vni; use omicron_common::api::external::VpcFirewallRuleUpdateParams; use omicron_common::api::internal::nexus::HostIdentifier; use sled_agent_client::types::NetworkInterface; @@ -259,7 +258,7 @@ impl super::Nexus { debug!(self.log, "resolved {} rules for sleds", rules_for_sled.len()); let sled_rules_request = sled_agent_client::types::VpcFirewallRulesEnsureBody { - vni: vpc.vni.0.into(), + vni: vpc.vni.0, rules: rules_for_sled, }; @@ -480,7 +479,7 @@ impl super::Nexus { let mut nics = HashSet::new(); let mut targets = Vec::with_capacity(rule.targets.len()); let mut push_target_nic = |nic: &NetworkInterface| { - if nics.insert((*nic.vni, (*nic.mac).clone())) { + if nics.insert((nic.vni, *nic.mac)) { targets.push(nic.clone()); } }; @@ -589,10 +588,8 @@ impl super::Nexus { .unwrap_or(&no_interfaces) { host_addrs.push( - HostIdentifier::Vpc(Vni::try_from( - *interface.vni, - )?) - .into(), + HostIdentifier::Vpc(interface.vni) + .into(), ) } } diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 9a20911893..63578e360a 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -25,6 +25,8 @@ use dropshot::ResultsPage; use dropshot::TypedBody; use hyper::Body; use nexus_db_model::Ipv4NatEntryView; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintTargetSet; use nexus_types::internal_api::params::SwitchPutRequest; use nexus_types::internal_api::params::SwitchPutResponse; use nexus_types::internal_api::views::to_list; @@ -34,6 +36,7 @@ use omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::PaginatedById; use omicron_common::api::external::http_pagination::ScanById; use omicron_common::api::external::http_pagination::ScanParams; +use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::api::internal::nexus::SledInstanceState; @@ -42,6 +45,7 @@ use oximeter::types::ProducerResults; use oximeter_producer::{collect, ProducerIdPathParams}; use schemars::JsonSchema; use serde::Deserialize; +use serde::Serialize; use std::collections::BTreeMap; use std::sync::Arc; use uuid::Uuid; @@ -74,6 +78,14 @@ pub(crate) fn internal_api() -> NexusApiDescription { api.register(bgtask_list)?; api.register(bgtask_view)?; + api.register(blueprint_list)?; + api.register(blueprint_view)?; + api.register(blueprint_delete)?; + api.register(blueprint_target_view)?; + api.register(blueprint_target_set)?; + api.register(blueprint_generate_from_collection)?; + api.register(blueprint_regenerate)?; + Ok(()) } @@ -591,3 +603,196 @@ async fn ipv4_nat_changeset( }; apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await } + +// APIs for managing blueprints 
+// +// These are not (yet) intended for use by any other programs. Eventually, we +// will want this functionality part of the public API. But we don't want to +// commit to any of this yet. These properly belong in an RFD 399-style +// "Service and Support API". Absent that, we stick them here. + +/// Lists blueprints +#[endpoint { + method = GET, + path = "/deployment/blueprints/all", +}] +async fn blueprint_list( + rqctx: RequestContext>, + query_params: Query, +) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + let query = query_params.into_inner(); + let opctx = crate::context::op_context_for_internal_api(&rqctx).await; + let pagparams = data_page_params_for(&rqctx, &query)?; + let blueprints = nexus.blueprint_list(&opctx, &pagparams).await?; + Ok(HttpResponseOk(ScanById::results_page( + &query, + blueprints, + &|_, blueprint: &Blueprint| blueprint.id, + )?)) + }; + + apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Fetches one blueprint +#[endpoint { + method = GET, + path = "/deployment/blueprints/all/{blueprint_id}", +}] +async fn blueprint_view( + rqctx: RequestContext>, + path_params: Path, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_internal_api(&rqctx).await; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let blueprint = nexus.blueprint_view(&opctx, path.blueprint_id).await?; + Ok(HttpResponseOk(blueprint)) + }; + apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Deletes one blueprint +#[endpoint { + method = DELETE, + path = "/deployment/blueprints/all/{blueprint_id}", +}] +async fn blueprint_delete( + rqctx: RequestContext>, + path_params: Path, +) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_internal_api(&rqctx).await; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + nexus.blueprint_delete(&opctx, path.blueprint_id).await?; + Ok(HttpResponseDeleted()) + }; + apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +// Managing the current target blueprint + +/// Describes what blueprint, if any, the system is currently working toward +#[derive(Debug, Serialize, JsonSchema)] +pub struct BlueprintTarget { + /// id of the blueprint that the system is trying to make real + pub target_id: Uuid, + /// policy: should the system actively work towards this blueprint + /// + /// This should generally be left enabled. 
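+    /// (Setting this to `false` is meant to pause execution of the target
+    /// blueprint without changing which blueprint is the target.)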
+ pub enabled: bool, + /// when this blueprint was made the target + pub time_set: chrono::DateTime, +} + +impl TryFrom for BlueprintTarget { + type Error = Error; + + fn try_from( + value: nexus_types::deployment::BlueprintTarget, + ) -> Result { + Ok(BlueprintTarget { + target_id: value.target_id.ok_or_else(|| { + Error::conflict("no target blueprint has been configured") + })?, + enabled: value.enabled, + time_set: value.time_set, + }) + } +} + +/// Fetches the current target blueprint, if any +#[endpoint { + method = GET, + path = "/deployment/blueprints/target", +}] +async fn blueprint_target_view( + rqctx: RequestContext>, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_internal_api(&rqctx).await; + let nexus = &apictx.nexus; + let target = nexus.blueprint_target_view(&opctx).await?; + Ok(HttpResponseOk(BlueprintTarget::try_from(target)?)) + }; + apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Make the specified blueprint the new target +#[endpoint { + method = POST, + path = "/deployment/blueprints/target", +}] +async fn blueprint_target_set( + rqctx: RequestContext>, + target: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_internal_api(&rqctx).await; + let nexus = &apictx.nexus; + let target = target.into_inner(); + let result = nexus.blueprint_target_set(&opctx, target).await?; + Ok(HttpResponseOk( + BlueprintTarget::try_from(result) + .map_err(|e| Error::conflict(e.to_string()))?, + )) + }; + apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +// Generating blueprints + +#[derive(Debug, Deserialize, JsonSchema)] +struct CollectionId { + collection_id: Uuid, +} + +/// Generates a new blueprint matching the specified inventory collection +#[endpoint { + method = POST, + path = "/deployment/blueprints/generate-from-collection", +}] +async fn blueprint_generate_from_collection( + rqctx: RequestContext>, + params: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_internal_api(&rqctx).await; + let nexus = &apictx.nexus; + let collection_id = params.into_inner().collection_id; + let result = nexus + .blueprint_generate_from_collection(&opctx, collection_id) + .await?; + Ok(HttpResponseOk(result)) + }; + apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Generates a new blueprint for the current system, re-evaluating anything +/// that's changed since the last one was generated +#[endpoint { + method = POST, + path = "/deployment/blueprints/regenerate", +}] +async fn blueprint_regenerate( + rqctx: RequestContext>, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_internal_api(&rqctx).await; + let nexus = &apictx.nexus; + let result = nexus.blueprint_create_regenerate(&opctx).await?; + Ok(HttpResponseOk(result)) + }; + apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await +} diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 19d5f747d8..da21602cb1 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -1107,6 +1107,7 @@ pub async fn start_sled_agent( sim_mode, Some(nexus_address), Some(update_directory), + None, ); let server = sim::Server::start(&config, &log, true) .await diff 
--git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 99ef165188..044f87f7c1 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -608,12 +608,7 @@ async fn test_instance_start_creates_networking_state( // TODO(#3107) Remove this bifurcation when Nexus programs all mappings // itself. if agent.id != sled_id { - assert_sled_v2p_mappings( - agent, - &nics[0], - guest_nics[0].vni.clone().into(), - ) - .await; + assert_sled_v2p_mappings(agent, &nics[0], guest_nics[0].vni).await; } else { assert!(agent.v2p_mappings.lock().await.is_empty()); } @@ -807,12 +802,8 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { // all mappings explicitly (without skipping the instance's current // sled) this bifurcation should be removed. if sled_agent.id != original_sled_id { - assert_sled_v2p_mappings( - sled_agent, - &nics[0], - guest_nics[0].vni.clone().into(), - ) - .await; + assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni) + .await; } else { assert!(sled_agent.v2p_mappings.lock().await.is_empty()); } @@ -860,12 +851,8 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { // agent will have updated any mappings there. Remove this bifurcation // when Nexus programs all mappings explicitly. if sled_agent.id != dst_sled_id { - assert_sled_v2p_mappings( - sled_agent, - &nics[0], - guest_nics[0].vni.clone().into(), - ) - .await; + assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni) + .await; } } } @@ -4248,12 +4235,8 @@ async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) { // TODO(#3107) Remove this bifurcation when Nexus programs all mappings // itself. if sled_agent.id != sled_id { - assert_sled_v2p_mappings( - sled_agent, - &nics[0], - guest_nics[0].vni.clone().into(), - ) - .await; + assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni) + .await; } else { assert!(sled_agent.v2p_mappings.lock().await.is_empty()); } diff --git a/nexus/tests/integration_tests/sleds.rs b/nexus/tests/integration_tests/sleds.rs index 5e399cbe84..b551cf51b5 100644 --- a/nexus/tests/integration_tests/sleds.rs +++ b/nexus/tests/integration_tests/sleds.rs @@ -101,10 +101,10 @@ async fn test_physical_disk_create_list_delete( let sleds_url = "/v1/system/hardware/sleds"; assert_eq!(sleds_list(&external_client, &sleds_url).await.len(), 1); - // Verify that there are no disks. + // The test framework may set up some disks initially. 
let disks_url = format!("/v1/system/hardware/sleds/{SLED_AGENT_UUID}/disks"); - assert!(physical_disks_list(&external_client, &disks_url).await.is_empty()); + let disks_initial = physical_disks_list(&external_client, &disks_url).await; // Insert a new disk using the internal API, observe it in the external API let sled_id = Uuid::from_str(&SLED_AGENT_UUID).unwrap(); @@ -118,14 +118,22 @@ async fn test_physical_disk_create_list_delete( ) .await; let disks = physical_disks_list(&external_client, &disks_url).await; - assert_eq!(disks.len(), 1); - assert_eq!(disks[0].vendor, "v"); - assert_eq!(disks[0].serial, "s"); - assert_eq!(disks[0].model, "m"); + assert_eq!(disks.len(), disks_initial.len() + 1); + let _new_disk = disks + .iter() + .find(|found_disk| { + found_disk.vendor == "v" + && found_disk.serial == "s" + && found_disk.model == "m" + }) + .expect("did not find the new disk"); // Delete that disk using the internal API, observe it in the external API delete_physical_disk(&internal_client, "v", "s", "m", sled_id).await; - assert!(physical_disks_list(&external_client, &disks_url).await.is_empty()); + assert_eq!( + physical_disks_list(&external_client, &disks_url).await, + disks_initial + ); } #[nexus_test] diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index 90ec67c0e6..dff0f73be7 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -16,6 +16,7 @@ serde.workspace = true serde_json.workspace = true steno.workspace = true strum.workspace = true +thiserror.workspace = true uuid.workspace = true api_identity.workspace = true diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs new file mode 100644 index 0000000000..95404a2c17 --- /dev/null +++ b/nexus/types/src/deployment.rs @@ -0,0 +1,564 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types representing deployed software and configuration +//! +//! For more on this, see the crate-level documentation for `nexus/deployment`. +//! +//! This lives in nexus/types because it's used by both nexus/db-model and +//! nexus/deployment. (It could as well just live in nexus/db-model, but +//! nexus/deployment does not currently know about nexus/db-model and it's +//! convenient to separate these concerns.) + +use crate::inventory::Collection; +pub use crate::inventory::OmicronZoneConfig; +pub use crate::inventory::OmicronZoneDataset; +pub use crate::inventory::OmicronZoneType; +pub use crate::inventory::OmicronZonesConfig; +pub use crate::inventory::ZpoolName; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::SLED_PREFIX; +use omicron_common::api::external::Generation; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use uuid::Uuid; + +/// Fleet-wide deployment policy +/// +/// The **policy** represents the deployment controls that people (operators and +/// support engineers) can modify directly under normal operation. In the +/// limit, this would include things like: which sleds are supposed to be part +/// of the system, how many CockroachDB nodes should be part of the cluster, +/// what system version the system should be running, etc. 
It would _not_
+/// include things like which services should be running on which sleds or which
+/// host OS version should be on each sled because that's up to the control
+/// plane to decide. (To be clear, the intent is that for extenuating
+/// circumstances, people could exercise control over such things, but that
+/// would not be part of normal operation.)
+///
+/// The current policy is pretty limited. It's aimed primarily at supporting
+/// the add/remove sled use case.
+pub struct Policy {
+    /// set of sleds that are supposed to be part of the control plane, along
+    /// with information about resources available to the planner
+    pub sleds: BTreeMap<Uuid, SledResources>,
+}
+
+/// Describes the resources available on each sled for the planner
+pub struct SledResources {
+    /// zpools on this sled
+    ///
+    /// (used to allocate storage for control plane zones with persistent
+    /// storage)
+    pub zpools: BTreeSet<ZpoolName>,
+
+    /// the IPv6 subnet of this sled on the underlay network
+    ///
+    /// (implicitly specifies the whole range of addresses that the planner can
+    /// use for control plane components)
+    pub subnet: Ipv6Subnet<SLED_PREFIX>,
+}
+
+/// Describes a complete set of software and configuration for the system
+// Blueprints are a fundamental part of how the system modifies itself. Each
+// blueprint completely describes all of the software and configuration
+// that the control plane manages. See the nexus/deployment crate-level
+// documentation for details.
+//
+// Blueprints are different from policy. Policy describes the things that an
+// operator would generally want to control. The blueprint describes the
+// details of implementing that policy that an operator shouldn't have to deal
+// with. For example, the operator might write policy that says "I want
+// 5 external DNS zones". The system could then generate a blueprint that
+// _has_ 5 external DNS zones on 5 specific sleds. The blueprint includes all
+// the details needed to achieve that, including which image these zones should
+// run, which zpools their persistent data should be stored on, their public and
+// private IP addresses, their internal DNS names, etc.
+//
+// It must be possible for multiple Nexus instances to execute the same
+// blueprint concurrently and converge to the same thing. Thus, these _cannot_
+// be how blueprints work:
+//
+// - "add a Nexus zone" -- two Nexus instances operating concurrently would
+//   add _two_ Nexus zones (which is wrong)
+// - "ensure that there is a Nexus zone on this sled with this id" -- the IP
+//   addresses and images are left unspecified. Two Nexus instances could pick
+//   different IPs or images for the zone.
+//
+// This is why blueprints must be so detailed. The key principle here is that
+// **all the work of ensuring that the system does the right thing happens in one
+// process (the update planner in one Nexus instance). Once a blueprint has
+// been committed, everyone is on the same page about how to execute it.** The
+// intent is that this makes both planning and executing a lot easier. In
+// particular, by the time we get to execution, all the hard choices have
+// already been made.
+//
+// Currently, blueprints are limited to describing only the set of Omicron
+// zones deployed on each host and some supporting configuration (e.g., DNS).
+// This is aimed at supporting add/remove sleds. The plan is to grow this to
+// include more of the system as we support more use cases.
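To make the intended workflow concrete, here is a minimal sketch of how a caller might consume the types introduced below (hypothetical `before`/`after` values obtained elsewhere; not part of this change):

```rust
// Compare two blueprints and report what would change, using only the
// diff API added in this patch.
fn summarize(before: &Blueprint, after: &Blueprint) {
    let diff = before.diff(after);
    // The Display impl renders a diff(1)-like report.
    println!("{}", diff);
    for (sled_id, changes) in diff.sleds_changed() {
        println!(
            "sled {}: {} zones added, {} removed, {} changed",
            sled_id,
            changes.zones_added().count(),
            changes.zones_removed().count(),
            changes.zones_changed().count(),
        );
    }
}
```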
+#[derive(Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize)]
+pub struct Blueprint {
+    /// unique identifier for this blueprint
+    pub id: Uuid,
+
+    /// mapping: sled id -> zones deployed on each sled
+    /// A sled is considered part of the control plane cluster iff it has an
+    /// entry in this map.
+    pub omicron_zones: BTreeMap<Uuid, OmicronZonesConfig>,
+
+    /// Omicron zones considered in-service (which generally means that they
+    /// should appear in DNS)
+    pub zones_in_service: BTreeSet<Uuid>,
+
+    /// which blueprint this blueprint is based on
+    pub parent_blueprint_id: Option<Uuid>,
+
+    /// when this blueprint was generated (for debugging)
+    pub time_created: chrono::DateTime<chrono::Utc>,
+    /// identity of the component that generated the blueprint (for debugging)
+    /// This would generally be the Uuid of a Nexus instance.
+    pub creator: String,
+    /// human-readable string describing why this blueprint was created
+    /// (for debugging)
+    pub comment: String,
+}
+
+impl Blueprint {
+    /// Iterate over all the Omicron zones in the blueprint, along with
+    /// associated sled id
+    pub fn all_omicron_zones(
+        &self,
+    ) -> impl Iterator<Item = (Uuid, &OmicronZoneConfig)> {
+        self.omicron_zones
+            .iter()
+            .flat_map(|(sled_id, z)| z.zones.iter().map(|z| (*sled_id, z)))
+    }
+
+    /// Iterate over the ids of all sleds in the blueprint
+    pub fn sleds(&self) -> impl Iterator<Item = Uuid> + '_ {
+        self.omicron_zones.keys().copied()
+    }
+
+    /// Summarize the difference between two blueprints
+    pub fn diff<'a>(&'a self, other: &'a Blueprint) -> OmicronZonesDiff<'a> {
+        OmicronZonesDiff {
+            before_label: format!("blueprint {}", self.id),
+            before_zones: self.omicron_zones.clone(),
+            before_zones_in_service: &self.zones_in_service,
+            after_label: format!("blueprint {}", other.id),
+            after_zones: &other.omicron_zones,
+            after_zones_in_service: &other.zones_in_service,
+        }
+    }
+
+    /// Summarize the difference between a collection and a blueprint
+    ///
+    /// This gives an idea about what would change about a running system if one
+    /// were to execute the blueprint.
+    ///
+    /// Note that collections do not currently include information about what
+    /// zones are in-service, so the caller must provide that information.
+    pub fn diff_from_collection<'a>(
+        &'a self,
+        collection: &'a Collection,
+        before_zones_in_service: &'a BTreeSet<Uuid>,
+    ) -> OmicronZonesDiff<'a> {
+        let before_zones = collection
+            .omicron_zones
+            .iter()
+            .map(|(sled_id, zones_found)| (*sled_id, zones_found.zones.clone()))
+            .collect();
+        OmicronZonesDiff {
+            before_label: format!("collection {}", collection.id),
+            before_zones,
+            before_zones_in_service,
+            after_label: format!("blueprint {}", self.id),
+            after_zones: &self.omicron_zones,
+            after_zones_in_service: &self.zones_in_service,
+        }
+    }
+}
+
+/// Describes which blueprint the system is currently trying to make real
+// This is analogous to the db model type until we have that.
+#[derive(Debug, Clone)]
+pub struct BlueprintTarget {
+    pub target_id: Option<Uuid>,
+    pub enabled: bool,
+    pub time_set: chrono::DateTime<chrono::Utc>,
+}
+
+/// Specifies what blueprint, if any, the system should be working toward
+#[derive(Deserialize, JsonSchema)]
+pub struct BlueprintTargetSet {
+    pub target_id: Uuid,
+    pub enabled: bool,
+}
+
+/// Summarizes the differences between two blueprints
+pub struct OmicronZonesDiff<'a> {
+    before_label: String,
+    // We store an owned copy of "before_zones" to make it easier to support
+    // collections here, where we need to assemble this map ourselves.
+    before_zones: BTreeMap<Uuid, OmicronZonesConfig>,
+    before_zones_in_service: &'a BTreeSet<Uuid>,
+    after_label: String,
+    after_zones: &'a BTreeMap<Uuid, OmicronZonesConfig>,
+    after_zones_in_service: &'a BTreeSet<Uuid>,
+}
+
+/// Describes a sled that appeared on both sides of a diff (possibly changed)
+pub struct DiffSledCommon<'a> {
+    /// id of the sled
+    pub sled_id: Uuid,
+    /// generation of the "zones" configuration on the left side
+    pub generation_before: Generation,
+    /// generation of the "zones" configuration on the right side
+    pub generation_after: Generation,
+    zones_added: Vec<&'a OmicronZoneConfig>,
+    zones_removed: Vec<&'a OmicronZoneConfig>,
+    zones_common: Vec<DiffZoneCommon<'a>>,
+}
+
+impl<'a> DiffSledCommon<'a> {
+    /// Iterate over zones added between the blueprints
+    pub fn zones_added(
+        &self,
+    ) -> impl Iterator<Item = &'a OmicronZoneConfig> + '_ {
+        self.zones_added.iter().copied()
+    }
+
+    /// Iterate over zones removed between the blueprints
+    pub fn zones_removed(
+        &self,
+    ) -> impl Iterator<Item = &'a OmicronZoneConfig> + '_ {
+        self.zones_removed.iter().copied()
+    }
+
+    /// Iterate over zones that are common to both blueprints
+    pub fn zones_in_common(
+        &self,
+    ) -> impl Iterator<Item = DiffZoneCommon<'a>> + '_ {
+        self.zones_common.iter().copied()
+    }
+
+    /// Iterate over zones that changed between the blueprints
+    pub fn zones_changed(
+        &self,
+    ) -> impl Iterator<Item = DiffZoneCommon<'a>> + '_ {
+        self.zones_in_common()
+            .filter(|z| z.changed_how != DiffZoneChangedHow::NoChanges)
+    }
+}
+
+/// Describes a zone that was common to both sides of a diff
+#[derive(Debug, Copy, Clone)]
+pub struct DiffZoneCommon<'a> {
+    /// full zone configuration before
+    pub zone_before: &'a OmicronZoneConfig,
+    /// full zone configuration after
+    pub zone_after: &'a OmicronZoneConfig,
+    /// summary of what changed, if anything
+    pub changed_how: DiffZoneChangedHow,
+}
+
+/// Describes how a zone changed across two blueprints, if at all
+#[derive(Debug, Copy, Clone, Eq, PartialEq)]
+pub enum DiffZoneChangedHow {
+    /// the zone did not change between these two blueprints
+    NoChanges,
+    /// the zone details are the same, but it was brought into service
+    AddedToService,
+    /// the zone details are the same, but it was removed from service
+    RemovedFromService,
+    /// the zone's details (i.e., configuration) changed
+    DetailsChanged,
+}
+
+impl<'a> OmicronZonesDiff<'a> {
+    fn sleds_before(&self) -> BTreeSet<Uuid> {
+        self.before_zones.keys().copied().collect()
+    }
+
+    fn sleds_after(&self) -> BTreeSet<Uuid> {
+        self.after_zones.keys().copied().collect()
+    }
+
+    /// Iterate over sleds only present in the second blueprint of a diff
+    pub fn sleds_added(
+        &self,
+    ) -> impl Iterator<Item = (Uuid, &OmicronZonesConfig)> + '_ {
+        let sled_ids = self
+            .sleds_after()
+            .difference(&self.sleds_before())
+            .copied()
+            .collect::<BTreeSet<_>>();
+
+        sled_ids
+            .into_iter()
+            .map(|sled_id| (sled_id, self.after_zones.get(&sled_id).unwrap()))
+    }
+
+    /// Iterate over sleds only present in the first blueprint of a diff
+    pub fn sleds_removed(
+        &self,
+    ) -> impl Iterator<Item = (Uuid, &OmicronZonesConfig)> + '_ {
+        let sled_ids = self
+            .sleds_before()
+            .difference(&self.sleds_after())
+            .copied()
+            .collect::<BTreeSet<_>>();
+        sled_ids
+            .into_iter()
+            .map(|sled_id| (sled_id, self.before_zones.get(&sled_id).unwrap()))
+    }
+
+    /// Iterate over sleds present in both blueprints in a diff
+    pub fn sleds_in_common(
+        &'a self,
+    ) -> impl Iterator<Item = (Uuid, DiffSledCommon<'a>)> + '_ {
+        let sled_ids = self
+            .sleds_before()
+            .intersection(&self.sleds_after())
+            .copied()
+            .collect::<BTreeSet<_>>();
+        sled_ids.into_iter().map(|sled_id| {
+            let b1sledzones = self.before_zones.get(&sled_id).unwrap();
+            let b2sledzones = self.after_zones.get(&sled_id).unwrap();
+
+            // Assemble separate summaries of the zones, indexed by
zone id.
+            #[derive(Debug)]
+            struct ZoneInfo<'a> {
+                zone: &'a OmicronZoneConfig,
+                in_service: bool,
+            }
+
+            let b1zones: BTreeMap<Uuid, ZoneInfo> = b1sledzones
+                .zones
+                .iter()
+                .map(|zone| {
+                    (
+                        zone.id,
+                        ZoneInfo {
+                            zone,
+                            in_service: self
+                                .before_zones_in_service
+                                .contains(&zone.id),
+                        },
+                    )
+                })
+                .collect();
+            let mut b2zones: BTreeMap<Uuid, ZoneInfo> = b2sledzones
+                .zones
+                .iter()
+                .map(|zone| {
+                    (
+                        zone.id,
+                        ZoneInfo {
+                            zone,
+                            in_service: self
+                                .after_zones_in_service
+                                .contains(&zone.id),
+                        },
+                    )
+                })
+                .collect();
+            let mut zones_removed = vec![];
+            let mut zones_changed = vec![];
+
+            // Now go through each zone and compare them.
+            for (zone_id, b1z_info) in &b1zones {
+                if let Some(b2z_info) = b2zones.remove(zone_id) {
+                    let changed_how = if b1z_info.zone != b2z_info.zone {
+                        DiffZoneChangedHow::DetailsChanged
+                    } else if b1z_info.in_service && !b2z_info.in_service {
+                        DiffZoneChangedHow::RemovedFromService
+                    } else if !b1z_info.in_service && b2z_info.in_service {
+                        DiffZoneChangedHow::AddedToService
+                    } else {
+                        DiffZoneChangedHow::NoChanges
+                    };
+                    zones_changed.push(DiffZoneCommon {
+                        zone_before: b1z_info.zone,
+                        zone_after: b2z_info.zone,
+                        changed_how,
+                    });
+                } else {
+                    zones_removed.push(b1z_info.zone);
+                }
+            }
+
+            // Since we removed common zones above, anything else exists only in
+            // b2 and was therefore added.
+            let zones_added =
+                b2zones.into_values().map(|b2z_info| b2z_info.zone).collect();
+
+            (
+                sled_id,
+                DiffSledCommon {
+                    sled_id,
+                    generation_before: b1sledzones.generation,
+                    generation_after: b2sledzones.generation,
+                    zones_added,
+                    zones_removed,
+                    zones_common: zones_changed,
+                },
+            )
+        })
+    }
+
+    pub fn sleds_changed(
+        &'a self,
+    ) -> impl Iterator<Item = (Uuid, DiffSledCommon<'a>)> + '_ {
+        self.sleds_in_common().filter(|(_, sled_changes)| {
+            sled_changes.zones_added().next().is_some()
+                || sled_changes.zones_removed().next().is_some()
+                || sled_changes.zones_changed().next().is_some()
+        })
+    }
+
+    fn print_whole_sled(
+        &self,
+        f: &mut std::fmt::Formatter<'_>,
+        prefix: char,
+        label: &str,
+        bbsledzones: &OmicronZonesConfig,
+        sled_id: Uuid,
+    ) -> std::fmt::Result {
+        writeln!(f, "{} sled {} ({})", prefix, sled_id, label)?;
+        writeln!(
+            f,
+            "{} zone config generation {}",
+            prefix, bbsledzones.generation
+        )?;
+        for z in &bbsledzones.zones {
+            writeln!(
+                f,
+                "{} zone {} type {} ({})",
+                prefix,
+                z.id,
+                z.zone_type.label(),
+                label
+            )?;
+        }
+
+        Ok(())
+    }
+}
+
+/// Implements diff(1)-like output for diff'ing two blueprints
+impl<'a> std::fmt::Display for OmicronZonesDiff<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        writeln!(f, "diff {} {}", self.before_label, self.after_label)?;
+        writeln!(f, "--- {}", self.before_label)?;
+        writeln!(f, "+++ {}", self.after_label)?;
+
+        for (sled_id, sled_zones) in self.sleds_removed() {
+            self.print_whole_sled(f, '-', "removed", sled_zones, sled_id)?;
+        }
+
+        for (sled_id, sled_changes) in self.sleds_in_common() {
+            // Print a line about the sled itself and zone config generation,
+            // regardless of whether anything has changed.
+ writeln!(f, " sled {}", sled_id)?; + if sled_changes.generation_before != sled_changes.generation_after { + writeln!( + f, + "- zone config generation {}", + sled_changes.generation_before + )?; + writeln!( + f, + "+ zone config generation {}", + sled_changes.generation_after + )?; + } else { + writeln!( + f, + " zone config generation {}", + sled_changes.generation_before + )?; + } + + for zone in sled_changes.zones_removed() { + writeln!( + f, + "- zone {} type {} (removed)", + zone.id, + zone.zone_type.label(), + )?; + } + + for zone_changes in sled_changes.zones_in_common() { + let zone_id = zone_changes.zone_before.id; + let zone_type = zone_changes.zone_before.zone_type.label(); + let zone2_type = zone_changes.zone_after.zone_type.label(); + match zone_changes.changed_how { + DiffZoneChangedHow::DetailsChanged => { + writeln!( + f, + "- zone {} type {} (changed)", + zone_id, zone_type, + )?; + writeln!( + f, + "+ zone {} type {} (changed)", + zone_id, zone2_type, + )?; + } + DiffZoneChangedHow::RemovedFromService => { + writeln!( + f, + "- zone {} type {} (in service)", + zone_id, zone_type, + )?; + writeln!( + f, + "+ zone {} type {} (removed from service)", + zone_id, zone2_type, + )?; + } + DiffZoneChangedHow::AddedToService => { + writeln!( + f, + "- zone {} type {} (not in service)", + zone_id, zone_type, + )?; + writeln!( + f, + "+ zone {} type {} (added to service)", + zone_id, zone2_type, + )?; + } + DiffZoneChangedHow::NoChanges => { + writeln!( + f, + " zone {} type {} (unchanged)", + zone_id, zone_type, + )?; + } + } + } + + for zone in sled_changes.zones_added() { + writeln!( + f, + "+ zone {} type {} (added)", + zone.id, + zone.zone_type.label(), + )?; + } + } + + for (sled_id, sled_zones) in self.sleds_added() { + self.print_whole_sled(f, '+', "added", sled_zones, sled_id)?; + } + + Ok(()) + } +} diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index d3f269ef5d..a33bc0b8bb 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -88,6 +88,9 @@ id_path_param!(GroupPath, group_id, "group"); id_path_param!(SledPath, sled_id, "sled"); id_path_param!(SwitchPath, switch_id, "switch"); +// Internal API parameters +id_path_param!(BlueprintPath, blueprint_id, "blueprint"); + pub struct SledSelector { /// ID of the sled pub sled: Uuid, diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index c85597e94c..cf312d3b82 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -428,7 +428,7 @@ pub struct Switch { /// /// Physical disks reside in a particular sled and are used to store both /// Instance Disk data as well as internal metadata. 
-#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
+#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, JsonSchema)]
 pub struct PhysicalDisk {
     #[serde(flatten)]
     pub identity: AssetIdentityMetadata,
diff --git a/nexus/types/src/identity.rs b/nexus/types/src/identity.rs
index 7837ed5bd9..ededb926df 100644
--- a/nexus/types/src/identity.rs
+++ b/nexus/types/src/identity.rs
@@ -43,7 +43,7 @@ pub trait Resource {
 /// Identity-related metadata that's included in "asset" public API objects
 /// (which generally have no name or description)
-#[derive(Clone, Debug, Deserialize, PartialEq, Serialize, JsonSchema)]
+#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, JsonSchema)]
 pub struct AssetIdentityMetadata {
     /// unique, immutable, system-controlled identifier for each resource
     pub id: Uuid,
diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs
index b27d7277ba..c99e51af4f 100644
--- a/nexus/types/src/inventory.rs
+++ b/nexus/types/src/inventory.rs
@@ -17,6 +17,7 @@ pub use gateway_client::types::PowerState;
 pub use gateway_client::types::RotSlot;
 pub use gateway_client::types::SpType;
 use omicron_common::api::external::ByteCount;
+pub use omicron_common::api::internal::shared::SourceNatConfig;
 pub use sled_agent_client::types::NetworkInterface;
 pub use sled_agent_client::types::NetworkInterfaceKind;
 pub use sled_agent_client::types::OmicronZoneConfig;
@@ -24,8 +25,6 @@ pub use sled_agent_client::types::OmicronZoneDataset;
 pub use sled_agent_client::types::OmicronZoneType;
 pub use sled_agent_client::types::OmicronZonesConfig;
 pub use sled_agent_client::types::SledRole;
-pub use sled_agent_client::types::SourceNatConfig;
-pub use sled_agent_client::types::Vni;
 pub use sled_agent_client::types::ZpoolName;
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
@@ -128,6 +127,13 @@ impl Collection {
             .get(&which)
             .and_then(|by_bb| by_bb.get(baseboard_id))
     }
+
+    /// Iterate over all the Omicron zones in the collection
+    pub fn all_omicron_zones(
+        &self,
+    ) -> impl Iterator<Item = &OmicronZoneConfig> {
+        self.omicron_zones.values().flat_map(|z| z.zones.zones.iter())
+    }
 }

 /// A unique baseboard id found during a collection
diff --git a/nexus/types/src/lib.rs b/nexus/types/src/lib.rs
index a48c4d3b00..494573e834 100644
--- a/nexus/types/src/lib.rs
+++ b/nexus/types/src/lib.rs
@@ -29,6 +29,7 @@
 //! rules, so our model layer knows about our views. That seems to be a
 //! relatively minor offense, so it's the way we leave things for now.
+pub mod deployment; pub mod external_api; pub mod identity; pub mod internal_api; diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index a1d70d838b..b5cbb25c66 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -125,6 +125,240 @@ } } }, + "/deployment/blueprints/all": { + "get": { + "summary": "Lists blueprints", + "operationId": "blueprint_list", + "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/IdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlueprintResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + } + }, + "/deployment/blueprints/all/{blueprint_id}": { + "get": { + "summary": "Fetches one blueprint", + "operationId": "blueprint_view", + "parameters": [ + { + "in": "path", + "name": "blueprint_id", + "description": "ID of the blueprint", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Blueprint" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "summary": "Deletes one blueprint", + "operationId": "blueprint_delete", + "parameters": [ + { + "in": "path", + "name": "blueprint_id", + "description": "ID of the blueprint", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/deployment/blueprints/generate-from-collection": { + "post": { + "summary": "Generates a new blueprint matching the specified inventory collection", + "operationId": "blueprint_generate_from_collection", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CollectionId" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Blueprint" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/deployment/blueprints/regenerate": { + "post": { + "summary": "Generates a new blueprint for the current system, re-evaluating anything", + "description": "that's changed since the last one was generated", + "operationId": "blueprint_regenerate", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Blueprint" + } + } + } + }, + "4XX": { + "$ref": 
"#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/deployment/blueprints/target": { + "get": { + "summary": "Fetches the current target blueprint, if any", + "operationId": "blueprint_target_view", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlueprintTarget" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "post": { + "summary": "Make the specified blueprint the new target", + "operationId": "blueprint_target_set", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlueprintTargetSet" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BlueprintTarget" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/disk/{disk_id}/remove-read-only-parent": { "post": { "summary": "Request removal of a read_only_parent from a disk", @@ -1844,42 +2078,171 @@ "range" ] }, - "ByteCount": { - "description": "Byte count to express memory or storage capacity.", - "type": "integer", - "format": "uint64", - "minimum": 0 - }, - "Certificate": { + "Blueprint": { + "description": "Describes a complete set of software and configuration for the system", "type": "object", "properties": { - "cert": { + "comment": { + "description": "human-readable string describing why this blueprint was created (for debugging)", "type": "string" }, - "key": { + "creator": { + "description": "identity of the component that generated the blueprint (for debugging) This would generally be the Uuid of a Nexus instance.", "type": "string" + }, + "id": { + "description": "unique identifier for this blueprint", + "type": "string", + "format": "uuid" + }, + "omicron_zones": { + "description": "mapping: sled id -> zones deployed on each sled A sled is considered part of the control plane cluster iff it has an entry in this map.", + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/OmicronZonesConfig" + } + }, + "parent_blueprint_id": { + "nullable": true, + "description": "which blueprint this blueprint is based on", + "type": "string", + "format": "uuid" + }, + "time_created": { + "description": "when this blueprint was generated (for debugging)", + "type": "string", + "format": "date-time" + }, + "zones_in_service": { + "description": "Omicron zones considered in-service (which generally means that they should appear in DNS)", + "type": "array", + "items": { + "type": "string", + "format": "uuid" + }, + "uniqueItems": true } }, "required": [ - "cert", - "key" + "comment", + "creator", + "id", + "omicron_zones", + "time_created", + "zones_in_service" ] }, - "Cumulativedouble": { - "description": "A cumulative or counter data type.", + "BlueprintResultsPage": { + "description": "A single page of results", "type": "object", "properties": { - "start_time": { - "type": "string", - "format": "date-time" + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/Blueprint" + } }, - "value": { - "type": "number", - "format": "double" + "next_page": { + "nullable": true, + "description": "token used to fetch the 
next page of results (if any)", + "type": "string" } }, "required": [ - "start_time", + "items" + ] + }, + "BlueprintTarget": { + "description": "Describes what blueprint, if any, the system is currently working toward", + "type": "object", + "properties": { + "enabled": { + "description": "policy: should the system actively work towards this blueprint\n\nThis should generally be left enabled.", + "type": "boolean" + }, + "target_id": { + "description": "id of the blueprint that the system is trying to make real", + "type": "string", + "format": "uuid" + }, + "time_set": { + "description": "when this blueprint was made the target", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "enabled", + "target_id", + "time_set" + ] + }, + "BlueprintTargetSet": { + "description": "Specifies what blueprint, if any, the system should be working toward", + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "target_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "enabled", + "target_id" + ] + }, + "ByteCount": { + "description": "Byte count to express memory or storage capacity.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "Certificate": { + "type": "object", + "properties": { + "cert": { + "type": "string" + }, + "key": { + "type": "string" + } + }, + "required": [ + "cert", + "key" + ] + }, + "CollectionId": { + "type": "object", + "properties": { + "collection_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "collection_id" + ] + }, + "Cumulativedouble": { + "description": "A cumulative or counter data type.", + "type": "object", + "properties": { + "start_time": { + "type": "string", + "format": "date-time" + }, + "value": { + "type": "number", + "format": "double" + } + }, + "required": [ + "start_time", "value" ] }, @@ -3825,6 +4188,16 @@ } ] }, + "IpNet": { + "anyOf": [ + { + "$ref": "#/components/schemas/Ipv4Net" + }, + { + "$ref": "#/components/schemas/Ipv6Net" + } + ] + }, "IpNetwork": { "oneOf": [ { @@ -3912,6 +4285,10 @@ "vni" ] }, + "Ipv4Net": { + "description": "An IPv4 subnet, including prefix and subnet mask", + "type": "string" + }, "Ipv4Network": { "type": "string", "pattern": "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\/(3[0-2]|[0-2]?[0-9])$" @@ -3934,6 +4311,10 @@ "last" ] }, + "Ipv6Net": { + "description": "An IPv6 subnet, including prefix and subnet mask", + "type": "string" + }, "Ipv6Network": { "type": "string", "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\")[/](12[0-8]|1[0-1][0-9]|[0-9]?[0-9])$" @@ -4272,6 +4653,99 @@ "minLength": 1, "maxLength": 63 }, + "NetworkInterface": { + "description": "Information required to construct a virtual network interface", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "ip": { + "type": "string", + "format": "ip" + }, + 
"kind": { + "$ref": "#/components/schemas/NetworkInterfaceKind" + }, + "mac": { + "$ref": "#/components/schemas/MacAddr" + }, + "name": { + "$ref": "#/components/schemas/Name" + }, + "primary": { + "type": "boolean" + }, + "slot": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "subnet": { + "$ref": "#/components/schemas/IpNet" + }, + "vni": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "id", + "ip", + "kind", + "mac", + "name", + "primary", + "slot", + "subnet", + "vni" + ] + }, + "NetworkInterfaceKind": { + "description": "The type of network interface", + "oneOf": [ + { + "description": "A vNIC attached to a guest instance", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "instance" + ] + } + }, + "required": [ + "id", + "type" + ] + }, + { + "description": "A vNIC associated with an internal service", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "service" + ] + } + }, + "required": [ + "id", + "type" + ] + } + ] + }, "NewPasswordHash": { "title": "A password hash in PHC string format", "description": "Password hashes must be in PHC (Password Hashing Competition) string format. Passwords must be hashed with Argon2id. Password hashes may be rejected if the parameters appear not to be secure enough.", @@ -4281,6 +4755,410 @@ "description": "Unique name for a saga [`Node`]\n\nEach node requires a string name that's unique within its DAG. The name is used to identify its output. Nodes that depend on a given node (either directly or indirectly) can access the node's output using its name.", "type": "string" }, + "OmicronZoneConfig": { + "description": "Describes one Omicron-managed zone running on a sled", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "underlay_address": { + "type": "string", + "format": "ipv6" + }, + "zone_type": { + "$ref": "#/components/schemas/OmicronZoneType" + } + }, + "required": [ + "id", + "underlay_address", + "zone_type" + ] + }, + "OmicronZoneDataset": { + "description": "Describes a persistent ZFS dataset associated with an Omicron zone", + "type": "object", + "properties": { + "pool_name": { + "$ref": "#/components/schemas/ZpoolName" + } + }, + "required": [ + "pool_name" + ] + }, + "OmicronZoneType": { + "description": "Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration", + "oneOf": [ + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "nullable": true, + "type": "string" + }, + "nic": { + "description": "The service vNIC providing outbound connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/components/schemas/NetworkInterface" + } + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "snat_cfg": { + "description": "The SNAT configuration for outbound connections.", + "allOf": [ + { + "$ref": "#/components/schemas/SourceNatConfig" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "boundary_ntp" + ] + } + }, + "required": [ + "address", + "dns_servers", + "nic", + "ntp_servers", + "snat_cfg", + "type" + ] + }, + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": 
"#/components/schemas/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse" + ] + } + }, + "required": [ + "address", + "dataset", + "type" + ] + }, + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_keeper" + ] + } + }, + "required": [ + "address", + "dataset", + "type" + ] + }, + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "cockroach_db" + ] + } + }, + "required": [ + "address", + "dataset", + "type" + ] + }, + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "crucible" + ] + } + }, + "required": [ + "address", + "dataset", + "type" + ] + }, + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "crucible_pantry" + ] + } + }, + "required": [ + "address", + "type" + ] + }, + { + "type": "object", + "properties": { + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "dns_address": { + "description": "The address at which the external DNS server is reachable.", + "type": "string" + }, + "http_address": { + "description": "The address at which the external DNS server API is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/components/schemas/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "external_dns" + ] + } + }, + "required": [ + "dataset", + "dns_address", + "http_address", + "nic", + "type" + ] + }, + { + "type": "object", + "properties": { + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "dns_address": { + "type": "string" + }, + "gz_address": { + "description": "The addresses in the global zone which should be created\n\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.", + "type": "string", + "format": "ipv6" + }, + "gz_address_index": { + "description": "The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "http_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "internal_dns" + ] + } + }, + "required": [ + "dataset", + "dns_address", + "gz_address", + "gz_address_index", + "http_address", + "type" + ] + }, + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "nullable": true, + "type": "string" + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "type": { + "type": "string", + "enum": [ + "internal_ntp" + ] + } + }, + "required": [ + "address", + "dns_servers", + "ntp_servers", + "type" + ] + }, + { + "type": "object", + "properties": { + "external_dns_servers": { + "description": "External DNS servers Nexus can use to resolve external hosts.", + "type": "array", + "items": { + "type": "string", + 
"format": "ip" + } + }, + "external_ip": { + "description": "The address at which the external nexus server is reachable.", + "type": "string", + "format": "ip" + }, + "external_tls": { + "description": "Whether Nexus's external endpoint should use TLS", + "type": "boolean" + }, + "internal_address": { + "description": "The address at which the internal nexus server is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/components/schemas/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "nexus" + ] + } + }, + "required": [ + "external_dns_servers", + "external_ip", + "external_tls", + "internal_address", + "nic", + "type" + ] + }, + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "oximeter" + ] + } + }, + "required": [ + "address", + "type" + ] + } + ] + }, + "OmicronZonesConfig": { + "description": "Describes the set of Omicron-managed zones running on a sled", + "type": "object", + "properties": { + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + }, + "zones": { + "description": "list of running zones", + "type": "array", + "items": { + "$ref": "#/components/schemas/OmicronZoneConfig" + } + } + }, + "required": [ + "generation", + "zones" + ] + }, "OximeterInfo": { "description": "Message used to notify Nexus that this oximeter instance is up and running.", "type": "object", @@ -5620,6 +6498,10 @@ "format": "uint32", "minimum": 0 }, + "ZpoolName": { + "description": "Zpool names are of the format ox{i,p}_. 
They are either Internal or External, and should be unique", "type": "string" },
 "ZpoolPutRequest": { "description": "Sent by a sled agent on startup to Nexus to request further instruction", "type": "object",
diff --git a/sled-agent/src/bin/sled-agent-sim.rs b/sled-agent/src/bin/sled-agent-sim.rs
index 4b3bc9e432..8de9a3c423 100644
--- a/sled-agent/src/bin/sled-agent-sim.rs
+++ b/sled-agent/src/bin/sled-agent-sim.rs
@@ -122,6 +122,7 @@ async fn do_run() -> Result<(), CmdError> {
         args.sim_mode,
         Some(args.nexus_addr),
         Some(tmp.path()),
+        None,
     )
 };
diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs
index 8417546e3b..9120bafa9a 100644
--- a/sled-agent/src/params.rs
+++ b/sled-agent/src/params.rs
@@ -702,7 +702,7 @@ impl From<OmicronZoneType> for sled_agent_client::types::OmicronZoneType {
             dns_servers,
             domain,
             ntp_servers,
-            snat_cfg: snat_cfg.into(),
+            snat_cfg,
             nic: nic.into(),
         },
         OmicronZoneType::Clickhouse { address, dataset } => {
diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs
index 441c7fd842..bed82a7a01 100644
--- a/sled-agent/src/rack_setup/plan/service.rs
+++ b/sled-agent/src/rack_setup/plan/service.rs
@@ -10,7 +10,8 @@ use crate::rack_setup::config::SetupServiceConfig as Config;
 use camino::Utf8PathBuf;
 use dns_service_client::types::DnsConfigParams;
 use illumos_utils::zpool::ZpoolName;
-use internal_dns::{ServiceName, DNS_ZONE};
+use internal_dns::config::{Host, ZoneVariant};
+use internal_dns::ServiceName;
 use omicron_common::address::{
     get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet,
     DENDRITE_PORT, DNS_HTTP_PORT, DNS_PORT, DNS_REDUNDANCY, MAX_DNS_REDUNDANCY,
@@ -659,7 +660,8 @@ impl Plan {
             let ntp_address = SocketAddrV6::new(address, NTP_PORT, 0, 0);
             let (zone_type, svcname) = if idx < BOUNDARY_NTP_COUNT {
-                boundary_ntp_servers.push(format!("{}.host.{}", id, DNS_ZONE));
+                boundary_ntp_servers
+                    .push(Host::for_zone(id, ZoneVariant::Other).fqdn());
                 let (nic, snat_cfg) = svc_port_builder.next_snat(id)?;
                 (
                     OmicronZoneType::BoundaryNtp {
diff --git a/sled-agent/src/sim/config.rs b/sled-agent/src/sim/config.rs
index 81e11dc1c2..7a20dd5709 100644
--- a/sled-agent/src/sim/config.rs
+++ b/sled-agent/src/sim/config.rs
@@ -84,6 +84,7 @@ impl Config {
         sim_mode: SimMode,
         nexus_address: Option<SocketAddr>,
         update_directory: Option<&Utf8Path>,
+        zpools: Option<Vec<ConfigZpool>>,
     ) -> Config {
         // This IP range is guaranteed by RFC 6666 to discard traffic.
         // For tests that don't use a Nexus, we use this address to simulate a
@@ -94,6 +95,10 @@
         // updates, make up a path that doesn't exist.
         let update_directory =
             update_directory.unwrap_or_else(|| "/nonexistent".into());
+        let zpools = zpools.unwrap_or_else(|| {
+            // By default, create 10 "virtual" U.2s, with 1 TB of storage.
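+            // (1 << 40 bytes = 1 TiB per simulated zpool; tests that need a
+            // specific storage layout can pass Some(zpools) instead.)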
+ vec![ConfigZpool { size: 1 << 40 }; 10] + }); Config { id, sim_mode, @@ -104,7 +109,7 @@ impl Config { ..Default::default() }, storage: ConfigStorage { - zpools: vec![], + zpools, ip: IpAddr::from(Ipv6Addr::LOCALHOST), }, updates: ConfigUpdates { From 0ab0df5cfd45e77d9b57212b68c4da263d368a69 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 18 Jan 2024 14:49:46 -0500 Subject: [PATCH 05/91] Bump SP to v1.0.5 (#4842) --- tools/hubris_checksums | 14 +++++++------- tools/hubris_version | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/hubris_checksums b/tools/hubris_checksums index 707c67fe0c..478d8f192e 100644 --- a/tools/hubris_checksums +++ b/tools/hubris_checksums @@ -1,7 +1,7 @@ -09f0342eed777495ac0a852f219d2dec45fdc1b860f938f95736851b1627cad7 build-gimlet-c-image-default-v1.0.4.zip -aef9279ba6d1d0ffa64586d71cdf5933eddbe048ce1a10f5f611128a84b53642 build-gimlet-d-image-default-v1.0.4.zip -989f89f0060239b77d92fe068ceae1be406591c997224256c617d77b2ccbf1b0 build-gimlet-e-image-default-v1.0.4.zip -8e41a139bc62ff86b8343989889491739bb90eb46e1a02585252adf3ee540db9 build-psc-b-image-default-v1.0.4.zip -76e35e71714921a1ca5f7f8314fc596e3b5fe1dfd422c59fdc9a62c1ebfeec0e build-psc-c-image-default-v1.0.4.zip -a406045b1d545fd063bb989c84a774e4d09a445618d4a8889ce232a3b45884a7 build-sidecar-b-image-default-v1.0.4.zip -69ba3ac372388058f8a6e58230e7e2964990609f18c0960357d17bfc16f25bae build-sidecar-c-image-default-v1.0.4.zip +6567a0775d5f0b7ff09d97f149532a627222971eadd89ea0dac186c9a825846d build-gimlet-c-image-default-v1.0.5.zip +1190b27246d8c8c20837d957266ac9e90e32934841b9acc2990d2762a3b53a16 build-gimlet-d-image-default-v1.0.5.zip +79e644ffbbd7195ff2699c90ee26f277edac40b385fc5bb8e7821a4611ad7c11 build-gimlet-e-image-default-v1.0.5.zip +bf83e0311e18fc716dd5a315106aa965d278c4f481892fe124bc376b2e23581e build-psc-b-image-default-v1.0.5.zip +0dd1de9c3d3c686e8a05525fbed48c6532b608b34c77214b7fe15a8f54b0f3cb build-psc-c-image-default-v1.0.5.zip +c024d5546288d0d953735b3a0221ee0e218cc27ed1e26eede5c91c9a8137c592 build-sidecar-b-image-default-v1.0.5.zip +de79320022718be94c81dc7d44b5229ce0956aff9c1ffa11e8c3ff8961af49bb build-sidecar-c-image-default-v1.0.5.zip diff --git a/tools/hubris_version b/tools/hubris_version index 0cce8d745a..37e565d060 100644 --- a/tools/hubris_version +++ b/tools/hubris_version @@ -1 +1 @@ -TAGS=(gimlet-v1.0.4 psc-v1.0.4 sidecar-v1.0.4) +TAGS=(gimlet-v1.0.5 psc-v1.0.5 sidecar-v1.0.5) From 9a3e1d8a5a26de46a338ba3b2bc0b170f0bcc360 Mon Sep 17 00:00:00 2001 From: Adam Leventhal Date: Thu, 18 Jan 2024 13:07:27 -0800 Subject: [PATCH 06/91] update dropshot (#4794) --- Cargo.lock | 92 ++++++++++++++++++++++------- Cargo.toml | 5 +- nexus/Cargo.toml | 1 + nexus/src/app/external_endpoints.rs | 17 +++--- nexus/src/app/mod.rs | 4 -- sled-agent/Cargo.toml | 6 +- test-utils/src/certificates.rs | 22 +++---- workspace-hack/Cargo.toml | 8 --- 8 files changed, 100 insertions(+), 55 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bf05be9eba..59ea2919a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1906,7 +1906,7 @@ dependencies = [ [[package]] name = "dropshot" version = "0.9.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#b19a9a5d049f4433547f9f3b11d10a9483fc6acf" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#711a7490d81416731cfe0f9fef366ed5f266a0ee" dependencies = [ "async-stream", "async-trait", @@ -1927,7 +1927,7 @@ dependencies = [ "paste", "percent-encoding", "proc-macro2", - "rustls", + "rustls 0.22.2", "rustls-pemfile 
2.0.0", "schemars", "serde", @@ -1941,7 +1941,7 @@ dependencies = [ "slog-json", "slog-term", "tokio", - "tokio-rustls", + "tokio-rustls 0.25.0", "toml 0.8.8", "usdt", "uuid", @@ -1952,7 +1952,7 @@ dependencies = [ [[package]] name = "dropshot_endpoint" version = "0.9.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#b19a9a5d049f4433547f9f3b11d10a9483fc6acf" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#711a7490d81416731cfe0f9fef366ed5f266a0ee" dependencies = [ "proc-macro2", "quote", @@ -3064,15 +3064,30 @@ name = "hyper-rustls" version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.11", + "hyper", + "rustls 0.21.9", + "tokio", + "tokio-rustls 0.24.1", +] + +[[package]] +name = "hyper-rustls" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "399c78f9338483cb7e630c8474b07268983c6bd5acee012e4211f9f7bb21b070" dependencies = [ "futures-util", "http 0.2.11", "hyper", "log", - "rustls", + "rustls 0.22.2", "rustls-native-certs", + "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.25.0", ] [[package]] @@ -4143,7 +4158,7 @@ dependencies = [ "headers", "http 0.2.11", "hyper", - "hyper-rustls", + "hyper-rustls 0.25.0", "internal-dns", "ipnetwork", "itertools 0.12.0", @@ -4173,7 +4188,7 @@ dependencies = [ "rcgen", "ref-cast", "regex", - "rustls", + "rustls 0.22.2", "samael", "serde", "serde_json", @@ -4784,7 +4799,7 @@ dependencies = [ "httptest", "hubtools", "hyper", - "hyper-rustls", + "hyper-rustls 0.25.0", "illumos-utils", "internal-dns", "ipnetwork", @@ -4832,7 +4847,8 @@ dependencies = [ "regex", "reqwest", "ring 0.17.7", - "rustls", + "rustls 0.22.2", + "rustls-pemfile 2.0.0", "samael", "schemars", "semver 1.0.21", @@ -5076,7 +5092,7 @@ dependencies = [ "regex", "reqwest", "ring 0.17.7", - "rustls", + "rustls 0.22.2", "slog", "subprocess", "tar", @@ -5136,7 +5152,6 @@ dependencies = [ "hex", "hmac", "hyper", - "hyper-rustls", "indexmap 2.1.0", "inout", "ipnetwork", @@ -6766,7 +6781,7 @@ dependencies = [ "http 0.2.11", "http-body", "hyper", - "hyper-rustls", + "hyper-rustls 0.24.2", "hyper-tls", "ipnet", "js-sys", @@ -6776,7 +6791,7 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls", + "rustls 0.21.9", "rustls-pemfile 1.0.3", "serde", "serde_json", @@ -6784,7 +6799,7 @@ dependencies = [ "system-configuration", "tokio", "tokio-native-tls", - "tokio-rustls", + "tokio-rustls 0.24.1", "tokio-util", "tower-service", "url", @@ -7104,18 +7119,33 @@ checksum = "629648aced5775d558af50b2b4c7b02983a04b312126d45eeead26e7caa498b9" dependencies = [ "log", "ring 0.17.7", - "rustls-webpki", + "rustls-webpki 0.101.7", "sct", ] +[[package]] +name = "rustls" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41" +dependencies = [ + "log", + "ring 0.17.7", + "rustls-pki-types", + "rustls-webpki 0.102.1", + "subtle", + "zeroize", +] + [[package]] name = "rustls-native-certs" -version = "0.6.3" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +checksum = "8f1fb85efa936c42c6d5fc28d2629bb51e4b2f4b8a5211e297d599cc5a093792" dependencies = [ "openssl-probe", - "rustls-pemfile 1.0.3", + "rustls-pemfile 
2.0.0", + "rustls-pki-types", "schannel", "security-framework", ] @@ -7155,6 +7185,17 @@ dependencies = [ "untrusted 0.9.0", ] +[[package]] +name = "rustls-webpki" +version = "0.102.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef4ca26037c909dedb327b48c3327d0ba91d3dd3c4e05dad328f210ffb68e95b" +dependencies = [ + "ring 0.17.7", + "rustls-pki-types", + "untrusted 0.9.0", +] + [[package]] name = "rustversion" version = "1.0.14" @@ -8826,7 +8867,18 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls", + "rustls 0.21.9", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" +dependencies = [ + "rustls 0.22.2", + "rustls-pki-types", "tokio", ] diff --git a/Cargo.toml b/Cargo.toml index 0ca70097cd..e81817310e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -219,7 +219,7 @@ httptest = "0.15.5" hubtools = { git = "https://github.com/oxidecomputer/hubtools.git", branch = "main" } humantime = "2.1.0" hyper = "0.14" -hyper-rustls = "0.24.2" +hyper-rustls = "0.25.0" hyper-staticfile = "0.9.5" illumos-utils = { path = "illumos-utils" } indexmap = "2.1.0" @@ -319,7 +319,8 @@ ring = "0.17.7" rpassword = "7.3.1" rstest = "0.18.2" rustfmt-wrapper = "0.2" -rustls = "0.21.9" +rustls = "0.22.2" +rustls-pemfile = "2.0.0" rustyline = "12.0.0" samael = { git = "https://github.com/njaremko/samael", features = ["xmlsec"], branch = "master" } schemars = "0.8.16" diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 3feb333ee3..52ee7034dd 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -87,6 +87,7 @@ oximeter.workspace = true oximeter-instruments = { workspace = true, features = ["http-instruments"] } oximeter-producer.workspace = true rustls = { workspace = true } +rustls-pemfile = { workspace = true } omicron-workspace-hack.workspace = true [dev-dependencies] diff --git a/nexus/src/app/external_endpoints.rs b/nexus/src/app/external_endpoints.rs index 0a6dd41db6..bcfec667ce 100644 --- a/nexus/src/app/external_endpoints.rs +++ b/nexus/src/app/external_endpoints.rs @@ -429,19 +429,21 @@ impl TryFrom for TlsCertificate { // Assemble a rustls CertifiedKey with both the certificate and the key. let certified_key = { - let private_key_der = private_key - .private_key_to_der() - .context("serializing private key to DER")?; - let rustls_private_key = rustls::PrivateKey(private_key_der); + let mut cursor = std::io::Cursor::new(db_cert.key.clone()); + let rustls_private_key = rustls_pemfile::private_key(&mut cursor) + .expect("parsing private key PEM") + .expect("no private keys found"); let rustls_signing_key = - rustls::sign::any_supported_type(&rustls_private_key) - .context("parsing DER private key")?; + rustls::crypto::ring::sign::any_supported_type( + &rustls_private_key, + ) + .context("parsing DER private key")?; let rustls_certs = certs_pem .iter() .map(|x509| { x509.to_der() .context("serializing cert to DER") - .map(rustls::Certificate) + .map(rustls::pki_types::CertificateDer::from) }) .collect::>()?; Arc::new(CertifiedKey::new(rustls_certs, rustls_signing_key)) @@ -563,6 +565,7 @@ pub(crate) async fn read_all_endpoints( /// session. /// /// See the module-level comment for more details. 
+#[derive(Debug)]
 pub struct NexusCertResolver {
     log: slog::Logger,
     config_rx: watch::Receiver<Option<ExternalEndpoints>>,
diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs
index defc4a05ea..80bfd5ef22 100644
--- a/nexus/src/app/mod.rs
+++ b/nexus/src/app/mod.rs
@@ -518,10 +518,6 @@ impl Nexus {
         }

         let mut rustls_cfg = rustls::ServerConfig::builder()
-            .with_safe_default_cipher_suites()
-            .with_safe_default_kx_groups()
-            .with_safe_default_protocol_versions()
-            .unwrap()
             .with_no_client_auth()
             .with_cert_resolver(Arc::new(NexusCertResolver::new(
                 self.log.new(o!("component" => "NexusCertResolver")),
diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml
index b734248f32..5bd205b32e 100644
--- a/sled-agent/Cargo.toml
+++ b/sled-agent/Cargo.toml
@@ -53,11 +53,11 @@ propolis-client.workspace = true
 propolis-mock-server.workspace = true # Only used by the simulated sled agent
 rand = { workspace = true, features = ["getrandom"] }
 reqwest = { workspace = true, features = ["rustls-tls", "stream"] }
-schemars = { workspace = true, features = [ "chrono", "uuid1" ] }
+schemars = { workspace = true, features = ["chrono", "uuid1"] }
 semver.workspace = true
 serde.workspace = true
 serde_human_bytes.workspace = true
-serde_json = {workspace = true, features = ["raw_value"]}
+serde_json = { workspace = true, features = ["raw_value"] }
 sha3.workspace = true
 sled-agent-client.workspace = true
 sled-hardware.workspace = true
@@ -70,7 +70,7 @@ smf.workspace = true
 tar.workspace = true
 thiserror.workspace = true
 tofino.workspace = true
-tokio = { workspace = true, features = [ "full" ] }
+tokio = { workspace = true, features = ["full"] }
 toml.workspace = true
 usdt.workspace = true
 uuid.workspace = true
diff --git a/test-utils/src/certificates.rs b/test-utils/src/certificates.rs
index 54da013e0c..aac50a2ca8 100644
--- a/test-utils/src/certificates.rs
+++ b/test-utils/src/certificates.rs
@@ -5,14 +5,14 @@
 //! Utilities for tests that need certificates.
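A note on what the new lifetime parameter means for callers: `CertificateDer::from(Vec<u8>)` yields an owned buffer, so chains built by this utility are `'static` in practice and existing tests need no changes. A usage sketch, relying only on the constructor and PEM accessor shown in the hunks below:

```rust
#[test]
fn chain_is_static() {
    // The DER buffers are owned, so the chain can be annotated 'static;
    // nothing at existing call sites has to change.
    let chain: CertificateChain<'static> = CertificateChain::new("localhost");
    let key_pem = chain.end_cert_private_key_as_pem();
    assert!(key_pem.contains("PRIVATE KEY"));
}
```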
// Utility structure for making a test certificate
-pub struct CertificateChain {
-    root_cert: rustls::Certificate,
-    intermediate_cert: rustls::Certificate,
-    end_cert: rustls::Certificate,
+pub struct CertificateChain<'a> {
+    root_cert: rustls::pki_types::CertificateDer<'a>,
+    intermediate_cert: rustls::pki_types::CertificateDer<'a>,
+    end_cert: rustls::pki_types::CertificateDer<'a>,
     end_keypair: rcgen::Certificate,
}

-impl CertificateChain {
+impl<'a> CertificateChain<'a> {
     pub fn new<S: Into<String>>(subject_alt_name: S) -> Self {
         let params = rcgen::CertificateParams::new(vec![subject_alt_name.into()]);
@@ -36,17 +36,17 @@
         let end_keypair = rcgen::Certificate::from_params(params)
             .expect("failed to generate end-entity keys");
-        let root_cert = rustls::Certificate(
+        let root_cert = rustls::pki_types::CertificateDer::from(
             root_keypair
                 .serialize_der()
                 .expect("failed to serialize root cert"),
         );
-        let intermediate_cert = rustls::Certificate(
+        let intermediate_cert = rustls::pki_types::CertificateDer::from(
             intermediate_keypair
                 .serialize_der_with_signer(&root_keypair)
                 .expect("failed to serialize intermediate cert"),
         );
-        let end_cert = rustls::Certificate(
+        let end_cert = rustls::pki_types::CertificateDer::from(
             end_keypair
                 .serialize_der_with_signer(&intermediate_keypair)
                 .expect("failed to serialize end-entity cert"),
@@ -63,7 +63,7 @@
         self.end_keypair.serialize_private_key_pem()
     }

-    fn cert_chain(&self) -> Vec<rustls::Certificate> {
+    fn cert_chain(&self) -> Vec<rustls::pki_types::CertificateDer<'a>> {
         vec![
             self.end_cert.clone(),
             self.intermediate_cert.clone(),
@@ -76,12 +76,12 @@
     }
}

-fn tls_cert_to_pem(certs: &Vec<rustls::Certificate>) -> String {
+fn tls_cert_to_pem(certs: &Vec<rustls::pki_types::CertificateDer>) -> String {
     let mut serialized_certs = String::new();
     for cert in certs {
         let encoded_cert = pem::encode(&pem::Pem::new(
             "CERTIFICATE".to_string(),
-            cert.0.clone(),
+            cert.to_vec(),
         ));

         serialized_certs.push_str(&encoded_cert);
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index 2ddc38b380..214b57cdc5 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -224,14 +224,12 @@ zip = { version = "0.6.6", default-features = false, features = ["bzip2", "defla
[target.x86_64-unknown-linux-gnu.dependencies]
bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] }
-hyper-rustls = { version = "0.24.2" }
mio = { version = "0.8.9", features = ["net", "os-ext"] }
once_cell = { version = "1.19.0", features = ["unstable"] }
rustix = { version = "0.38.25", features = ["fs", "termios"] }

[target.x86_64-unknown-linux-gnu.build-dependencies]
bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] }
-hyper-rustls = { version = "0.24.2" }
mio = { version = "0.8.9", features = ["net", "os-ext"] }
once_cell = { version = "1.19.0", features = ["unstable"] }
rustix = { version = "0.38.25", features = ["fs", "termios"] }

[target.x86_64-apple-darwin.dependencies]
bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] }
errno = { version = "0.3.2", default-features = false, features = ["std"] }
-hyper-rustls = { version = "0.24.2" }
mio = { version = "0.8.9", features = ["net", "os-ext"] }
once_cell = { version = "1.19.0", features = ["unstable"] }
rustix = { version =
"0.38.25", features = ["fs", "termios"] } [target.x86_64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } errno = { version = "0.3.2", default-features = false, features = ["std"] } -hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } @@ -255,7 +251,6 @@ rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.aarch64-apple-darwin.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } errno = { version = "0.3.2", default-features = false, features = ["std"] } -hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } @@ -263,7 +258,6 @@ rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.aarch64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } errno = { version = "0.3.2", default-features = false, features = ["std"] } -hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } @@ -271,7 +265,6 @@ rustix = { version = "0.38.25", features = ["fs", "termios"] } [target.x86_64-unknown-illumos.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } errno = { version = "0.3.2", default-features = false, features = ["std"] } -hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } @@ -281,7 +274,6 @@ toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", featu [target.x86_64-unknown-illumos.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } errno = { version = "0.3.2", default-features = false, features = ["std"] } -hyper-rustls = { version = "0.24.2" } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.25", features = ["fs", "termios"] } From 9febaab4011579445f36b50e5a7d847d331ee927 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 18 Jan 2024 21:55:45 +0000 Subject: [PATCH 07/91] Update Rust crate sqlparser to 0.41.0 (#4839) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [sqlparser](https://togithub.com/sqlparser-rs/sqlparser-rs) | workspace.dependencies | minor | `0.36.1` -> `0.41.0` | --- ### Release Notes
sqlparser-rs/sqlparser-rs (sqlparser) ### [`v0.41.0`](https://togithub.com/sqlparser-rs/sqlparser-rs/blob/HEAD/CHANGELOG.md#0410-2023-12-22) [Compare Source](https://togithub.com/sqlparser-rs/sqlparser-rs/compare/v0.40.0...v0.41.0) ##### Added - Support `DEFERRED`, `IMMEDIATE`, and `EXCLUSIVE` in SQLite's `BEGIN TRANSACTION` command ([#​1067](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1067)) - Thanks [@​takaebato](https://togithub.com/takaebato) - Support generated columns skipping `GENERATED ALWAYS` keywords ([#​1058](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1058)) - Thanks [@​takluyver](https://togithub.com/takluyver) - Support `LOCK/UNLOCK TABLES` for MySQL ([#​1059](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1059)) - Thanks [@​zzzdong](https://togithub.com/zzzdong) - Support `JSON_TABLE` ([#​1062](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1062)) - Thanks [@​lovasoa](https://togithub.com/lovasoa) - Support `CALL` statements ([#​1063](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1063)) - Thanks [@​lovasoa](https://togithub.com/lovasoa) ##### Fixed - fix rendering of SELECT TOP ([#​1070](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1070)) for Snowflake - Thanks jmhain ##### Changed - Improve documentation formatting ([#​1068](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1068)) - Thanks [@​alamb](https://togithub.com/alamb) - Replace type_id() by trait method to allow wrapping dialects ([#​1065](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1065)) - Thanks [@​jjbayer](https://togithub.com/jjbayer) - Document that comments aren't preserved for round trip ([#​1060](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1060)) - Thanks [@​takluyver](https://togithub.com/takluyver) - Update sqlparser-derive to use `syn 2.0` ([#​1040](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1040)) - Thanks [@​serprex](https://togithub.com/serprex) ### [`v0.40.0`](https://togithub.com/sqlparser-rs/sqlparser-rs/blob/HEAD/CHANGELOG.md#0400-2023-11-27) [Compare Source](https://togithub.com/sqlparser-rs/sqlparser-rs/compare/v0.39.0...v0.40.0) ##### Added - Add `{pre,post}_visit_query` to `Visitor` ([#​1044](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1044)) - Thanks [@​jmhain](https://togithub.com/jmhain) - Support generated virtual columns with expression ([#​1051](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1051)) - Thanks [@​takluyver](https://togithub.com/takluyver) - Support PostgreSQL `END` ([#​1035](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1035)) - Thanks [@​tobyhede](https://togithub.com/tobyhede) - Support `INSERT INTO ... 
DEFAULT VALUES ...` ([#​1036](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1036)) - Thanks [@​CDThomas](https://togithub.com/CDThomas) - Support `RELEASE` and `ROLLBACK TO SAVEPOINT` ([#​1045](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1045)) - Thanks [@​CDThomas](https://togithub.com/CDThomas) - Support `CONVERT` expressions ([#​1048](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1048)) - Thanks [@​lovasoa](https://togithub.com/lovasoa) - Support `GLOBAL` and `SESSION` parts in `SHOW VARIABLES` for mysql and generic - Thanks [@​emin100](https://togithub.com/emin100) - Support snowflake `PIVOT` on derived table factors ([#​1027](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1027)) - Thanks [@​lustefaniak](https://togithub.com/lustefaniak) - Support mssql json and xml extensions ([#​1043](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1043)) - Thanks [@​lovasoa](https://togithub.com/lovasoa) - Support for `MAX` as a character length ([#​1038](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1038)) - Thanks [@​lovasoa](https://togithub.com/lovasoa) - Support `IN ()` syntax of SQLite ([#​1028](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1028)) - Thanks [@​alamb](https://togithub.com/alamb) ##### Fixed - Fix extra whitespace printed before `ON CONFLICT` ([#​1037](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1037)) - Thanks [@​CDThomas](https://togithub.com/CDThomas) ##### Changed - Document round trip ability ([#​1052](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1052)) - Thanks [@​alamb](https://togithub.com/alamb) - Add PRQL to list of users ([#​1031](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1031)) - Thanks [@​vanillajonathan](https://togithub.com/vanillajonathan) ### [`v0.39.0`](https://togithub.com/sqlparser-rs/sqlparser-rs/blob/HEAD/CHANGELOG.md#0390-2023-10-27) [Compare Source](https://togithub.com/sqlparser-rs/sqlparser-rs/compare/v0.38.0...v0.39.0) ##### Added - Support for `LATERAL FLATTEN` and similar ([#​1026](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1026)) - Thanks [@​lustefaniak](https://togithub.com/lustefaniak) - Support BigQuery struct, array and bytes , int64, `float64` datatypes ([#​1003](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1003)) - Thanks [@​iffyio](https://togithub.com/iffyio) - Support numbers as placeholders in Snowflake (e.g. 
`:1)` ([#​1001](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1001)) - Thanks [@​yuval-illumex](https://togithub.com/yuval-illumex) - Support date 'key' when using semi structured data ([#​1023](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1023)) [@​yuval-illumex](https://togithub.com/yuval-illumex) - Support IGNORE|RESPECT NULLs clause in window functions ([#​998](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/998)) - Thanks [@​yuval-illumex](https://togithub.com/yuval-illumex) - Support for single-quoted identifiers ([#​1021](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1021)) - Thanks [@​lovasoa](https://togithub.com/lovasoa) - Support multiple PARTITION statements in ALTER TABLE ADD statement ([#​1011](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1011)) - Thanks [@​bitemyapp](https://togithub.com/bitemyapp) - Support "with" identifiers surrounded by backticks in GenericDialect ([#​1010](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1010)) - Thanks [@​bitemyapp](https://togithub.com/bitemyapp) - Support INSERT IGNORE in MySql and GenericDialect ([#​1004](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1004)) - Thanks [@​emin100](https://togithub.com/emin100) - Support SQLite `pragma` statement ([#​969](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/969)) - Thanks [@​marhoily](https://togithub.com/marhoily) - Support `position` as a column name ([#​1022](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1022)) - Thanks [@​lustefaniak](https://togithub.com/lustefaniak) - Support `FILTER` in Functions (for `OVER`) clause ([#​1007](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1007)) - Thanks [@​lovasoa](https://togithub.com/lovasoa) - Support `SELECT * EXCEPT/REPLACE` syntax from ClickHouse ([#​1013](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1013)) - Thanks [@​lustefaniak](https://togithub.com/lustefaniak) - Support subquery as function arg w/o parens in Snowflake dialect ([#​996](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/996)) - Thanks [@​jmhain](https://togithub.com/jmhain) - Support `UNION DISTINCT BY NAME` syntax ([#​997](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/997)) - Thanks [@​alexander-beedie](https://togithub.com/alexander-beedie) - Support mysql `RLIKE` and `REGEXP` binary operators ([#​1017](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1017)) - Thanks [@​lovasoa](https://togithub.com/lovasoa) - Support bigquery `CAST AS x [STRING|DATE] FORMAT` syntax ([#​978](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/978)) - Thanks [@​lustefaniak](https://togithub.com/lustefaniak) - Support Snowflake/BigQuery `TRIM`. 
([#​975](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/975)) - Thanks [@​zdenal](https://togithub.com/zdenal) - Support ` CREATE [TEMPORARY|TEMP] VIEW [IF NOT EXISTS] `([#​993](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/993)) - Thanks [@​gabivlj](https://togithub.com/gabivlj) - Support for `CREATE VIEW … WITH NO SCHEMA BINDING` Redshift ([#​979](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/979)) - Thanks [@​lustefaniak](https://togithub.com/lustefaniak) - Support `UNPIVOT` and a fix for chained PIVOTs ([#​983](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/983)) - [@​jmhain](https://togithub.com/jmhain) - Support for `LIMIT BY` ([#​977](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/977)) - Thanks [@​lustefaniak](https://togithub.com/lustefaniak) - Support for mixed BigQuery table name quoting ([#​971](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/971)) - Thanks [@​iffyio](https://togithub.com/iffyio) - Support `DELETE` with `ORDER BY` and `LIMIT` (MySQL) ([#​992](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/992)) - Thanks [@​ulrichsg](https://togithub.com/ulrichsg) - Support `EXTRACT` for `DAYOFWEEK`, `DAYOFYEAR`, `ISOWEEK`, `TIME` ([#​980](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/980)) - Thanks [@​lustefaniak](https://togithub.com/lustefaniak) - Support `ATTACH DATABASE` ([#​989](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/989)) - Thanks [@​lovasoa](https://togithub.com/lovasoa) ##### Fixed - Fix handling of `/~%` in Snowflake stage name ([#​1009](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1009)) - Thanks [@​lustefaniak](https://togithub.com/lustefaniak) - Fix column `COLLATE` not displayed ([#​1012](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1012)) - Thanks [@​lustefaniak](https://togithub.com/lustefaniak) - Fix for clippy 1.73 ([#​995](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/995)) - Thanks [@​alamb](https://togithub.com/alamb) ##### Changed - Test to ensure `+ - * / %` binary operators work the same in all dialects ([#​1025](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1025)) - Thanks [@​lustefaniak](https://togithub.com/lustefaniak) - Improve documentation on Parser::consume_token and friends ([#​994](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/994)) - Thanks [@​alamb](https://togithub.com/alamb) - Test that regexp can be used as an identifier in postgres ([#​1018](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1018)) - Thanks [@​lovasoa](https://togithub.com/lovasoa) - Add docstrings for Dialects, update README ([#​1016](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/1016)) - Thanks [@​alamb](https://togithub.com/alamb) - Add JumpWire to users in README ([#​990](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/990)) - Thanks [@​hexedpackets](https://togithub.com/hexedpackets) - Add tests for clickhouse: `tokenize == as Token::DoubleEq` ([#​981](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/981))- Thanks [@​lustefaniak](https://togithub.com/lustefaniak) ### [`v0.38.0`](https://togithub.com/sqlparser-rs/sqlparser-rs/blob/HEAD/CHANGELOG.md#0380-2023-09-21) [Compare Source](https://togithub.com/sqlparser-rs/sqlparser-rs/compare/v0.37.0...v0.38.0) ##### Added - Support `==`operator for Sqlite ([#​970](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/970)) - Thanks [@​marhoily](https://togithub.com/marhoily) - Support mysql `PARTITION` to table selection ([#​959](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/959)) - 
Thanks [@​chunshao90](https://togithub.com/chunshao90) - Support `UNNEST` as a table factor for PostgreSQL ([#​968](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/968)) [@​hexedpackets](https://togithub.com/hexedpackets) - Support MySQL `UNIQUE KEY` syntax ([#​962](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/962)) - Thanks [@​artorias1024](https://togithub.com/artorias1024) - Support` `GROUP BY ALL\` ([#​964](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/964)) - [@​berkaysynnada](https://togithub.com/berkaysynnada) - Support multiple actions in one ALTER TABLE statement ([#​960](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/960)) - Thanks [@​ForbesLindesay](https://togithub.com/ForbesLindesay) - Add `--sqlite param` to CLI ([#​956](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/956)) - Thanks [@​ddol](https://togithub.com/ddol) ##### Fixed - Fix Rust 1.72 clippy lints ([#​957](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/957)) - Thanks [@​alamb](https://togithub.com/alamb) ##### Changed - Add missing token loc in parse err msg ([#​965](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/965)) - Thanks [@​ding-young](https://togithub.com/ding-young) - Change how `ANY` and `ALL` expressions are represented in AST ([#​963](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/963)) - Thanks [@​SeanTroyUWO](https://togithub.com/SeanTroyUWO) - Show location info in parse errors ([#​958](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/958)) - Thanks [@​MartinNowak](https://togithub.com/MartinNowak) - Update release documentation ([#​954](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/954)) - Thanks [@​alamb](https://togithub.com/alamb) - Break test and coverage test into separate jobs ([#​949](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/949)) - Thanks [@​alamb](https://togithub.com/alamb) ### [`v0.37.0`](https://togithub.com/sqlparser-rs/sqlparser-rs/blob/HEAD/CHANGELOG.md#0370-2023-08-22) [Compare Source](https://togithub.com/sqlparser-rs/sqlparser-rs/compare/v0.36.1...v0.37.0) ##### Added - Support `FOR SYSTEM_TIME AS OF` table time travel clause support, `visit_table_factor` to Visitor ([#​951](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/951)) - Thanks [@​gruuya](https://togithub.com/gruuya) - Support MySQL `auto_increment` offset in table definition ([#​950](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/950)) - Thanks [@​ehoeve](https://togithub.com/ehoeve) - Test for mssql table name in square brackets ([#​952](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/952)) - Thanks [@​lovasoa](https://togithub.com/lovasoa) - Support additional Postgres `CREATE INDEX` syntax ([#​943](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/943)) - Thanks [@​ForbesLindesay](https://togithub.com/ForbesLindesay) - Support `ALTER ROLE` syntax of PostgreSQL and MS SQL Server ([#​942](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/942)) - Thanks [@​r4ntix](https://togithub.com/r4ntix) - Support table-level comments ([#​946](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/946)) - Thanks [@​ehoeve](https://togithub.com/ehoeve) - Support `DROP TEMPORARY TABLE`, MySQL syntax ([#​916](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/916)) - Thanks [@​liadgiladi](https://togithub.com/liadgiladi) - Support posgres type alias ([#​933](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/933)) - Thanks [@​Kikkon](https://togithub.com/Kikkon) ##### Fixed - Clarify the value of the special flag 
([#​948](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/948)) - Thanks [@​alamb](https://togithub.com/alamb) - Fix `SUBSTRING` from/to argument construction for mssql ([#​947](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/947)) - Thanks [@​jmaness](https://togithub.com/jmaness) - Fix: use Rust idiomatic capitalization for newly added DataType enums ([#​939](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/939)) - Thanks [@​Kikkon](https://togithub.com/Kikkon) - Fix `BEGIN TRANSACTION` being serialized as `START TRANSACTION` ([#​935](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/935)) - Thanks [@​lovasoa](https://togithub.com/lovasoa) - Fix parsing of datetime functions without parenthesis ([#​930](https://togithub.com/sqlparser-rs/sqlparser-rs/issues/930)) - Thanks [@​lovasoa](https://togithub.com/lovasoa)
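One breaking change in this range matters directly for this repo: `Select::group_by` became an enum rather than a bare `Vec<Expr>`, which is what the `oximeter/db/src/sql/mod.rs` hunks below adapt to. A minimal sketch of the new shape (the SQL string and names here are illustrative, not from this patch):

```rust
use sqlparser::ast::{GroupByExpr, SetExpr, Statement};
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "SELECT target, count(*) FROM measurements GROUP BY target";
    let statements =
        Parser::parse_sql(&GenericDialect {}, sql).expect("valid SQL");
    let Statement::Query(query) = &statements[0] else {
        panic!("expected a query");
    };
    let SetExpr::Select(select) = query.body.as_ref() else {
        panic!("expected a plain SELECT");
    };
    // In 0.41, `group_by` is a `GroupByExpr`, so consumers match on it
    // instead of iterating a `Vec<Expr>` directly.
    match &select.group_by {
        GroupByExpr::Expressions(exprs) => {
            println!("GROUP BY over {} expression(s)", exprs.len());
        }
        GroupByExpr::All => println!("GROUP BY ALL"),
    }
}
```

An empty grouping is now spelled `GroupByExpr::Expressions(vec![])`, which is exactly the substitution made throughout `RestrictedQuery` below.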
--- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). --------- Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> Co-authored-by: Benjamin Naecker --- Cargo.lock | 10 +++++----- Cargo.toml | 2 +- oximeter/db/src/sql/mod.rs | 13 ++++++++++--- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 59ea2919a2..e754c2d594 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8180,9 +8180,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.36.1" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eaa1e88e78d2c2460d78b7dc3f0c08dbb606ab4222f9aff36f420d36e307d87" +checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964" dependencies = [ "log", "sqlparser_derive", @@ -8190,13 +8190,13 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.1.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.46", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index e81817310e..650fd4a0b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -356,7 +356,7 @@ sp-sim = { path = "sp-sim" } sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-rot = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } -sqlparser = { version = "0.36.1", features = [ "visitor" ] } +sqlparser = { version = "0.41.0", features = [ "visitor" ] } static_assertions = "1.1.0" # Please do not change the Steno version to a Git dependency. 
It makes it # harder than expected to make breaking changes (even if you specify a specific diff --git a/oximeter/db/src/sql/mod.rs b/oximeter/db/src/sql/mod.rs index 1f84e208d2..5d9685d19f 100644 --- a/oximeter/db/src/sql/mod.rs +++ b/oximeter/db/src/sql/mod.rs @@ -40,6 +40,7 @@ use sqlparser::ast::BinaryOperator; use sqlparser::ast::Cte; use sqlparser::ast::Distinct; use sqlparser::ast::Expr; +use sqlparser::ast::GroupByExpr; use sqlparser::ast::Ident; use sqlparser::ast::Join; use sqlparser::ast::JoinConstraint; @@ -554,7 +555,7 @@ impl RestrictedQuery { from: vec![cte_from], lateral_views: vec![], selection: None, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -601,9 +602,11 @@ impl RestrictedQuery { body: Box::new(SetExpr::Select(Box::new(select))), order_by: vec![], limit: None, + limit_by: vec![], offset: None, fetch: None, locks: vec![], + for_clause: None, }) } @@ -633,6 +636,8 @@ impl RestrictedQuery { alias: None, args: None, with_hints: vec![], + version: None, + partitions: vec![], }, joins: vec![], }; @@ -678,7 +683,7 @@ impl RestrictedQuery { from: vec![from], lateral_views: vec![], selection, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -714,6 +719,8 @@ impl RestrictedQuery { alias: None, args: None, with_hints: vec![], + version: None, + partitions: vec![], }, joins: vec![], }; @@ -746,7 +753,7 @@ impl RestrictedQuery { from: vec![from], lateral_views: vec![], selection, - group_by: vec![], + group_by: GroupByExpr::Expressions(vec![]), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], From c998469d58d6e2b33bc14be1c11d4a6614c42aa9 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 19 Jan 2024 05:29:10 +0000 Subject: [PATCH 08/91] Update taiki-e/install-action digest to 242f1c0 (#4846) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`e7dd06a` -> `242f1c0`](https://togithub.com/taiki-e/install-action/compare/e7dd06a...242f1c0) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 9dd17c985d..3dade2e190 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@e7dd06a5731075458d8bbd3465396374ad0d20cb # v2 + uses: taiki-e/install-action@242f1c0c1a882c44e7d32b89af9f2a0bced36540 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 1ae97e49d8698974989fe23ba5e5a8c421a24949 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Fri, 19 Jan 2024 12:45:51 -0500 Subject: [PATCH 09/91] Package omdb into switch zone (#4840) --- package-manifest.toml | 19 ++++++++++++++++--- smf/profile/profile | 2 +- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/package-manifest.toml b/package-manifest.toml index 16f8f70c73..7b12583437 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -348,6 +348,7 @@ source.paths = [ { from = "smf/switch_zone_setup/manifest.xml", to = "/var/svc/manifest/site/switch_zone_setup/manifest.xml" }, { from = "smf/switch_zone_setup/switch_zone_setup", to = "/opt/oxide/bin/switch_zone_setup" }, { from = "smf/switch_zone_setup/support_authorized_keys", to = "/opt/oxide/support/authorized_keys" }, + { from = "/opt/ooce/pgsql-13/lib/amd64", to = "/opt/ooce/pgsql-13/lib/amd64" }, ] output.type = "zone" output.intermediate_only = true @@ -566,7 +567,8 @@ source.packages = [ "mg-ddm.tar.gz", "mgd.tar.gz", "switch_zone_setup.tar.gz", - "xcvradm.tar.gz" + "xcvradm.tar.gz", + "omicron-omdb.tar.gz" ] output.type = "zone" @@ -588,7 +590,8 @@ source.packages = [ "mg-ddm.tar.gz", "mgd.tar.gz", "switch_zone_setup.tar.gz", - "sp-sim-stub.tar.gz" + "sp-sim-stub.tar.gz", + "omicron-omdb.tar.gz" ] output.type = "zone" @@ -610,7 +613,8 @@ source.packages = [ "mg-ddm.tar.gz", "mgd.tar.gz", "switch_zone_setup.tar.gz", - "sp-sim-softnpu.tar.gz" + "sp-sim-softnpu.tar.gz", + "omicron-omdb.tar.gz" ] output.type = "zone" @@ -625,3 +629,12 @@ source.paths = [ ] output.type = "zone" output.intermediate_only = true + +[package.omicron-omdb] +service_name = "omdb" +only_for_targets.image = "standard" +source.type = "local" +source.rust.binary_names = ["omdb"] +source.rust.release = true +output.type = "zone" +output.intermediate_only = true diff --git a/smf/profile/profile b/smf/profile/profile index 8f613d4d56..73256cd6fd 100644 --- a/smf/profile/profile +++ b/smf/profile/profile @@ -4,7 +4,7 @@ PATH+=:/opt/ooce/bin case "$HOSTNAME" in oxz_switch) # Add tools like xcvradm, swadm & ddmadm to the PATH by default - PATH+=:/opt/oxide/bin:/opt/oxide/dendrite/bin:/opt/oxide/mg-ddm/bin + PATH+=:/opt/oxide/bin:/opt/oxide/dendrite/bin:/opt/oxide/mg-ddm/bin:/opt/oxide/omdb/bin ;; oxz_cockroachdb*) PATH+=:/opt/oxide/cockroachdb/bin From 294b87853cb07a845b2816711a6c07c89e317c4c Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 19 Jan 2024 12:16:17 -0800 Subject: [PATCH 10/91] [update-common] move unzip_into_tempdir onto the blocking task pool (#4845) Pointed out by John in #4690. 
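The fix follows the standard Tokio pattern for expensive synchronous work: hand the closure to `spawn_blocking` and convert a panic in the task (a `JoinError`) into an ordinary error instead of unwrapping it. A stripped-down sketch with stand-in names (the real code below extracts a TUF repo via `unzip_into_tempdir` and wraps the failure in `RepositoryError::Extract`):

```rust
use anyhow::anyhow;

// Stand-in for the real extraction entry point: run synchronous, CPU- and
// disk-heavy work on Tokio's blocking pool rather than an executor thread.
async fn extract_on_blocking_pool(zip_data: Vec<u8>) -> anyhow::Result<u64> {
    tokio::task::spawn_blocking(move || {
        // Placeholder for `unzip_into_tempdir(zip_data, &log)`.
        Ok(zip_data.len() as u64)
    })
    .await
    // The outer error means the blocking task panicked; the inner `Result`
    // (the work's own error) is returned as the tail expression.
    .map_err(|join_error| anyhow!(join_error).context("blocking task panicked"))?
}
```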
--- .../src/artifacts/artifacts_with_plan.rs | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/update-common/src/artifacts/artifacts_with_plan.rs b/update-common/src/artifacts/artifacts_with_plan.rs index 94c7294d48..9b579af29a 100644 --- a/update-common/src/artifacts/artifacts_with_plan.rs +++ b/update-common/src/artifacts/artifacts_with_plan.rs @@ -6,6 +6,7 @@ use super::ExtractedArtifactDataHandle; use super::UpdatePlan; use super::UpdatePlanBuilder; use crate::errors::RepositoryError; +use anyhow::anyhow; use camino_tempfile::Utf8TempDir; use debug_ignore::DebugIgnore; use omicron_common::update::ArtifactHash; @@ -55,10 +56,29 @@ impl ArtifactsWithPlan { log: &Logger, ) -> Result where - T: io::Read + io::Seek, + T: io::Read + io::Seek + Send + 'static, { // Create a temporary directory to hold the extracted TUF repository. - let dir = unzip_into_tempdir(zip_data, log)?; + let dir = { + let log = log.clone(); + tokio::task::spawn_blocking(move || { + // This is an expensive synchronous method, so run it on the + // blocking thread pool. + // + // TODO: at the moment we don't restrict the size of the + // extracted contents or its memory usage, making it + // susceptible to zip bombs and other related attacks. + // https://github.com/zip-rs/zip/issues/228. We need to think + // about this at some point. + unzip_into_tempdir(zip_data, &log) + }) + .await + .map_err(|join_error| { + RepositoryError::Extract( + anyhow!(join_error).context("unzip_into_tempdir panicked"), + ) + })?? + }; // Time is unavailable during initial setup, so ignore expiration. Even // if time were available, we might want to be able to load older From 823f90c0edd0407511372eb846976a157421cecb Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 19 Jan 2024 12:16:48 -0800 Subject: [PATCH 11/91] Update Rust crate tempfile to 3.9 (#4848) --- Cargo.lock | 45 +++++++++++++++------------------------ Cargo.toml | 2 +- workspace-hack/Cargo.toml | 28 ++++++++++++------------ 3 files changed, 32 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e754c2d594..8c85d4c5cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -326,7 +326,7 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc7b2dbe9169059af0f821e811180fddc971fc210c776c133c7819ccd6e478db" dependencies = [ - "rustix 0.38.25", + "rustix 0.38.30", "tempfile", "windows-sys 0.52.0", ] @@ -2145,23 +2145,12 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" -dependencies = [ - "errno-dragonfly", - "libc", - "windows-sys 0.48.0", -] - -[[package]] -name = "errno-dragonfly" -version = "0.1.2" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ - "cc", "libc", + "windows-sys 0.52.0", ] [[package]] @@ -2216,7 +2205,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5" dependencies = [ "cfg-if", - "rustix 0.38.25", + "rustix 0.38.30", "windows-sys 0.48.0", ] @@ -3513,7 +3502,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi 0.3.2", - "rustix 0.38.25", + "rustix 0.38.30", "windows-sys 0.48.0", ] @@ -3766,9 +3755,9 @@ checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "linux-raw-sys" -version = "0.4.11" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "lock_api" @@ -5183,7 +5172,7 @@ dependencies = [ "regex-syntax 0.8.2", "reqwest", "ring 0.17.7", - "rustix 0.38.25", + "rustix 0.38.30", "schemars", "semver 1.0.21", "serde", @@ -7100,15 +7089,15 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.25" +version = "0.38.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e" +checksum = "322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca" dependencies = [ "bitflags 2.4.0", "errno", "libc", - "linux-raw-sys 0.4.11", - "windows-sys 0.48.0", + "linux-raw-sys 0.4.13", + "windows-sys 0.52.0", ] [[package]] @@ -8530,15 +8519,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.8.1" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" +checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" dependencies = [ "cfg-if", "fastrand", "redox_syscall 0.4.1", - "rustix 0.38.25", - "windows-sys 0.48.0", + "rustix 0.38.30", + "windows-sys 0.52.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 650fd4a0b9..54db531d06 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -371,7 +371,7 @@ syn = { version = "2.0" } tabled = "0.14" tar = "0.4" tempdir = "0.3" -tempfile = "3.8" +tempfile = "3.9" term = "0.7" termios = "0.3" textwrap = "0.16.0" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 214b57cdc5..b574a292d1 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -226,57 +226,57 @@ zip = { version = "0.6.6", default-features = false, features = ["bzip2", "defla bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } -rustix = { version = "0.38.25", features = ["fs", "termios"] } +rustix = { version = "0.38.30", features = ["fs", "termios"] } [target.x86_64-unknown-linux-gnu.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } -rustix = { version = "0.38.25", features = ["fs", "termios"] } +rustix = { version = "0.38.30", features = ["fs", "termios"] } [target.x86_64-apple-darwin.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } -errno = { version = "0.3.2", default-features = false, features = ["std"] } +errno = { version = "0.3.8", default-features = false, features = ["std"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = 
"1.19.0", features = ["unstable"] } -rustix = { version = "0.38.25", features = ["fs", "termios"] } +rustix = { version = "0.38.30", features = ["fs", "termios"] } [target.x86_64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } -errno = { version = "0.3.2", default-features = false, features = ["std"] } +errno = { version = "0.3.8", default-features = false, features = ["std"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } -rustix = { version = "0.38.25", features = ["fs", "termios"] } +rustix = { version = "0.38.30", features = ["fs", "termios"] } [target.aarch64-apple-darwin.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } -errno = { version = "0.3.2", default-features = false, features = ["std"] } +errno = { version = "0.3.8", default-features = false, features = ["std"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } -rustix = { version = "0.38.25", features = ["fs", "termios"] } +rustix = { version = "0.38.30", features = ["fs", "termios"] } [target.aarch64-apple-darwin.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } -errno = { version = "0.3.2", default-features = false, features = ["std"] } +errno = { version = "0.3.8", default-features = false, features = ["std"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } -rustix = { version = "0.38.25", features = ["fs", "termios"] } +rustix = { version = "0.38.30", features = ["fs", "termios"] } [target.x86_64-unknown-illumos.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } -errno = { version = "0.3.2", default-features = false, features = ["std"] } +errno = { version = "0.3.8", default-features = false, features = ["std"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } -rustix = { version = "0.38.25", features = ["fs", "termios"] } +rustix = { version = "0.38.30", features = ["fs", "termios"] } toml_datetime = { version = "0.6.5", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } [target.x86_64-unknown-illumos.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } -errno = { version = "0.3.2", default-features = false, features = ["std"] } +errno = { version = "0.3.8", default-features = false, features = ["std"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } -rustix = { version = "0.38.25", features = ["fs", "termios"] } +rustix = { version = "0.38.30", features = ["fs", "termios"] } toml_datetime = { version = "0.6.5", default-features = false, features = ["serde"] } toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", features = ["serde"] } From 4646d9ae8e9998a35327ec5411b5136e26c0e8f5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 20 Jan 2024 01:22:57 +0000 Subject: 
[PATCH 12/91] Bump h2 from 0.3.21 to 0.3.24 (#4850) --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8c85d4c5cb..5211ecab78 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2707,9 +2707,9 @@ checksum = "92620684d99f750bae383ecb3be3748142d6095760afd5cbcf2261e9a279d780" [[package]] name = "h2" -version = "0.3.21" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" +checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" dependencies = [ "bytes", "fnv", @@ -2717,7 +2717,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.11", - "indexmap 1.9.3", + "indexmap 2.1.0", "slab", "tokio", "tokio-util", From d8bbf6d20e3d2472f7f1e60b5b0fb591c7a61572 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 20 Jan 2024 05:25:37 +0000 Subject: [PATCH 13/91] Update taiki-e/install-action digest to bd4f144 (#4859) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`242f1c0` -> `bd4f144`](https://togithub.com/taiki-e/install-action/compare/242f1c0...bd4f144) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 3dade2e190..a4ebc74843 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@242f1c0c1a882c44e7d32b89af9f2a0bced36540 # v2 + uses: taiki-e/install-action@bd4f14420660e33ca2929e5c0306a8367173c1ee # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 205382f7ee151f09a5c6c11ed4ae73b14f0d64b3 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Sat, 20 Jan 2024 01:30:16 -0600 Subject: [PATCH 14/91] [nexus] Endpoint to list IP pools for silo, add `is_default` to silo-scoped IP pools list (#4843) Fixes #4752 Fixes #4763 The main trick here is introducing `views::SiloIpPool`, which is the same as `views::IpPool` except it also has `is_default` on it. It only makes sense in the context of a particular silo because `is_default` is only defined for a (pool, silo) pair, not for a pool alone. 
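For reference, the new view is roughly the following (a sketch only: the derives and exact field types here are assumptions, but the handlers in this patch construct it from `pool.identity()` plus the link's `is_default`):

```rust
use omicron_common::api::external::IdentityMetadata;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

/// An IP pool in the context of a silo: the same data as `IpPool`, plus
/// `is_default`, which is only defined for a (pool, silo) pair.
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
pub struct SiloIpPool {
    #[serde(flatten)]
    pub identity: IdentityMetadata,
    /// Whether this pool is the default for the silo it was fetched through.
    pub is_default: bool,
}
```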
- [x] Add `GET /v1/system/silos/{silo}/ip-pools` - [x] `/v1/ip-pools` and `/v1/ip-pools/{pool}` should return `SiloIpPool` too - [x] Tests for `/v1/system/silos/{silo}/ip-pools` - [x] We can't have both `SiloIpPool` and `IpPoolSilo`, cleaned up by changing: - `views::IpPoolSilo` -> `views::IpPoolSiloLink` - `params::IpPoolSiloLink` -> `params::IpPoolLinkSilo` --- end-to-end-tests/src/bin/bootstrap.rs | 4 +- nexus/db-model/src/ip_pool.rs | 2 +- nexus/db-queries/src/db/datastore/ip_pool.rs | 86 ++++----- nexus/src/app/ip_pool.rs | 47 ++++- nexus/src/external_api/http_entrypoints.rs | 81 ++++++-- nexus/test-utils/src/resource_helpers.rs | 4 +- nexus/tests/integration_tests/endpoints.rs | 14 +- nexus/tests/integration_tests/instances.rs | 6 +- nexus/tests/integration_tests/ip_pools.rs | 121 ++++++++---- nexus/tests/output/nexus_tags.txt | 1 + nexus/types/src/external_api/params.rs | 2 +- nexus/types/src/external_api/views.rs | 14 +- openapi/nexus.json | 186 ++++++++++++++++--- 13 files changed, 431 insertions(+), 137 deletions(-) diff --git a/end-to-end-tests/src/bin/bootstrap.rs b/end-to-end-tests/src/bin/bootstrap.rs index 21e59647ae..b02bed4265 100644 --- a/end-to-end-tests/src/bin/bootstrap.rs +++ b/end-to-end-tests/src/bin/bootstrap.rs @@ -4,7 +4,7 @@ use end_to_end_tests::helpers::{generate_name, get_system_ip_pool}; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{ ByteCount, DeviceAccessTokenRequest, DeviceAuthRequest, DeviceAuthVerify, - DiskCreate, DiskSource, IpPoolCreate, IpPoolSiloLink, IpRange, Ipv4Range, + DiskCreate, DiskSource, IpPoolCreate, IpPoolLinkSilo, IpRange, Ipv4Range, NameOrId, SiloQuotasUpdate, }; use oxide_client::{ @@ -51,7 +51,7 @@ async fn main() -> Result<()> { client .ip_pool_silo_link() .pool(pool_name) - .body(IpPoolSiloLink { + .body(IpPoolLinkSilo { silo: NameOrId::Name(params.silo_name().parse().unwrap()), is_default: true, }) diff --git a/nexus/db-model/src/ip_pool.rs b/nexus/db-model/src/ip_pool.rs index bec1113151..030d052c22 100644 --- a/nexus/db-model/src/ip_pool.rs +++ b/nexus/db-model/src/ip_pool.rs @@ -97,7 +97,7 @@ pub struct IpPoolResource { pub is_default: bool, } -impl From<IpPoolResource> for views::IpPoolSilo { +impl From<IpPoolResource> for views::IpPoolSiloLink { fn from(assoc: IpPoolResource) -> Self { Self { ip_pool_id: assoc.ip_pool_id, diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs index c9fdb5f0ee..331126ef97 100644 --- a/nexus/db-queries/src/db/datastore/ip_pool.rs +++ b/nexus/db-queries/src/db/datastore/ip_pool.rs @@ -79,47 +79,6 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } - /// List IP pools linked to the current silo - pub async fn silo_ip_pools_list( - &self, - opctx: &OpContext, - pagparams: &PaginatedBy<'_>, - ) -> ListResultVec<IpPool> { - use db::schema::ip_pool; - use db::schema::ip_pool_resource; - - // From the developer user's point of view, we treat IP pools linked to - // their silo as silo resources, so they can list them if they can list - // silo children - let authz_silo = - opctx.authn.silo_required().internal_context("listing IP pools")?; - opctx.authorize(authz::Action::ListChildren, &authz_silo).await?; - - let silo_id = authz_silo.id(); - - match pagparams { - PaginatedBy::Id(pagparams) => { - paginated(ip_pool::table, ip_pool::id, pagparams) - } - PaginatedBy::Name(pagparams) => paginated( - ip_pool::table, - ip_pool::name, - &pagparams.map_name(|n| Name::ref_cast(n)), - ), - } - 
.inner_join(ip_pool_resource::table) - .filter( - ip_pool_resource::resource_type - .eq(IpPoolResourceType::Silo) - .and(ip_pool_resource::resource_id.eq(silo_id)), - ) - .filter(ip_pool::time_deleted.is_null()) - .select(db::model::IpPool::as_select()) - .get_results_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } - /// Look up whether the given pool is available to users in the current /// silo, i.e., whether there is an entry in the association table linking /// the pool with that silo @@ -400,6 +359,37 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Returns (IpPool, IpPoolResource) so we can know in the calling code + /// whether the pool is default for the silo + pub async fn silo_ip_pool_list( + &self, + opctx: &OpContext, + authz_silo: &authz::Silo, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec<(IpPool, IpPoolResource)> { + use db::schema::ip_pool; + use db::schema::ip_pool_resource; + + match pagparams { + PaginatedBy::Id(pagparams) => { + paginated(ip_pool::table, ip_pool::id, pagparams) + } + PaginatedBy::Name(pagparams) => paginated( + ip_pool::table, + ip_pool::name, + &pagparams.map_name(|n| Name::ref_cast(n)), + ), + } + .inner_join(ip_pool_resource::table) + .filter(ip_pool_resource::resource_id.eq(authz_silo.id())) + .filter(ip_pool_resource::resource_type.eq(IpPoolResourceType::Silo)) + .filter(ip_pool::time_deleted.is_null()) + .select(<(IpPool, IpPoolResource)>::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + pub async fn ip_pool_link_silo( &self, opctx: &OpContext, @@ -867,8 +857,11 @@ mod test { .await .expect("Should list IP pools"); assert_eq!(all_pools.len(), 0); + + let authz_silo = opctx.authn.silo_required().unwrap(); + let silo_pools = datastore - .silo_ip_pools_list(&opctx, &pagbyid) + .silo_ip_pool_list(&opctx, &authz_silo, &pagbyid) .await .expect("Should list silo IP pools"); assert_eq!(silo_pools.len(), 0); @@ -893,7 +886,7 @@ mod test { .expect("Should list IP pools"); assert_eq!(all_pools.len(), 1); let silo_pools = datastore - .silo_ip_pools_list(&opctx, &pagbyid) + .silo_ip_pool_list(&opctx, &authz_silo, &pagbyid) .await .expect("Should list silo IP pools"); assert_eq!(silo_pools.len(), 0); @@ -929,11 +922,12 @@ mod test { // now it shows up in the silo list let silo_pools = datastore - .silo_ip_pools_list(&opctx, &pagbyid) + .silo_ip_pool_list(&opctx, &authz_silo, &pagbyid) .await .expect("Should list silo IP pools"); assert_eq!(silo_pools.len(), 1); - assert_eq!(silo_pools[0].id(), pool1_for_silo.id()); + assert_eq!(silo_pools[0].0.id(), pool1_for_silo.id()); + assert_eq!(silo_pools[0].1.is_default, false); // linking an already linked silo errors due to PK conflict let err = datastore @@ -998,7 +992,7 @@ mod test { // and silo pools list is empty again let silo_pools = datastore - .silo_ip_pools_list(&opctx, &pagbyid) + .silo_ip_pool_list(&opctx, &authz_silo, &pagbyid) .await .expect("Should list silo IP pools"); assert_eq!(silo_pools.len(), 0); diff --git a/nexus/src/app/ip_pool.rs b/nexus/src/app/ip_pool.rs index 1d9b3e515e..d8d36fff4b 100644 --- a/nexus/src/app/ip_pool.rs +++ b/nexus/src/app/ip_pool.rs @@ -20,6 +20,7 @@ use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; 
+use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; @@ -74,12 +75,20 @@ impl super::Nexus { } /// List IP pools in current silo - pub(crate) async fn silo_ip_pools_list( + pub(crate) async fn current_silo_ip_pool_list( &self, opctx: &OpContext, pagparams: &PaginatedBy<'_>, - ) -> ListResultVec { - self.db_datastore.silo_ip_pools_list(opctx, pagparams).await + ) -> ListResultVec<(db::model::IpPool, db::model::IpPoolResource)> { + let authz_silo = + opctx.authn.silo_required().internal_context("listing IP pools")?; + + // From the developer user's point of view, we treat IP pools linked to + // their silo as silo resources, so they can list them if they can list + // silo children + opctx.authorize(authz::Action::ListChildren, &authz_silo).await?; + + self.db_datastore.silo_ip_pool_list(opctx, &authz_silo, pagparams).await } // Look up pool by name or ID, but only return it if it's linked to the @@ -88,19 +97,19 @@ impl super::Nexus { &'a self, opctx: &'a OpContext, pool: &'a NameOrId, - ) -> LookupResult { + ) -> LookupResult<(db::model::IpPool, db::model::IpPoolResource)> { let (authz_pool, pool) = self.ip_pool_lookup(opctx, pool)?.fetch().await?; // 404 if no link is found in the current silo let link = self.db_datastore.ip_pool_fetch_link(opctx, pool.id()).await; - if link.is_err() { - return Err(authz_pool.not_found()); + match link { + Ok(link) => Ok((pool, link)), + Err(_) => Err(authz_pool.not_found()), } - - Ok(pool) } + /// List silos for a given pool pub(crate) async fn ip_pool_silo_list( &self, opctx: &OpContext, @@ -109,14 +118,34 @@ impl super::Nexus { ) -> ListResultVec { let (.., authz_pool) = pool_lookup.lookup_for(authz::Action::ListChildren).await?; + + // check ability to list silos in general + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + self.db_datastore.ip_pool_silo_list(opctx, &authz_pool, pagparams).await } + // List pools for a given silo + pub(crate) async fn silo_ip_pool_list( + &self, + opctx: &OpContext, + silo_lookup: &lookup::Silo<'_>, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec<(db::model::IpPool, db::model::IpPoolResource)> { + let (.., authz_silo) = + silo_lookup.lookup_for(authz::Action::Read).await?; + // check ability to list pools in general + opctx + .authorize(authz::Action::ListChildren, &authz::IP_POOL_LIST) + .await?; + self.db_datastore.silo_ip_pool_list(opctx, &authz_silo, pagparams).await + } + pub(crate) async fn ip_pool_link_silo( &self, opctx: &OpContext, pool_lookup: &lookup::IpPool<'_>, - silo_link: ¶ms::IpPoolSiloLink, + silo_link: ¶ms::IpPoolLinkSilo, ) -> CreateResult { let (authz_pool,) = pool_lookup.lookup_for(authz::Action::Modify).await?; diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 21acb45ed3..65b03a9fdf 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -279,6 +279,7 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(silo_delete)?; api.register(silo_policy_view)?; api.register(silo_policy_update)?; + api.register(silo_ip_pool_list)?; api.register(silo_utilization_view)?; api.register(silo_utilization_list)?; @@ -741,7 +742,7 @@ async fn silo_create( /// Fetch a silo /// -/// Fetch a silo by name. +/// Fetch a silo by name or ID. 
#[endpoint { method = GET, path = "/v1/system/silos/{silo}", @@ -763,6 +764,48 @@ async fn silo_view( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// List IP pools available within silo +#[endpoint { + method = GET, + path = "/v1/system/silos/{silo}/ip-pools", + tags = ["system/silos"], +}] +async fn silo_ip_pool_list( + rqctx: RequestContext>, + path_params: Path, + query_params: Query, +) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + + let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; + let pools = nexus + .silo_ip_pool_list(&opctx, &silo_lookup, &paginated_by) + .await? + .iter() + .map(|(pool, silo_link)| views::SiloIpPool { + identity: pool.identity(), + is_default: silo_link.is_default, + }) + .collect(); + + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + pools, + &marker_for_name_or_id, + )?)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + /// Delete a silo /// /// Delete a silo by name. @@ -1302,7 +1345,7 @@ async fn project_policy_update( async fn project_ip_pool_list( rqctx: RequestContext>, query_params: Query, -) -> Result>, HttpError> { +) -> Result>, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.nexus; @@ -1312,10 +1355,13 @@ async fn project_ip_pool_list( let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; let opctx = crate::context::op_context_for_external_api(&rqctx).await?; let pools = nexus - .silo_ip_pools_list(&opctx, &paginated_by) + .current_silo_ip_pool_list(&opctx, &paginated_by) .await? .into_iter() - .map(IpPool::from) + .map(|(pool, silo_link)| views::SiloIpPool { + identity: pool.identity(), + is_default: silo_link.is_default, + }) .collect(); Ok(HttpResponseOk(ScanByNameOrId::results_page( &query, @@ -1335,14 +1381,18 @@ async fn project_ip_pool_list( async fn project_ip_pool_view( rqctx: RequestContext>, path_params: Path, -) -> Result, HttpError> { +) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { let opctx = crate::context::op_context_for_external_api(&rqctx).await?; let nexus = &apictx.nexus; let pool_selector = path_params.into_inner().pool; - let pool = nexus.silo_ip_pool_fetch(&opctx, &pool_selector).await?; - Ok(HttpResponseOk(IpPool::from(pool))) + let (pool, silo_link) = + nexus.silo_ip_pool_fetch(&opctx, &pool_selector).await?; + Ok(HttpResponseOk(views::SiloIpPool { + identity: pool.identity(), + is_default: silo_link.is_default, + })) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } @@ -1489,7 +1539,14 @@ async fn ip_pool_silo_list( // option would be to paginate by a composite key representing the (pool, // resource_type, resource) query_params: Query, -) -> Result>, HttpError> { + // TODO: this could just list views::Silo -- it's not like knowing silo_id + // and nothing else is particularly useful -- except we also want to say + // whether the pool is marked default on each silo. So one option would + // be to do the same as we did with SiloIpPool -- include is_default on + // whatever the thing is. Still... 
all we'd have to do to make this usable
+    // in both places would be to make it { ...IpPool, silo_id, silo_name,
+    // is_default }
+) -> Result<HttpResponseOk<ResultsPage<views::SiloIpPool>>, HttpError> {
     let apictx = rqctx.context();
     let handler = async {
         let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
@@ -1511,7 +1568,7 @@ async fn ip_pool_silo_list(
         Ok(HttpResponseOk(ScanById::results_page(
             &query,
             assocs,
-            &|_, x: &views::IpPoolSilo| x.silo_id,
+            &|_, x: &views::IpPoolSiloLink| x.silo_id,
         )?))
     };
     apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
@@ -1526,8 +1583,8 @@ async fn ip_pool_silo_list(
 async fn ip_pool_silo_link(
     rqctx: RequestContext<Arc<ServerContext>>,
     path_params: Path<params::IpPoolPath>,
-    resource_assoc: TypedBody<params::IpPoolSiloLink>,
-) -> Result<HttpResponseCreated<views::IpPoolSilo>, HttpError> {
+    resource_assoc: TypedBody<params::IpPoolLinkSilo>,
+) -> Result<HttpResponseCreated<views::IpPoolSiloLink>, HttpError> {
     let apictx = rqctx.context();
     let handler = async {
         let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
@@ -1581,7 +1638,7 @@ async fn ip_pool_silo_update(
     rqctx: RequestContext<Arc<ServerContext>>,
     path_params: Path<params::IpPoolSiloPath>,
     update: TypedBody<params::IpPoolSiloUpdate>,
-) -> Result<HttpResponseOk<views::IpPoolSilo>, HttpError> {
+) -> Result<HttpResponseOk<views::IpPoolSiloLink>, HttpError> {
     let apictx = rqctx.context();
     let handler = async {
         let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs
index c2516a1509..4fe03f204c 100644
--- a/nexus/test-utils/src/resource_helpers.rs
+++ b/nexus/test-utils/src/resource_helpers.rs
@@ -246,9 +246,9 @@ pub async fn link_ip_pool(
     is_default: bool,
 ) {
     let link =
-        params::IpPoolSiloLink { silo: NameOrId::Id(*silo_id), is_default };
+        params::IpPoolLinkSilo { silo: NameOrId::Id(*silo_id), is_default };
     let url = format!("/v1/system/ip-pools/{pool_name}/silos");
-    object_create::<params::IpPoolSiloLink, views::IpPoolSilo>(
+    object_create::<params::IpPoolLinkSilo, views::IpPoolSiloLink>(
         client, &url, &link,
     )
     .await;
diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs
index 11bfa34c5f..8beffe43a5 100644
--- a/nexus/tests/integration_tests/endpoints.rs
+++ b/nexus/tests/integration_tests/endpoints.rs
@@ -80,6 +80,8 @@ pub static DEMO_SILO_NAME: Lazy<Name> =
     Lazy::new(|| "demo-silo".parse().unwrap());
 pub static DEMO_SILO_URL: Lazy<String> =
     Lazy::new(|| format!("/v1/system/silos/{}", *DEMO_SILO_NAME));
+pub static DEMO_SILO_IP_POOLS_URL: Lazy<String> =
+    Lazy::new(|| format!("{}/ip-pools", *DEMO_SILO_URL));
 pub static DEMO_SILO_POLICY_URL: Lazy<String> =
     Lazy::new(|| format!("/v1/system/silos/{}/policy", *DEMO_SILO_NAME));
 pub static DEMO_SILO_QUOTAS_URL: Lazy<String> =
@@ -627,8 +629,8 @@ pub static DEMO_IP_POOL_UPDATE: Lazy<params::IpPoolUpdate> =
     });
 pub static DEMO_IP_POOL_SILOS_URL: Lazy<String> =
     Lazy::new(|| format!("{}/silos", *DEMO_IP_POOL_URL));
-pub static DEMO_IP_POOL_SILOS_BODY: Lazy<params::IpPoolSiloLink> =
-    Lazy::new(|| params::IpPoolSiloLink {
+pub static DEMO_IP_POOL_SILOS_BODY: Lazy<params::IpPoolLinkSilo> =
+    Lazy::new(|| params::IpPoolLinkSilo {
         silo: NameOrId::Id(DEFAULT_SILO.identity().id),
         is_default: true, // necessary for demo instance create to go through
     });
@@ -1110,6 +1112,14 @@ pub static VERIFY_ENDPOINTS: Lazy<Vec<VerifyEndpoint>> = Lazy::new(|| {
                 AllowedMethod::Delete,
             ],
         },
+        VerifyEndpoint {
+            url: &DEMO_SILO_IP_POOLS_URL,
+            visibility: Visibility::Protected,
+            unprivileged_access: UnprivilegedAccess::None,
+            allowed_methods: vec![
+                AllowedMethod::Get,
+            ],
+        },
         VerifyEndpoint {
             url: &DEMO_SILO_POLICY_URL,
             visibility: Visibility::Protected,
diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs
index 044f87f7c1..2f4e913185 100644
--- a/nexus/tests/integration_tests/instances.rs
+++ b/nexus/tests/integration_tests/instances.rs
@@ -3657,7 +3657,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool(
     );
 
     // make pool2 default and create instance with default pool. check that it now comes from pool2
-    let _: views::IpPoolSilo = object_put(
+    let _: views::IpPoolSiloLink = object_put(
         client,
         &format!("/v1/system/ip-pools/pool2/silos/{}", DEFAULT_SILO.id()),
         &params::IpPoolSiloUpdate { is_default: true },
@@ -3788,11 +3788,11 @@ async fn test_instance_ephemeral_ip_from_orphan_pool(
 
     // associate the pool with a different silo and we should get the same
     // error on instance create
-    let params = params::IpPoolSiloLink {
+    let params = params::IpPoolLinkSilo {
         silo: NameOrId::Name(cptestctx.silo_name.clone()),
         is_default: false,
     };
-    let _: views::IpPoolSilo =
+    let _: views::IpPoolSiloLink =
         object_create(client, "/v1/system/ip-pools/orphan-pool/silos", &params)
             .await;
 
diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs
index d97eda9a0b..7843e816fd 100644
--- a/nexus/tests/integration_tests/ip_pools.rs
+++ b/nexus/tests/integration_tests/ip_pools.rs
@@ -31,7 +31,7 @@ use nexus_test_utils::resource_helpers::objects_list_page_authz;
 use nexus_test_utils_macros::nexus_test;
 use nexus_types::external_api::params;
 use nexus_types::external_api::params::IpPoolCreate;
-use nexus_types::external_api::params::IpPoolSiloLink;
+use nexus_types::external_api::params::IpPoolLinkSilo;
 use nexus_types::external_api::params::IpPoolSiloUpdate;
 use nexus_types::external_api::params::IpPoolUpdate;
 use nexus_types::external_api::shared::IpRange;
@@ -40,8 +40,9 @@ use nexus_types::external_api::shared::Ipv6Range;
 use nexus_types::external_api::shared::SiloIdentityMode;
 use nexus_types::external_api::views::IpPool;
 use nexus_types::external_api::views::IpPoolRange;
-use nexus_types::external_api::views::IpPoolSilo;
+use nexus_types::external_api::views::IpPoolSiloLink;
 use nexus_types::external_api::views::Silo;
+use nexus_types::external_api::views::SiloIpPool;
 use nexus_types::identity::Resource;
 use omicron_common::api::external::IdentityMetadataUpdateParams;
 use omicron_common::api::external::NameOrId;
@@ -261,6 +262,19 @@ async fn test_ip_pool_list_dedupe(cptestctx: &ControlPlaneTestContext) {
     assert_eq!(ip_pools.len(), 2);
     assert_eq!(ip_pools[0].identity.id, pool1.id());
     assert_eq!(ip_pools[1].identity.id, pool2.id());
+
+    let silo1_pools = pools_for_silo(client, "silo1").await;
+    assert_eq!(silo1_pools.len(), 2);
+    assert_eq!(silo1_pools[0].id(), pool1.id());
+    assert_eq!(silo1_pools[1].id(), pool2.id());
+
+    let silo2_pools = pools_for_silo(client, "silo2").await;
+    assert_eq!(silo2_pools.len(), 1);
+    assert_eq!(silo2_pools[0].identity.name, "pool1");
+
+    let silo3_pools = pools_for_silo(client, "silo3").await;
+    assert_eq!(silo3_pools.len(), 1);
+    assert_eq!(silo3_pools[0].identity.name, "pool1");
 }
 
 /// The internal IP pool, defined by its association with the internal silo,
@@ -332,7 +346,7 @@ async fn test_ip_pool_service_no_cud(cptestctx: &ControlPlaneTestContext) {
 
     // linking not allowed
 
-    // let link_body = params::IpPoolSiloLink {
+    // let link_body = params::IpPoolLinkSilo {
     //     silo: NameOrId::Name(cptestctx.silo_name.clone()),
     //     is_default: false,
     // };
@@ -360,9 +374,13 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) {
     let assocs_p0 = silos_for_pool(client, "p0").await;
     assert_eq!(assocs_p0.items.len(), 0);
 
+    let silo_name = cptestctx.silo_name.as_str();
+    let silo_pools = pools_for_silo(client, silo_name).await;
+    assert_eq!(silo_pools.len(), 0);
+
     // expect 404 on association 
if the specified silo doesn't exist let nonexistent_silo_id = Uuid::new_v4(); - let params = params::IpPoolSiloLink { + let params = params::IpPoolLinkSilo { silo: NameOrId::Id(nonexistent_silo_id), is_default: false, }; @@ -374,17 +392,20 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { StatusCode::NOT_FOUND, ) .await; + let not_found = + format!("not found: silo with id \"{nonexistent_silo_id}\""); + assert_eq!(error.message, not_found); - assert_eq!( - error.message, - format!("not found: silo with id \"{nonexistent_silo_id}\"") - ); + // pools for silo also 404s on nonexistent silo + let url = format!("/v1/system/silos/{}/ip-pools", nonexistent_silo_id); + let error = object_get_error(client, &url, StatusCode::NOT_FOUND).await; + assert_eq!(error.message, not_found); // associate by name with silo that exists let silo = NameOrId::Name(cptestctx.silo_name.clone()); let params = - params::IpPoolSiloLink { silo: silo.clone(), is_default: false }; - let _: IpPoolSilo = + params::IpPoolLinkSilo { silo: silo.clone(), is_default: false }; + let _: IpPoolSiloLink = object_create(client, "/v1/system/ip-pools/p0/silos", ¶ms).await; // second attempt to create the same link errors due to conflict @@ -402,26 +423,45 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { let silo_id = object_get::(client, &silo_url).await.identity.id; let assocs_p0 = silos_for_pool(client, "p0").await; - let silo_link = - IpPoolSilo { ip_pool_id: p0.identity.id, silo_id, is_default: false }; + let silo_link = IpPoolSiloLink { + ip_pool_id: p0.identity.id, + silo_id, + is_default: false, + }; assert_eq!(assocs_p0.items.len(), 1); assert_eq!(assocs_p0.items[0], silo_link); + let silo_pools = pools_for_silo(client, silo_name).await; + assert_eq!(silo_pools.len(), 1); + assert_eq!(silo_pools[0].identity.id, p0.identity.id); + assert_eq!(silo_pools[0].is_default, false); + // associate same silo to other pool by ID instead of name - let link_params = params::IpPoolSiloLink { + let link_params = params::IpPoolLinkSilo { silo: NameOrId::Id(silo_id), is_default: true, }; let url = "/v1/system/ip-pools/p1/silos"; - let _: IpPoolSilo = object_create(client, &url, &link_params).await; + let _: IpPoolSiloLink = object_create(client, &url, &link_params).await; let silos_p1 = silos_for_pool(client, "p1").await; assert_eq!(silos_p1.items.len(), 1); assert_eq!( silos_p1.items[0], - IpPoolSilo { ip_pool_id: p1.identity.id, is_default: true, silo_id } + IpPoolSiloLink { + ip_pool_id: p1.identity.id, + is_default: true, + silo_id + } ); + let silo_pools = pools_for_silo(client, silo_name).await; + assert_eq!(silo_pools.len(), 2); + assert_eq!(silo_pools[0].id(), p0.id()); + assert_eq!(silo_pools[0].is_default, false); + assert_eq!(silo_pools[1].id(), p1.id()); + assert_eq!(silo_pools[1].is_default, true); + // creating a third pool and trying to link it as default: true should fail create_pool(client, "p2").await; let url = "/v1/system/ip-pools/p2/silos"; @@ -446,13 +486,19 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { "IP Pool cannot be deleted while it is linked to a silo", ); - // unlink silo (doesn't matter that it's a default) + // unlink p1 from silo (doesn't matter that it's a default) let url = format!("/v1/system/ip-pools/p1/silos/{}", cptestctx.silo_name); object_delete(client, &url).await; let silos_p1 = silos_for_pool(client, "p1").await; assert_eq!(silos_p1.items.len(), 0); + // after unlinking p1, only p0 is left + let silo_pools = 
pools_for_silo(client, silo_name).await;
+    assert_eq!(silo_pools.len(), 1);
+    assert_eq!(silo_pools[0].identity.id, p0.identity.id);
+    assert_eq!(silo_pools[0].is_default, false);
+
     // now we can delete the pool too
     object_delete(client, "/v1/system/ip-pools/p1").await;
 }
@@ -486,10 +532,10 @@ async fn test_ip_pool_update_default(cptestctx: &ControlPlaneTestContext) {
     // associate both pools with the test silo
     let silo = NameOrId::Name(cptestctx.silo_name.clone());
     let params =
-        params::IpPoolSiloLink { silo: silo.clone(), is_default: false };
-    let _: IpPoolSilo =
+        params::IpPoolLinkSilo { silo: silo.clone(), is_default: false };
+    let _: IpPoolSiloLink =
         object_create(client, "/v1/system/ip-pools/p0/silos", &params).await;
-    let _: IpPoolSilo =
+    let _: IpPoolSiloLink =
         object_create(client, "/v1/system/ip-pools/p1/silos", &params).await;
 
     // now both are linked to the silo, neither is marked default
@@ -503,10 +549,10 @@
 
     // make p0 default
     let params = IpPoolSiloUpdate { is_default: true };
-    let _: IpPoolSilo = object_put(client, &p0_silo_url, &params).await;
+    let _: IpPoolSiloLink = object_put(client, &p0_silo_url, &params).await;
 
     // making the same one default again is not an error
-    let _: IpPoolSilo = object_put(client, &p0_silo_url, &params).await;
+    let _: IpPoolSiloLink = object_put(client, &p0_silo_url, &params).await;
 
     // now p0 is default
     let silos_p0 = silos_for_pool(client, "p0").await;
@@ -524,7 +570,7 @@
     let params = IpPoolSiloUpdate { is_default: true };
     let p1_silo_url =
         format!("/v1/system/ip-pools/p1/silos/{}", cptestctx.silo_name);
-    let _: IpPoolSilo = object_put(client, &p1_silo_url, &params).await;
+    let _: IpPoolSiloLink = object_put(client, &p1_silo_url, &params).await;
 
     // p1 is now default
     let silos_p1 = silos_for_pool(client, "p1").await;
@@ -538,7 +584,7 @@
 
     // we can also unset default
     let params = IpPoolSiloUpdate { is_default: false };
-    let _: IpPoolSilo = object_put(client, &p1_silo_url, &params).await;
+    let _: IpPoolSiloLink = object_put(client, &p1_silo_url, &params).await;
 
     let silos_p1 = silos_for_pool(client, "p1").await;
     assert_eq!(silos_p1.items.len(), 1);
@@ -589,10 +635,18 @@ fn get_names(pools: Vec<IpPool>) -> Vec<String> {
 async fn silos_for_pool(
     client: &ClientTestContext,
-    id: &str,
-) -> ResultsPage<IpPoolSilo> {
-    let url = format!("/v1/system/ip-pools/{}/silos", id);
-    objects_list_page_authz::<IpPoolSilo>(client, &url).await
+    pool: &str,
+) -> ResultsPage<IpPoolSiloLink> {
+    let url = format!("/v1/system/ip-pools/{}/silos", pool);
+    objects_list_page_authz::<IpPoolSiloLink>(client, &url).await
+}
+
+async fn pools_for_silo(
+    client: &ClientTestContext,
+    silo: &str,
+) -> Vec<SiloIpPool> {
+    let url = format!("/v1/system/silos/{}/ip-pools", silo);
+    objects_list_page_authz::<SiloIpPool>(client, &url).await.items
 }
 
 async fn create_pool(client: &ClientTestContext, name: &str) -> IpPool {
@@ -933,17 +987,20 @@ async fn test_ip_pool_list_in_silo(cptestctx: &ControlPlaneTestContext) {
     );
     create_ip_pool(client, otherpool_name, Some(otherpool_range)).await;
 
-    let list =
-        objects_list_page_authz::<IpPool>(client, "/v1/ip-pools").await.items;
+    let list = objects_list_page_authz::<SiloIpPool>(client, "/v1/ip-pools")
+        .await
+        .items;
 
     // only mypool shows up because it's linked to my silo
     assert_eq!(list.len(), 1);
     assert_eq!(list[0].identity.name.to_string(), mypool_name);
+    assert!(list[0].is_default);
 
     // fetch the pool directly too
     let url = format!("/v1/ip-pools/{}", 
mypool_name); - let pool: IpPool = object_get(client, &url).await; + let pool = object_get::(client, &url).await; assert_eq!(pool.identity.name.as_str(), mypool_name); + assert!(pool.is_default); // fetching the other pool directly 404s let url = format!("/v1/ip-pools/{}", otherpool_name); @@ -978,13 +1035,13 @@ async fn test_ip_range_delete_with_allocated_external_ip_fails( .await; // associate pool with default silo, which is the privileged user's silo - let params = IpPoolSiloLink { + let params = IpPoolLinkSilo { silo: NameOrId::Id(DEFAULT_SILO.id()), is_default: true, }; NexusRequest::objects_post(client, &ip_pool_silos_url, ¶ms) .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::() + .execute_and_parse_unwrap::() .await; // Add an IP range to the pool diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 2d842dd930..bd79a9c3e9 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -185,6 +185,7 @@ saml_identity_provider_view GET /v1/system/identity-providers/ silo_create POST /v1/system/silos silo_delete DELETE /v1/system/silos/{silo} silo_identity_provider_list GET /v1/system/identity-providers +silo_ip_pool_list GET /v1/system/silos/{silo}/ip-pools silo_list GET /v1/system/silos silo_policy_update PUT /v1/system/silos/{silo}/policy silo_policy_view GET /v1/system/silos/{silo}/policy diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index a33bc0b8bb..750e83c2a2 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -855,7 +855,7 @@ pub struct IpPoolSiloPath { } #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct IpPoolSiloLink { +pub struct IpPoolLinkSilo { pub silo: NameOrId, /// When a pool is the default for a silo, floating IPs and instance /// ephemeral IPs will come from that pool when no other pool is specified. diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index cf312d3b82..314dd4ed00 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -303,10 +303,22 @@ pub struct IpPool { pub identity: IdentityMetadata, } +/// An IP pool in the context of a silo +#[derive(ObjectIdentity, Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct SiloIpPool { + #[serde(flatten)] + pub identity: IdentityMetadata, + + /// When a pool is the default for a silo, floating IPs and instance + /// ephemeral IPs will come from that pool when no other pool is specified. + /// There can be at most one default for a given silo. 
+ pub is_default: bool, +} + /// A link between an IP pool and a silo that allows one to allocate IPs from /// the pool within the silo #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct IpPoolSilo { +pub struct IpPoolSiloLink { pub ip_pool_id: Uuid, pub silo_id: Uuid, /// When a pool is the default for a silo, floating IPs and instance diff --git a/openapi/nexus.json b/openapi/nexus.json index a4ba6cbb86..2dd4037430 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -2191,7 +2191,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/IpPoolResultsPage" + "$ref": "#/components/schemas/SiloIpPoolResultsPage" } } } @@ -2232,7 +2232,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/IpPool" + "$ref": "#/components/schemas/SiloIpPool" } } } @@ -5039,7 +5039,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/IpPoolSiloResultsPage" + "$ref": "#/components/schemas/IpPoolSiloLinkResultsPage" } } } @@ -5076,7 +5076,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/IpPoolSiloLink" + "$ref": "#/components/schemas/IpPoolLinkSilo" } } }, @@ -5088,7 +5088,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/IpPoolSilo" + "$ref": "#/components/schemas/IpPoolSiloLink" } } } @@ -5144,7 +5144,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/IpPoolSilo" + "$ref": "#/components/schemas/IpPoolSiloLink" } } } @@ -6580,7 +6580,7 @@ "system/silos" ], "summary": "Fetch a silo", - "description": "Fetch a silo by name.", + "description": "Fetch a silo by name or ID.", "operationId": "silo_view", "parameters": [ { @@ -6643,6 +6643,74 @@ } } }, + "/v1/system/silos/{silo}/ip-pools": { + "get": { + "tags": [ + "system/silos" + ], + "summary": "List IP pools available within silo", + "operationId": "silo_ip_pool_list", + "parameters": [ + { + "in": "path", + "name": "silo", + "description": "Name or ID of the silo", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/NameOrIdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SiloIpPoolResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + } + }, "/v1/system/silos/{silo}/policy": { "get": { "tags": [ @@ -12497,6 +12565,22 @@ "name" ] }, + "IpPoolLinkSilo": { + "type": "object", + "properties": { + "is_default": { + "description": "When a pool is the default for a silo, floating IPs and instance ephemeral IPs will come from that pool when no other pool is specified. 
There can be at most one default for a given silo.", + "type": "boolean" + }, + "silo": { + "$ref": "#/components/schemas/NameOrId" + } + }, + "required": [ + "is_default", + "silo" + ] + }, "IpPoolRange": { "type": "object", "properties": { @@ -12565,7 +12649,7 @@ "items" ] }, - "IpPoolSilo": { + "IpPoolSiloLink": { "description": "A link between an IP pool and a silo that allows one to allocate IPs from the pool within the silo", "type": "object", "properties": { @@ -12588,23 +12672,7 @@ "silo_id" ] }, - "IpPoolSiloLink": { - "type": "object", - "properties": { - "is_default": { - "description": "When a pool is the default for a silo, floating IPs and instance ephemeral IPs will come from that pool when no other pool is specified. There can be at most one default for a given silo.", - "type": "boolean" - }, - "silo": { - "$ref": "#/components/schemas/NameOrId" - } - }, - "required": [ - "is_default", - "silo" - ] - }, - "IpPoolSiloResultsPage": { + "IpPoolSiloLinkResultsPage": { "description": "A single page of results", "type": "object", "properties": { @@ -12612,7 +12680,7 @@ "description": "list of items on this page of results", "type": "array", "items": { - "$ref": "#/components/schemas/IpPoolSilo" + "$ref": "#/components/schemas/IpPoolSiloLink" } }, "next_page": { @@ -13802,6 +13870,72 @@ } ] }, + "SiloIpPool": { + "description": "An IP pool in the context of a silo", + "type": "object", + "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "is_default": { + "description": "When a pool is the default for a silo, floating IPs and instance ephemeral IPs will come from that pool when no other pool is specified. There can be at most one default for a given silo.", + "type": "boolean" + }, + "name": { + "description": "unique, mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "time_created": { + "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "description", + "id", + "is_default", + "name", + "time_created", + "time_modified" + ] + }, + "SiloIpPoolResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/SiloIpPool" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, "SiloQuotas": { "description": "A collection of resource counts used to set the virtual capacity of a silo", "type": "object", From e5a3caa09ba43eff5f8b6ef7016fbea0856f0911 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 22 Jan 2024 09:23:56 -0800 Subject: [PATCH 15/91] [sled-agent] Encrypt a specific set of U.2 datasets (#4853) This PR does the following: - As a part of processing U.2s during initialization, `ensure_zpool_datasets_are_encrypted` is invoked. 
This identifies all datasets which should be encrypted
  (`cockroachdb`, `clickhouse`, `internal_dns`, `external_dns`,
  `clickhouse_keeper`) and performs a one-way migration from unencrypted
  to encrypted datasets.
- Additionally, during zone launching, the sled agent verifies properties
  about datasets that it expects should be encrypted. This helps prevent
  these encrypted datasets from being used before their transfer has
  finished, and also prevents these zones from ever using unencrypted
  datasets in the future.
- Furthermore, for all new deployments, this PR uses encryption on these
  datasets by default.
---
 illumos-utils/src/zfs.rs     |  63 +++--
 sled-agent/src/services.rs   |  51 ++++-
 sled-agent/src/sled_agent.rs |   2 +-
 sled-storage/src/dataset.rs  | 431 ++++++++++++++++++++++++++++++++++-
 sled-storage/src/disk.rs     |  10 +
 sled-storage/src/manager.rs  |   5 +-
 6 files changed, 532 insertions(+), 30 deletions(-)

diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs
index e9554100af..c111955761 100644
--- a/illumos-utils/src/zfs.rs
+++ b/illumos-utils/src/zfs.rs
@@ -108,12 +108,13 @@ enum GetValueErrorRaw {
     MissingValue,
 }
 
-/// Error returned by [`Zfs::get_oxide_value`].
+/// Error returned by [`Zfs::get_oxide_value`] or [`Zfs::get_value`].
 #[derive(thiserror::Error, Debug)]
-#[error("Failed to get value '{name}' from filesystem {filesystem}: {err}")]
+#[error("Failed to get value '{name}' from filesystem {filesystem}")]
 pub struct GetValueError {
     filesystem: String,
     name: String,
+    #[source]
     err: GetValueErrorRaw,
 }
 
@@ -464,28 +465,13 @@ impl Zfs {
         Zfs::get_value(filesystem_name, &format!("oxide:{}", name))
     }
 
+    /// Calls "zfs get" with a single value
     pub fn get_value(
         filesystem_name: &str,
         name: &str,
     ) -> Result<String, GetValueError> {
-        let mut command = std::process::Command::new(PFEXEC);
-        let cmd =
-            command.args(&[ZFS, "get", "-Ho", "value", &name, filesystem_name]);
-        let output = execute(cmd).map_err(|err| GetValueError {
-            filesystem: filesystem_name.to_string(),
-            name: name.to_string(),
-            err: err.into(),
-        })?;
-        let stdout = String::from_utf8_lossy(&output.stdout);
-        let value = stdout.trim();
-        if value == "-" {
-            return Err(GetValueError {
-                filesystem: filesystem_name.to_string(),
-                name: name.to_string(),
-                err: GetValueErrorRaw::MissingValue,
-            });
-        }
-        Ok(value.to_string())
+        let [value] = Self::get_values(filesystem_name, &[name])?;
+        Ok(value)
     }
 
     /// List all extant snapshots. 
@@ -549,6 +535,43 @@ impl Zfs {
     }
 }
 
+// These methods don't work with mockall, so they exist in a separate impl block
+impl Zfs {
+    /// Calls "zfs get" to acquire multiple values
+    pub fn get_values<const N: usize>(
+        filesystem_name: &str,
+        names: &[&str; N],
+    ) -> Result<[String; N], GetValueError> {
+        let mut cmd = std::process::Command::new(PFEXEC);
+        let all_names =
+            names.into_iter().map(|n| *n).collect::<Vec<&str>>().join(",");
+        cmd.args(&[ZFS, "get", "-Ho", "value", &all_names, filesystem_name]);
+        let output = execute(&mut cmd).map_err(|err| GetValueError {
+            filesystem: filesystem_name.to_string(),
+            name: format!("{:?}", names),
+            err: err.into(),
+        })?;
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        let values = stdout.trim();
+
+        const EMPTY_STRING: String = String::new();
+        let mut result: [String; N] = [EMPTY_STRING; N];
+
+        for (i, value) in values.lines().enumerate() {
+            let value = value.trim();
+            if value == "-" {
+                return Err(GetValueError {
+                    filesystem: filesystem_name.to_string(),
+                    name: names[i].to_string(),
+                    err: GetValueErrorRaw::MissingValue,
+                });
+            }
+            result[i] = value.to_string();
+        }
+        Ok(result)
+    }
+}
+
 /// A read-only snapshot of a ZFS filesystem.
 #[derive(Clone, Debug)]
 pub struct Snapshot {
diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs
index c068515d14..211e602bbf 100644
--- a/sled-agent/src/services.rs
+++ b/sled-agent/src/services.rs
@@ -199,6 +199,24 @@ pub enum Error {
     #[error("Failed to get address: {0}")]
     GetAddressFailure(#[from] illumos_utils::zone::GetAddressError),
 
+    #[error(
+        "Failed to launch zone {zone} because ZFS value cannot be accessed"
+    )]
+    GetZfsValue {
+        zone: String,
+        #[source]
+        source: illumos_utils::zfs::GetValueError,
+    },
+
+    #[error("Cannot launch {zone} with {dataset} (saw {prop_name} = {prop_value}, expected {prop_value_expected})")]
+    DatasetNotReady {
+        zone: String,
+        dataset: String,
+        prop_name: String,
+        prop_value: String,
+        prop_value_expected: String,
+    },
+
     #[error("NTP zone not ready")]
     NtpZoneNotReady,
 
@@ -1474,7 +1492,7 @@ impl ServiceManager {
             ZoneArgs::Omicron(zone_config) => zone_config
                 .zone
                 .dataset_name()
-                .map(|n| zone::Dataset { name: n.full() })
+                .map(|n| zone::Dataset { name: n.full_name() })
                 .into_iter()
                 .collect(),
             ZoneArgs::Switch(_) => vec![],
@@ -1711,7 +1729,7 @@ impl ServiceManager {
                     dataset.pool_name.clone(),
                     DatasetKind::Crucible,
                 )
-                .full();
+                .full_name();
                 let uuid = &Uuid::new_v4().to_string();
                 let config = PropertyGroupBuilder::new("config")
                     .add_property("datalink", "astring", datalink)
@@ -2930,6 +2948,35 @@ impl ServiceManager {
         // Currently, the zone filesystem should be destroyed between
         // reboots, so it's fine to make this decision locally.
         let root = if let Some(dataset) = zone.dataset_name() {
+            // Check that the dataset is actually ready to be used. 
+ let [zoned, canmount, encryption] = + illumos_utils::zfs::Zfs::get_values( + &dataset.full_name(), + &["zoned", "canmount", "encryption"], + ) + .map_err(|err| Error::GetZfsValue { + zone: zone.zone_name(), + source: err, + })?; + + let check_property = |name, actual, expected| { + if actual != expected { + return Err(Error::DatasetNotReady { + zone: zone.zone_name(), + dataset: dataset.full_name(), + prop_name: String::from(name), + prop_value: actual, + prop_value_expected: String::from(expected), + }); + } + return Ok(()); + }; + check_property("zoned", zoned, "on")?; + check_property("canmount", canmount, "on")?; + if dataset.dataset().dataset_should_be_encrypted() { + check_property("encryption", encryption, "aes-256-gcm")?; + } + // If the zone happens to already manage a dataset, then // we co-locate the zone dataset on the same zpool. // diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index d094643cf9..71fe3584f0 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -611,7 +611,7 @@ impl SledAgent { warn!( self.log, "Failed to load services, will retry in {:?}", delay; - "error" => %err, + "error" => ?err, ); }, ) diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index a2878af7f6..41b77ea38b 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -18,7 +18,9 @@ use rand::distributions::{Alphanumeric, DistString}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::DiskVariant; -use slog::{info, Logger}; +use slog::{debug, info, Logger}; +use std::process::Stdio; +use std::str::FromStr; use std::sync::OnceLock; pub const INSTALL_DATASET: &'static str = "install"; @@ -138,16 +140,57 @@ pub enum DatasetKind { InternalDns, } +impl DatasetKind { + pub fn dataset_should_be_encrypted(&self) -> bool { + match self { + // We encrypt all datasets except Crucible. + // + // Crucible already performs encryption internally, and we + // avoid double-encryption. + DatasetKind::Crucible => false, + _ => true, + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum DatasetKindParseError { + #[error("Dataset unknown: {0}")] + UnknownDataset(String), +} + +impl FromStr for DatasetKind { + type Err = DatasetKindParseError; + + fn from_str(s: &str) -> Result { + use DatasetKind::*; + let kind = match s { + "crucible" => Crucible, + "cockroachdb" => CockroachDb, + "clickhouse" => Clickhouse, + "clickhouse_keeper" => ClickhouseKeeper, + "external_dns" => ExternalDns, + "internal_dns" => InternalDns, + _ => { + return Err(DatasetKindParseError::UnknownDataset( + s.to_string(), + )) + } + }; + Ok(kind) + } +} + impl std::fmt::Display for DatasetKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use DatasetKind::*; let s = match self { Crucible => "crucible", - CockroachDb { .. } => "cockroachdb", + CockroachDb => "cockroachdb", Clickhouse => "clickhouse", ClickhouseKeeper => "clickhouse_keeper", - ExternalDns { .. } => "external_dns", - InternalDns { .. } => "internal_dns", + ExternalDns => "external_dns", + InternalDns => "internal_dns", }; write!(f, "{}", s) } @@ -176,7 +219,28 @@ impl DatasetName { &self.kind } - pub fn full(&self) -> String { + /// Returns the full name of the dataset, as would be returned from + /// "zfs get" or "zfs list". + /// + /// If this dataset should be encrypted, this automatically adds the + /// "crypt" dataset component. + pub fn full_name(&self) -> String { + // Currently, we encrypt all datasets except Crucible. 
+        //
+        // Crucible already performs encryption internally, and we
+        // avoid double-encryption.
+        if self.kind.dataset_should_be_encrypted() {
+            self.full_encrypted_name()
+        } else {
+            self.full_unencrypted_name()
+        }
+    }
+
+    fn full_encrypted_name(&self) -> String {
+        format!("{}/crypt/{}", self.pool_name, self.kind)
+    }
+
+    fn full_unencrypted_name(&self) -> String {
         format!("{}/{}", self.pool_name, self.kind)
     }
 }
@@ -201,6 +265,8 @@ pub enum DatasetError {
         #[source]
         err: Box<SetValueError>,
     },
+    #[error("Failed to make datasets encrypted")]
+    EncryptionMigration(#[from] DatasetEncryptionMigrationError),
 }
 
 /// Ensure that the zpool contains all the datasets we would like it to
@@ -364,6 +430,361 @@ pub(crate) async fn ensure_zpool_has_datasets(
     Ok(())
 }
 
+#[derive(Debug, thiserror::Error)]
+pub enum DatasetEncryptionMigrationError {
+    #[error(transparent)]
+    IoError(#[from] std::io::Error),
+
+    #[error("Failed to run command")]
+    FailedCommand { command: String, stderr: Option<String> },
+
+    #[error("Cannot create new encrypted dataset")]
+    DatasetCreation(#[from] illumos_utils::zfs::EnsureFilesystemError),
+
+    #[error("Missing stdout stream during 'zfs send' command")]
+    MissingStdoutForZfsSend,
+}
+
+fn status_ok_or_get_stderr(
+    command: &tokio::process::Command,
+    output: &std::process::Output,
+) -> Result<(), DatasetEncryptionMigrationError> {
+    if !output.status.success() {
+        let stdcmd = command.as_std();
+        return Err(DatasetEncryptionMigrationError::FailedCommand {
+            command: format!(
+                "{:?} {:?}",
+                stdcmd.get_program(),
+                stdcmd
+                    .get_args()
+                    .collect::<Vec<_>>()
+                    .join(std::ffi::OsStr::new(" "))
+            ),
+            stderr: Some(String::from_utf8_lossy(&output.stderr).to_string()),
+        });
+    }
+    Ok(())
+}
+
+/// Migrates unencrypted datasets to their encrypted formats.
+pub(crate) async fn ensure_zpool_datasets_are_encrypted(
+    log: &Logger,
+    zpool_name: &ZpoolName,
+) -> Result<(), DatasetEncryptionMigrationError> {
+    info!(log, "Looking for unencrypted datasets in {zpool_name}");
+    let unencrypted_datasets =
+        find_all_unencrypted_datasets_directly_within_pool(&log, &zpool_name)
+            .await?;
+
+    // TODO: Could do this in parallel?
+    for dataset in unencrypted_datasets {
+        let log = &log.new(slog::o!("dataset" => dataset.clone()));
+        info!(log, "Found unencrypted dataset");
+
+        ensure_zpool_dataset_is_encrypted(&log, &zpool_name, &dataset).await?;
+    }
+    Ok(())
+}
+
+async fn find_all_unencrypted_datasets_directly_within_pool(
+    log: &Logger,
+    zpool_name: &ZpoolName,
+) -> Result<Vec<String>, DatasetEncryptionMigrationError> {
+    let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS);
+    let pool_name = zpool_name.to_string();
+    let cmd = command.args(&[
+        "list",
+        "-rHo",
+        "name,encryption",
+        "-d",
+        "1",
+        &pool_name,
+    ]);
+    let output = cmd.output().await?;
+    status_ok_or_get_stderr(&cmd, &output)?;
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let lines = stdout.trim().split('\n');
+
+    let mut unencrypted_datasets = vec![];
+    for line in lines {
+        let mut iter = line.split_whitespace();
+        let Some(dataset) = iter.next() else {
+            continue;
+        };
+        let log = log.new(slog::o!("dataset" => dataset.to_string()));
+
+        let Some(encryption) = iter.next() else {
+            continue;
+        };
+
+        // We don't bother checking HOW the dataset is encrypted, just that it
+        // IS encrypted somehow. The sled agent is slightly more opinionated, as
+        // it looks for "aes-256-gcm" explicitly, but we currently don't plan on
+        // providing support for migrating between encryption schemes
+        // automatically. 
+        let encrypted = match encryption {
+            "off" | "-" => false,
+            _ => true,
+        };
+        if encrypted {
+            debug!(log, "Found dataset, but it is already encrypted");
+            continue;
+        }
+        debug!(log, "Found dataset, and it isn't encrypted");
+        if let Some(dataset) =
+            dataset.strip_prefix(&format!("{pool_name}/")).map(String::from)
+        {
+            unencrypted_datasets.push(dataset);
+        }
+    }
+    Ok(unencrypted_datasets)
+}
+
+// Precondition:
+// - We found the dataset as a direct descendant of "zpool_name", which
+//   has encryption set to "off".
+//
+// "dataset" does not include the zpool prefix; format!("{zpool_name}/dataset")
+// would be the full name of the unencrypted dataset.
+async fn ensure_zpool_dataset_is_encrypted(
+    log: &Logger,
+    zpool_name: &ZpoolName,
+    unencrypted_dataset: &str,
+) -> Result<(), DatasetEncryptionMigrationError> {
+    let Ok(kind) = DatasetKind::from_str(&unencrypted_dataset) else {
+        info!(log, "Unrecognized dataset kind");
+        return Ok(());
+    };
+    info!(log, "Dataset recognized");
+    let unencrypted_dataset = format!("{zpool_name}/{unencrypted_dataset}");
+
+    if !kind.dataset_should_be_encrypted() {
+        info!(log, "Dataset should not be encrypted");
+        return Ok(());
+    }
+    info!(log, "Dataset should be encrypted");
+
+    let encrypted_dataset = DatasetName::new(zpool_name.clone(), kind);
+    let encrypted_dataset = encrypted_dataset.full_name();
+
+    let (unencrypted_dataset_exists, encrypted_dataset_exists) = (
+        dataset_exists(&unencrypted_dataset).await?,
+        dataset_exists(&encrypted_dataset).await?,
+    );
+
+    match (unencrypted_dataset_exists, encrypted_dataset_exists) {
+        (false, _) => {
+            // In this case, there is no unencrypted dataset! Bail out, there is
+            // nothing to transfer.
+            return Ok(());
+        }
+        (true, true) => {
+            // In this case, the following is true:
+            // - An unencrypted dataset exists
+            // - An encrypted dataset exists
+            //
+            // This is indicative of an incomplete transfer from "old" -> "new".
+            // If we managed to create the encrypted dataset, and got far enough to
+            // rename to its "non-tmp" location, then pick up where we left off:
+            // - Mark the encrypted dataset as usable
+            // - Remove the unencrypted dataset
+            info!(
+                log,
+                "Dataset already has encrypted variant, resuming migration"
+            );
+            return finalize_encryption_migration(
+                &log,
+                &encrypted_dataset,
+                &unencrypted_dataset,
+            )
+            .await;
+        }
+        (true, false) => {
+            // This is the "normal" transfer case, initially: We have an
+            // unencrypted dataset that should become encrypted.
+            info!(log, "Dataset has not yet been encrypted");
+        }
+    }
+
+    let snapshot_name = |dataset: &str| format!("{dataset}@migration");
+
+    // A snapshot taken to provide a point-in-time view of the dataset for
+    // copying.
+    let unencrypted_dataset_snapshot = snapshot_name(&unencrypted_dataset);
+    // A "temporary" name for the encrypted target dataset.
+    let encrypted_dataset_tmp = format!("{}-tmp", encrypted_dataset);
+    // A snapshot that is automatically generated by "zfs receive".
+    let encrypted_dataset_tmp_snapshot = snapshot_name(&encrypted_dataset_tmp);
+
+    // Get rid of snapshots and temporary datasets.
+    //
+    // This removes work of any prior sled agents that might have failed halfway
+    // through this operation.
+    let _ = zfs_destroy(&unencrypted_dataset_snapshot).await;
+    let _ = zfs_destroy(&encrypted_dataset_tmp).await;
+
+    zfs_create_snapshot(&unencrypted_dataset_snapshot).await?;
+    info!(log, "Unencrypted dataset snapshotted");
+
+    // Transfer to a "tmp" dataset that's encrypted, but not mountable. 
+ // + // This makes it clear it's a "work-in-progress" dataset until the transfer + // has fully completed. + zfs_transfer_to_unmountable_dataset( + &unencrypted_dataset_snapshot, + &encrypted_dataset_tmp, + ) + .await?; + info!(log, "Dataset transferred to encrypted (temporary) location"); + + zfs_destroy(&unencrypted_dataset_snapshot).await?; + zfs_destroy(&encrypted_dataset_tmp_snapshot).await?; + info!(log, "Removed snapshots"); + + // We tragically cannot "zfs rename" any datasets with "zoned=on". + // + // We perform the rename first, then set "zoned=on" with "canmount=on". + // This prevents the dataset from being used by zones until these properties + // have finally been set. + zfs_rename(&encrypted_dataset_tmp, &encrypted_dataset).await?; + + return finalize_encryption_migration( + &log, + &encrypted_dataset, + &unencrypted_dataset, + ) + .await; +} + +// Returns true if the dataset exists. +async fn dataset_exists( + dataset: &str, +) -> Result { + let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS); + let cmd = command.args(&["list", "-H", dataset]); + Ok(cmd.status().await?.success()) +} + +// Destroys the dataset and all children, recursively. +async fn zfs_destroy( + dataset: &str, +) -> Result<(), DatasetEncryptionMigrationError> { + let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS); + let cmd = command.args(&["destroy", "-r", dataset]); + let output = cmd.output().await?; + status_ok_or_get_stderr(&cmd, &output)?; + Ok(()) +} + +// Creates a snapshot named "dataset_snapshot". +async fn zfs_create_snapshot( + dataset_snapshot: &str, +) -> Result<(), DatasetEncryptionMigrationError> { + let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS); + let cmd = command.args(&["snapshot", dataset_snapshot]); + let output = cmd.output().await?; + status_ok_or_get_stderr(&cmd, &output)?; + Ok(()) +} + +// Uses "zfs send" and "zfs receive" to create an unmountable, unzoned dataset. +// +// These properties are set to allow the caller to continue manipulating the +// dataset (via rename, setting other properties, etc) before it's used. +async fn zfs_transfer_to_unmountable_dataset( + from: &str, + to: &str, +) -> Result<(), DatasetEncryptionMigrationError> { + let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS); + let sender_cmd = command + .args(&["send", from]) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + let mut sender = sender_cmd.spawn()?; + + let Some(sender_stdout) = sender.stdout.take() else { + return Err(DatasetEncryptionMigrationError::MissingStdoutForZfsSend); + }; + let sender_stdout: Stdio = sender_stdout.try_into().map_err(|_| { + DatasetEncryptionMigrationError::MissingStdoutForZfsSend + })?; + + let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS); + let receiver_cmd = command + .args(&[ + "receive", + "-o", + "mountpoint=/data", + "-o", + "canmount=off", + "-o", + "zoned=off", + to, + ]) + .stdin(sender_stdout) + .stderr(Stdio::piped()); + let receiver = receiver_cmd.spawn()?; + + let output = receiver.wait_with_output().await?; + status_ok_or_get_stderr(&receiver_cmd, &output)?; + let output = sender.wait_with_output().await?; + status_ok_or_get_stderr(&sender_cmd, &output)?; + + Ok(()) +} + +// Sets "properties" on "dataset". +// +// Each member of "properties" should have the form "key=value". 
+async fn zfs_set(
+    dataset: &str,
+    properties: &[&str],
+) -> Result<(), DatasetEncryptionMigrationError> {
+    let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS);
+    let cmd = command.arg("set");
+    for property in properties {
+        cmd.arg(property);
+    }
+    cmd.arg(dataset);
+
+    let output = cmd.output().await?;
+    status_ok_or_get_stderr(&cmd, &output)?;
+    Ok(())
+}
+
+// Sets properties to make a dataset "ready to be used by zones".
+async fn zfs_set_zoned_and_mountable(
+    dataset: &str,
+) -> Result<(), DatasetEncryptionMigrationError> {
+    zfs_set(&dataset, &["zoned=on", "canmount=on"]).await
+}
+
+// Renames a dataset from "from" to "to".
+async fn zfs_rename(
+    from: &str,
+    to: &str,
+) -> Result<(), DatasetEncryptionMigrationError> {
+    let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS);
+    let cmd = command.args(&["rename", from, to]);
+    let output = cmd.output().await?;
+    status_ok_or_get_stderr(&cmd, &output)?;
+    Ok(())
+}
+
+async fn finalize_encryption_migration(
+    log: &Logger,
+    encrypted_dataset: &str,
+    unencrypted_dataset: &str,
+) -> Result<(), DatasetEncryptionMigrationError> {
+    zfs_set_zoned_and_mountable(&encrypted_dataset).await?;
+    info!(log, "Dataset is encrypted, zoned, and mountable"; "dataset" => encrypted_dataset);
+
+    zfs_destroy(&unencrypted_dataset).await?;
+    info!(log, "Destroyed unencrypted dataset"; "dataset" => unencrypted_dataset);
+    Ok(())
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs
index f5209def77..cfe189a409 100644
--- a/sled-storage/src/disk.rs
+++ b/sled-storage/src/disk.rs
@@ -151,6 +151,16 @@ impl Disk {
             key_requester,
         )
         .await?;
+
+        if matches!(disk.variant(), DiskVariant::U2) {
+            dataset::ensure_zpool_datasets_are_encrypted(
+                log,
+                disk.zpool_name(),
+            )
+            .await
+            .map_err(|err| crate::dataset::DatasetError::from(err))?;
+        }
+
         Ok(disk)
     }
 
diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs
index 50b1c44148..547c4ec2d7 100644
--- a/sled-storage/src/manager.rs
+++ b/sled-storage/src/manager.rs
@@ -471,7 +471,8 @@ impl StorageManager {
             Err(err) => {
                 error!(
                     self.log,
-                    "Persistent error: {err}: not queueing disk";
+                    "Persistent error: not queueing disk";
+                    "err" => ?err,
                     "disk_id" => ?raw_disk.identity()
                 );
                 Err(err.into())
@@ -575,7 +576,7 @@ impl StorageManager {
         }
 
         let zoned = true;
-        let fs_name = &request.dataset_name.full();
+        let fs_name = &request.dataset_name.full_name();
         let do_format = true;
         let encryption_details = None;
         let size_details = None;

From e8b6dd1dc4e7abb39276ad347bdf1ac08171862d Mon Sep 17 00:00:00 2001
From: Kyle Simpson
Date: Mon, 22 Jan 2024 18:33:01 +0000
Subject: [PATCH 16/91] Add floating IP check to project delete (#4862)

The main floating IP PR missed the check in `project_delete` for any
existing child floating IP objects. This commit adds this check so
that a project cannot be deleted while any FIPs remain (matching other
project-scoped resources), as well as a matching integration test.

Closes #4854. 
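For context on the mechanism: each `generate_fn_to_ensure_none_in_project!`
invocation in the diff below expands to an `ensure_no_*_in_project` helper
that refuses deletion while a child resource of that type remains. The
sketch here is illustrative only, not the macro's literal expansion;
`find_any_floating_ip_label` is a hypothetical lookup standing in for the
generated query:

```rust
// Sketch of the generated shape (assumed; the real macro lives in
// nexus/db-queries/src/db/datastore/project.rs and differs in detail).
async fn ensure_no_floating_ips_in_project(
    &self,
    opctx: &OpContext,
    authz_project: &authz::Project,
) -> DeleteResult {
    // Hypothetical helper: fetch the name of any live floating IP still
    // parented to this project.
    if let Some(label) =
        self.find_any_floating_ip_label(opctx, authz_project).await?
    {
        // Mirrors the message asserted by the new integration test:
        // "project to be deleted contains a floating ip: my-fip"
        return Err(Error::invalid_request(&format!(
            "project to be deleted contains a floating ip: {label}"
        )));
    }
    Ok(())
}
```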
--- nexus/db-queries/src/db/datastore/project.rs | 2 ++ nexus/tests/integration_tests/projects.rs | 34 ++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/nexus/db-queries/src/db/datastore/project.rs b/nexus/db-queries/src/db/datastore/project.rs index e3927fdfc1..08647b421e 100644 --- a/nexus/db-queries/src/db/datastore/project.rs +++ b/nexus/db-queries/src/db/datastore/project.rs @@ -221,6 +221,7 @@ impl DataStore { generate_fn_to_ensure_none_in_project!(instance, name, String); generate_fn_to_ensure_none_in_project!(disk, name, String); + generate_fn_to_ensure_none_in_project!(floating_ip, name, String); generate_fn_to_ensure_none_in_project!(project_image, name, String); generate_fn_to_ensure_none_in_project!(snapshot, name, String); generate_fn_to_ensure_none_in_project!(vpc, name, String); @@ -237,6 +238,7 @@ impl DataStore { // Verify that child resources do not exist. self.ensure_no_instances_in_project(opctx, authz_project).await?; self.ensure_no_disks_in_project(opctx, authz_project).await?; + self.ensure_no_floating_ips_in_project(opctx, authz_project).await?; self.ensure_no_project_images_in_project(opctx, authz_project).await?; self.ensure_no_snapshots_in_project(opctx, authz_project).await?; self.ensure_no_vpcs_in_project(opctx, authz_project).await?; diff --git a/nexus/tests/integration_tests/projects.rs b/nexus/tests/integration_tests/projects.rs index d9d6ceef5b..60195e5902 100644 --- a/nexus/tests/integration_tests/projects.rs +++ b/nexus/tests/integration_tests/projects.rs @@ -9,6 +9,7 @@ use http::StatusCode; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_floating_ip; use nexus_test_utils::resource_helpers::{ create_default_ip_pool, create_disk, create_project, create_vpc, object_create, project_get, projects_list, DiskTest, @@ -209,6 +210,39 @@ async fn test_project_deletion_with_disk(cptestctx: &ControlPlaneTestContext) { delete_project(&url, &client).await; } +#[nexus_test] +async fn test_project_deletion_with_floating_ip( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + let _test = DiskTest::new(&cptestctx).await; + + // Create a project that we'll use for testing. 
+ let name = "springfield-squidport"; + let url = format!("/v1/projects/{}", name); + + create_default_ip_pool(&client).await; + + create_project(&client, &name).await; + delete_project_default_subnet(&name, &client).await; + delete_project_default_vpc(&name, &client).await; + let fip = create_floating_ip(&client, "my-fip", &name, None, None).await; + assert_eq!( + "project to be deleted contains a floating ip: my-fip", + delete_project_expect_fail(&url, &client).await, + ); + let disk_url = + super::external_ips::get_floating_ip_by_id_url(&fip.identity.id); + NexusRequest::object_delete(&client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete floating IP"); + + delete_project(&url, &client).await; +} + #[nexus_test] async fn test_project_deletion_with_image(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; From 030adce411fe37c9e2d3c70ee5a6cdbdfd49f3f9 Mon Sep 17 00:00:00 2001 From: Rain Date: Mon, 22 Jan 2024 14:57:01 -0800 Subject: [PATCH 17/91] [update-engine] reuse parent_key_and_child_index from existing steps (#4858) During a dogfood mupdate on 2024-01-18, I saw some really weird output with wicket's `rack-update attach` that looked like: ``` [sled 8 00:20:48] Running .... 12a 5s 1/3) Writing host phase 2 to slot B ``` The "5s" is all wrong -- the letter there is supposed to indicate, for an engine that has one or more nested engines, the index of that nested engine. So for example, if a step 12 has two nested engines, they would be marked "12a" and "12b". "5s" indicates that that's the 19th nested engine for that step, and we definitely have nowhere near 19 nested engines for a step anywhere in wicketd. This turned out to be because we weren't reusing child indexes from earlier steps in the sequence. Fix that, and also add: * tests which catch this issue * some dev-only code to wicket which made it easy to debug this locally --- update-engine/src/buffer.rs | 185 +++++++++++++++++++++++++------- update-engine/src/test_utils.rs | 31 ++++-- wicket/src/cli/rack_update.rs | 179 +++++++++++++++++++++++++++++- 3 files changed, 350 insertions(+), 45 deletions(-) diff --git a/update-engine/src/buffer.rs b/update-engine/src/buffer.rs index 36a0626963..04363ffc26 100644 --- a/update-engine/src/buffer.rs +++ b/update-engine/src/buffer.rs @@ -262,45 +262,59 @@ impl EventStore { root_event_index, event.total_elapsed, ); + if let Some(new_execution) = actions.new_execution { if new_execution.nest_level == 0 { self.root_execution_id = Some(new_execution.execution_id); } - // If there's a parent key, then what's the child index? - let parent_key_and_child_index = - if let Some(parent_key) = new_execution.parent_key { - match self.map.get_mut(&parent_key) { - Some(parent_data) => { - let child_index = parent_data.child_executions_seen; - parent_data.child_executions_seen += 1; - Some((parent_key, child_index)) - } - None => { - // This should never happen -- it indicates that the - // parent key was unknown. This can happen if we - // didn't receive an event regarding a parent - // execution being started. + + if let Some((first_step_key, ..)) = + new_execution.steps_to_add.first() + { + // Do we already know about this execution? If so, grab the parent + // key and child index from the first step. 
+ let parent_key_and_child_index = + if let Some(data) = self.map.get(first_step_key) { + data.parent_key_and_child_index + } else { + if let Some(parent_key) = new_execution.parent_key { + match self.map.get_mut(&parent_key) { + Some(parent_data) => { + let child_index = + parent_data.child_executions_seen; + parent_data.child_executions_seen += 1; + Some((parent_key, child_index)) + } + None => { + // This should never happen -- it indicates that the + // parent key was unknown. This can happen if we + // didn't receive an event regarding a parent + // execution being started. + None + } + } + } else { None } - } - } else { - None - }; - let total_steps = new_execution.steps_to_add.len(); - for (new_step_key, new_step, sort_key) in new_execution.steps_to_add - { - // These are brand new steps so their keys shouldn't exist in the - // map. But if they do, don't overwrite them. - self.map.entry(new_step_key).or_insert_with(|| { - EventBufferStepData::new( - new_step, - parent_key_and_child_index, - sort_key, - new_execution.nest_level, - total_steps, - root_event_index, - ) - }); + }; + + let total_steps = new_execution.steps_to_add.len(); + for (new_step_key, new_step, sort_key) in + new_execution.steps_to_add + { + // These are brand new steps so their keys shouldn't exist in the + // map. But if they do, don't overwrite them. + self.map.entry(new_step_key).or_insert_with(|| { + EventBufferStepData::new( + new_step, + parent_key_and_child_index, + sort_key, + new_execution.nest_level, + total_steps, + root_event_index, + ) + }); + } } } @@ -1808,6 +1822,7 @@ mod tests { struct BufferTestContext { root_execution_id: ExecutionId, generated_events: Vec>, + // Data derived from generated_events. generated_step_events: Vec>, } @@ -1885,9 +1900,95 @@ mod tests { Event::Progress(_) => None, }) .collect(); + + // Create two buffer and feed events. + // * The incremental buffer has each event fed into it one-by-one. + // * The "idempotent" buffer has events 0, 0..1, 0..2, 0..3, etc + // fed into it one by one. The name is because this is really + // testing the idempotency of the event buffer. + + println!("** generating incremental and idempotent buffers **"); + let mut incremental_buffer = EventBuffer::default(); + let mut idempotent_buffer = EventBuffer::default(); + for event in &generated_events { + incremental_buffer.add_event(event.clone()); + let report = incremental_buffer.generate_report(); + idempotent_buffer.add_event_report(report); + } + + // Check that the two buffers above are similar. + Self::ensure_buffers_similar( + &incremental_buffer, + &idempotent_buffer, + ) + .expect("idempotent buffer is similar to incremental buffer"); + + // Also generate a buffer with a single event report. + println!("** generating oneshot buffer **"); + let mut oneshot_buffer = EventBuffer::default(); + oneshot_buffer + .add_event_report(incremental_buffer.generate_report()); + + Self::ensure_buffers_similar(&incremental_buffer, &oneshot_buffer) + .expect("oneshot buffer is similar to incremental buffer"); + Self { root_execution_id, generated_events, generated_step_events } } + fn ensure_buffers_similar( + buf1: &EventBuffer, + buf2: &EventBuffer, + ) -> anyhow::Result<()> { + // The two should have the same step keys. 
+ let buf1_steps = buf1.steps(); + let buf2_steps = buf2.steps(); + + ensure!( + buf1_steps.as_slice().len() == buf2_steps.as_slice().len(), + "buffers have same number of steps ({} vs {})", + buf1_steps.as_slice().len(), + buf2_steps.as_slice().len() + ); + + for (ix, ((k1, data1), (k2, data2))) in buf1_steps + .as_slice() + .iter() + .zip(buf2_steps.as_slice().iter()) + .enumerate() + { + ensure!( + k1 == k2, + "buffers have same step keys at index {} ({:?} vs {:?})", + ix, + k1, + k2 + ); + ensure!( + data1.sort_key() == data2.sort_key(), + "buffers have same sort key at index {} ({:?} vs {:?})", + ix, + data1.sort_key(), + data2.sort_key() + ); + ensure!( + data1.parent_key_and_child_index() == data2.parent_key_and_child_index(), + "buffers have same parent key and child index at index {} ({:?} vs {:?})", + ix, + data1.parent_key_and_child_index(), + data2.parent_key_and_child_index(), + ); + ensure!( + data1.nest_level() == data2.nest_level(), + "buffers have same nest level at index {} ({:?} vs {:?})", + ix, + data1.nest_level(), + data2.nest_level(), + ); + } + + Ok(()) + } + /// Runs a test in a scenario where all elements should be seen. /// /// Each event is added `times` times. @@ -2165,10 +2266,10 @@ mod tests { ), "this is the last event so ExecutionStatus must be completed" ); - // There are two nested engines. + // There are three nested engines. ensure!( - summary.len() == 3, - "two nested engines must be defined" + summary.len() == 4, + "three nested engines (plus one root engine) must be defined" ); let (_, nested_summary) = summary @@ -2186,6 +2287,18 @@ mod tests { let (_, nested_summary) = summary .get_index(2) .expect("this is the second nested engine"); + ensure!( + matches!( + &nested_summary.execution_status, + ExecutionStatus::Terminal(info) + if info.kind == TerminalKind::Failed + ), + "for this engine, the ExecutionStatus must be failed" + ); + + let (_, nested_summary) = summary + .get_index(3) + .expect("this is the third nested engine"); ensure!( matches!( &nested_summary.execution_status, diff --git a/update-engine/src/test_utils.rs b/update-engine/src/test_utils.rs index b943d1ddfe..539ef28864 100644 --- a/update-engine/src/test_utils.rs +++ b/update-engine/src/test_utils.rs @@ -141,7 +141,24 @@ fn define_test_steps( move |parent_cx| async move { parent_cx .with_nested_engine(|engine| { - define_nested_engine(&parent_cx, engine); + define_nested_engine(&parent_cx, engine, 3, "steps"); + Ok(()) + }) + .await + .expect_err("this is expected to fail"); + + // Define a second nested engine -- this verifies that internal + // buffer indexes match up. + parent_cx + .with_nested_engine(|engine| { + define_nested_engine( + &parent_cx, + engine, + 10, + // The tests in buffer.rs expect the units to be + // "steps" exactly once, so use a different name here. + "steps (again)", + ); Ok(()) }) .await @@ -214,18 +231,20 @@ fn define_test_steps( fn define_nested_engine<'a>( parent_cx: &'a StepContext, engine: &mut UpdateEngine<'a, TestSpec>, + start_id: usize, + step_units: &'static str, ) { engine .new_step( "nested-foo".to_owned(), - 4, + start_id + 1, "Nested step 1", move |cx| async move { parent_cx .send_progress(StepProgress::with_current_and_total( 1, 3, - "steps", + step_units, Default::default(), )) .await; @@ -239,7 +258,7 @@ fn define_nested_engine<'a>( engine .new_step::<_, _, ()>( "nested-bar".to_owned(), - 5, + start_id + 2, "Nested step 2 (fails)", move |cx| async move { // This is used by NestedProgressCheck below. 
@@ -247,7 +266,7 @@ fn define_nested_engine<'a>( .send_progress(StepProgress::with_current_and_total( 2, 3, - "steps", + step_units, Default::default(), )) .await; @@ -263,7 +282,7 @@ fn define_nested_engine<'a>( .send_progress(StepProgress::with_current_and_total( 3, 3, - "steps", + step_units, Default::default(), )) .await; diff --git a/wicket/src/cli/rack_update.rs b/wicket/src/cli/rack_update.rs index cac0f09ee5..ccacea0e38 100644 --- a/wicket/src/cli/rack_update.rs +++ b/wicket/src/cli/rack_update.rs @@ -8,23 +8,29 @@ use std::{ collections::{BTreeMap, BTreeSet}, + io::{BufReader, Write}, net::SocketAddrV6, time::Duration, }; use anyhow::{anyhow, bail, Context, Result}; +use camino::Utf8PathBuf; use clap::{Args, Subcommand, ValueEnum}; use slog::Logger; use tokio::{sync::watch, task::JoinHandle}; use update_engine::{ display::{GroupDisplay, LineDisplayStyles}, - NestedError, + EventBuffer, NestedError, }; use wicket_common::{ - rack_update::ClearUpdateStateResponse, update_events::EventReport, + rack_update::ClearUpdateStateResponse, + update_events::{EventReport, WicketdEngineSpec}, WICKETD_TIMEOUT, }; -use wicketd_client::types::{ClearUpdateStateParams, StartUpdateParams}; +use wicketd_client::types::{ + ClearUpdateStateParams, GetArtifactsAndEventReportsResponse, + StartUpdateParams, +}; use crate::{ cli::GlobalOpts, @@ -41,10 +47,22 @@ use super::command::CommandOutput; pub(crate) enum RackUpdateArgs { /// Start one or more updates. Start(StartRackUpdateArgs), + /// Attach to one or more running updates. Attach(AttachArgs), + /// Clear updates. Clear(ClearArgs), + + /// Dump artifacts and event reports from wicketd. + /// + /// Debug-only, intended for development. + DebugDump(DumpArgs), + + /// Replay update logs from a dump file. + /// + /// Debug-only, intended for development. + DebugReplay(ReplayArgs), } impl RackUpdateArgs { @@ -65,6 +83,12 @@ impl RackUpdateArgs { RackUpdateArgs::Clear(args) => { args.exec(log, wicketd_addr, global_opts, output).await } + RackUpdateArgs::DebugDump(args) => { + args.exec(log, wicketd_addr).await + } + RackUpdateArgs::DebugReplay(args) => { + args.exec(log, global_opts, output) + } } } } @@ -380,6 +404,155 @@ async fn do_clear_update_state( Ok(response) } +#[derive(Debug, Args)] +pub(crate) struct DumpArgs { + /// Pretty-print JSON output. + #[clap(long)] + pretty: bool, +} + +impl DumpArgs { + async fn exec(self, log: Logger, wicketd_addr: SocketAddrV6) -> Result<()> { + let client = create_wicketd_client(&log, wicketd_addr, WICKETD_TIMEOUT); + + let response = client + .get_artifacts_and_event_reports() + .await + .context("error calling get_artifacts_and_event_reports")?; + let response = response.into_inner(); + + // Return the response as a JSON object. + if self.pretty { + serde_json::to_writer_pretty(std::io::stdout(), &response) + .context("error writing to stdout")?; + } else { + serde_json::to_writer(std::io::stdout(), &response) + .context("error writing to stdout")?; + } + Ok(()) + } +} + +#[derive(Debug, Args)] +pub(crate) struct ReplayArgs { + /// The dump file to replay. + /// + /// This should be the output of `rack-update debug-dump`, or something + /// like . + file: Utf8PathBuf, + + /// How to feed events into the display. 
+    #[clap(long, value_enum, default_value_t)]
+    strategy: ReplayStrategy,
+
+    #[clap(flatten)]
+    component_ids: ComponentIdSelector,
+}
+
+impl ReplayArgs {
+    fn exec(
+        self,
+        log: Logger,
+        global_opts: GlobalOpts,
+        output: CommandOutput<'_>,
+    ) -> Result<()> {
+        let update_ids = self.component_ids.to_component_ids()?;
+        let mut display = GroupDisplay::new_with_display(
+            &log,
+            update_ids.iter().copied(),
+            output.stderr,
+        );
+        if global_opts.use_color() {
+            display.set_styles(LineDisplayStyles::colorized());
+        }
+
+        let file = BufReader::new(
+            std::fs::File::open(&self.file)
+                .with_context(|| format!("error opening {}", self.file))?,
+        );
+        let response: GetArtifactsAndEventReportsResponse =
+            serde_json::from_reader(file)?;
+        let event_reports =
+            parse_event_report_map(&log, response.event_reports);
+
+        self.strategy.execute(display, event_reports)?;
+
+        Ok(())
+    }
+}
+
+#[derive(Clone, Copy, Default, Eq, PartialEq, Hash, Debug, ValueEnum)]
+enum ReplayStrategy {
+    /// Feed all events into the buffer immediately.
+    #[default]
+    Oneshot,
+
+    /// Feed events into the buffer one at a time.
+    Incremental,
+
+    /// Feed events into the buffer as 0, 0..1, 0..2, 0..3 etc.
+    Idempotent,
+}
+
+impl ReplayStrategy {
+    fn execute(
+        self,
+        mut display: GroupDisplay<
+            ComponentId,
+            &mut dyn Write,
+            WicketdEngineSpec,
+        >,
+        event_reports: BTreeMap<ComponentId, EventReport>,
+    ) -> Result<()> {
+        match self {
+            ReplayStrategy::Oneshot => {
+                // TODO: parallelize this computation?
+                for (id, event_report) in event_reports {
+                    // If display.add_event_report errors out, it's for a report for a
+                    // component we weren't interested in. Ignore it.
+                    _ = display.add_event_report(&id, event_report);
+                }
+
+                display.write_events()?;
+            }
+            ReplayStrategy::Incremental => {
+                for (id, event_report) in &event_reports {
+                    let mut buffer = EventBuffer::default();
+                    let mut last_seen = None;
+                    for event in &event_report.step_events {
+                        buffer.add_step_event(event.clone());
+                        let report =
+                            buffer.generate_report_since(&mut last_seen);
+
+                        // If display.add_event_report errors out, it's for a report for a
+                        // component we weren't interested in. Ignore it.
+                        _ = display.add_event_report(&id, report);
+
+                        display.write_events()?;
+                    }
+                }
+            }
+            ReplayStrategy::Idempotent => {
+                for (id, event_report) in &event_reports {
+                    let mut buffer = EventBuffer::default();
+                    for event in &event_report.step_events {
+                        buffer.add_step_event(event.clone());
+                        let report = buffer.generate_report();
+
+                        // If display.add_event_report errors out, it's for a report for a
+                        // component we weren't interested in. Ignore it.
+                        _ = display.add_event_report(&id, report);
+
+                        display.write_events()?;
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
+}
+
 #[derive(Clone, Copy, Eq, PartialEq, Hash, Debug, ValueEnum)]
 enum MessageFormat {
     Human,

From 8726a5fa86e9c5d8423a97cf9c710af54914a687 Mon Sep 17 00:00:00 2001
From: iliana etaoin
Date: Tue, 23 Jan 2024 10:06:58 -0800
Subject: [PATCH 18/91] Set version to 6.0.0 (#4841)

---
 .github/buildomat/jobs/package.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh
index 350ab37233..f0bd764feb 100755
--- a/.github/buildomat/jobs/package.sh
+++ b/.github/buildomat/jobs/package.sh
@@ -37,7 +37,7 @@ rustc --version
 # trampoline global zone images.
# COMMIT=$(git rev-parse HEAD) -VERSION="5.0.0-0.ci+git${COMMIT:0:11}" +VERSION="6.0.0-0.ci+git${COMMIT:0:11}" echo "$VERSION" >/work/version.txt ptime -m ./tools/install_builder_prerequisites.sh -yp From 4cf2a69699764ec27c1c3715d0b33554b460f417 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 23 Jan 2024 14:21:19 -0500 Subject: [PATCH 19/91] [sled-agent] Include error when panicking on start (#4869) We build with `panic=abort`, so even though we `error!` log this error immediately prior to panicking, it's very likely the log won't be flushed by the time we abort. Include the error in the panic message itself so we don't have to fish it out of core files. --- sled-agent/src/bootstrap/server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 999e4cc0c8..1a9d36c86b 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -604,7 +604,7 @@ impl Inner { // This error is unrecoverable, and if returned we'd // end up in maintenance mode anyway. error!(log, "Failed to start sled agent: {err:#}"); - panic!("Failed to start sled agent"); + panic!("Failed to start sled agent: {err:#}"); } }; _ = response_tx.send(response); From 81831382091e8d9eebff1ae4628db1f3d0e241b2 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Tue, 23 Jan 2024 13:27:15 -0600 Subject: [PATCH 20/91] Delete IP pool links on silo delete and IP pool delete (#4867) Closes #4849 Before this change, IP pool delete is blocked if there are any outstanding silo links, which would mean the user would have to unlink every silo before deleting a pool. This is annoying. The main insight here, discussed in #4849, is that once we get past the other checks to ensure that the pool or silo is not in use (pool contains no IP ranges, silo contains no projects), there is no need to block on links, and it is fine for us to delete any associated links when we delete the thing. --- nexus/db-model/src/schema.rs | 2 +- nexus/db-queries/src/db/datastore/ip_pool.rs | 37 +++---- nexus/db-queries/src/db/datastore/silo.rs | 18 ++++ nexus/tests/integration_tests/ip_pools.rs | 38 ++++--- nexus/tests/integration_tests/silos.rs | 105 ++++++++++++++++++- schema/crdb/25.0.0/up.sql | 7 ++ schema/crdb/dbinit.sql | 10 +- 7 files changed, 180 insertions(+), 37 deletions(-) create mode 100644 schema/crdb/25.0.0/up.sql diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 68991f1d75..2e7493716e 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(24, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(25, 0, 0); table! 
{
    disk (id) {

diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs
index 331126ef97..6d3a95af7d 100644
--- a/nexus/db-queries/src/db/datastore/ip_pool.rs
+++ b/nexus/db-queries/src/db/datastore/ip_pool.rs
@@ -224,15 +224,15 @@ impl DataStore {
         use db::schema::ip_pool_resource;
         opctx.authorize(authz::Action::Delete, authz_pool).await?;
 
+        let conn = self.pool_connection_authorized(opctx).await?;
+
         // Verify there are no IP ranges still in this pool
         let range = ip_pool_range::dsl::ip_pool_range
             .filter(ip_pool_range::dsl::ip_pool_id.eq(authz_pool.id()))
             .filter(ip_pool_range::dsl::time_deleted.is_null())
             .select(ip_pool_range::dsl::id)
             .limit(1)
-            .first_async::<Uuid>(
-                &*self.pool_connection_authorized(opctx).await?,
-            )
+            .first_async::<Uuid>(&*conn)
             .await
             .optional()
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
@@ -242,23 +242,6 @@ impl DataStore {
             ));
         }
 
-        // Verify there are no linked silos
-        let silo_link = ip_pool_resource::table
-            .filter(ip_pool_resource::ip_pool_id.eq(authz_pool.id()))
-            .select(ip_pool_resource::resource_id)
-            .limit(1)
-            .first_async::<Uuid>(
-                &*self.pool_connection_authorized(opctx).await?,
-            )
-            .await
-            .optional()
-            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
-        if silo_link.is_some() {
-            return Err(Error::invalid_request(
-                "IP Pool cannot be deleted while it is linked to a silo",
-            ));
-        }
-
         // Delete the pool, conditional on the rcgen not having changed. This
         // protects the delete from occurring if clients created a new IP range
         // in between the above check for children and this query.
@@ -268,7 +251,7 @@ impl DataStore {
             .filter(dsl::id.eq(authz_pool.id()))
             .filter(dsl::rcgen.eq(db_pool.rcgen))
             .set(dsl::time_deleted.eq(now))
-            .execute_async(&*self.pool_connection_authorized(opctx).await?)
+            .execute_async(&*conn)
             .await
             .map_err(|e| {
                 public_error_from_diesel(
@@ -282,6 +265,18 @@ impl DataStore {
                 "deletion failed due to concurrent modification",
             ));
         }
+
+        // Rather than treating outstanding links as a blocker for pool delete,
+        // just delete them. If we've gotten this far, we know there are no
+        // ranges in the pool, which means it can't be in use.
+ + // delete any links from this pool to any other resources (silos) + diesel::delete(ip_pool_resource::table) + .filter(ip_pool_resource::ip_pool_id.eq(authz_pool.id())) + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + Ok(()) } diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index 2c0c5f3c47..a88a27872f 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -17,6 +17,7 @@ use crate::db::error::TransactionError; use crate::db::fixed_data::silo::{DEFAULT_SILO, INTERNAL_SILO}; use crate::db::identity::Resource; use crate::db::model::CollectionTypeProvisioned; +use crate::db::model::IpPoolResourceType; use crate::db::model::Name; use crate::db::model::Silo; use crate::db::model::VirtualProvisioningCollection; @@ -547,6 +548,23 @@ impl DataStore { debug!(opctx.log, "deleted {} silo IdPs for silo {}", updated_rows, id); + // delete IP pool links (not IP pools, just the links) + use db::schema::ip_pool_resource; + + let updated_rows = diesel::delete(ip_pool_resource::table) + .filter(ip_pool_resource::resource_id.eq(id)) + .filter( + ip_pool_resource::resource_type.eq(IpPoolResourceType::Silo), + ) + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + debug!( + opctx.log, + "deleted {} IP pool links for silo {}", updated_rows, id + ); + Ok(()) } } diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs index 7843e816fd..77a5cd5c8a 100644 --- a/nexus/tests/integration_tests/ip_pools.rs +++ b/nexus/tests/integration_tests/ip_pools.rs @@ -275,6 +275,32 @@ async fn test_ip_pool_list_dedupe(cptestctx: &ControlPlaneTestContext) { let silo3_pools = pools_for_silo(client, "silo3").await; assert_eq!(silo3_pools.len(), 1); assert_eq!(silo3_pools[0].identity.name, "pool1"); + + // this is a great spot to check that deleting a pool cleans up the links! 
+
+    // first we have to delete the range, otherwise delete will fail
+    let url = "/v1/system/ip-pools/pool1/ranges/remove";
+    NexusRequest::new(
+        RequestBuilder::new(client, Method::POST, url)
+            .body(Some(&range1))
+            .expect_status(Some(StatusCode::NO_CONTENT)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .expect("Failed to delete IP range from a pool");
+
+    object_delete(client, "/v1/system/ip-pools/pool1").await;
+
+    let silo1_pools = pools_for_silo(client, "silo1").await;
+    assert_eq!(silo1_pools.len(), 1);
+    assert_eq!(silo1_pools[0].id(), pool2.id());
+
+    let silo2_pools = pools_for_silo(client, "silo2").await;
+    assert_eq!(silo2_pools.len(), 0);
+
+    let silo3_pools = pools_for_silo(client, "silo3").await;
+    assert_eq!(silo3_pools.len(), 0);
 }
 
 /// The internal IP pool, defined by its association with the internal silo,
@@ -474,18 +500,6 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) {
     .await;
     assert_eq!(error.error_code.unwrap(), "ObjectAlreadyExists");
 
-    // pool delete fails because it is linked to a silo
-    let error = object_delete_error(
-        client,
-        "/v1/system/ip-pools/p1",
-        StatusCode::BAD_REQUEST,
-    )
-    .await;
-    assert_eq!(
-        error.message,
-        "IP Pool cannot be deleted while it is linked to a silo",
-    );
-
     // unlink p1 from silo (doesn't matter that it's a default)
     let url = format!("/v1/system/ip-pools/p1/silos/{}", cptestctx.silo_name);
    object_delete(client, &url).await;
diff --git a/nexus/tests/integration_tests/silos.rs b/nexus/tests/integration_tests/silos.rs
index a5d4b47eaa..86bf01062f 100644
--- a/nexus/tests/integration_tests/silos.rs
+++ b/nexus/tests/integration_tests/silos.rs
@@ -16,8 +16,9 @@ use nexus_db_queries::db::identity::Asset;
 use nexus_db_queries::db::lookup::LookupPath;
 use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder};
 use nexus_test_utils::resource_helpers::{
-    create_local_user, create_project, create_silo, grant_iam, object_create,
-    objects_list_page_authz, projects_list,
+    create_ip_pool, create_local_user, create_project, create_silo, grant_iam,
+    link_ip_pool, object_create, object_delete, objects_list_page_authz,
+    projects_list,
 };
 use nexus_test_utils_macros::nexus_test;
 use nexus_types::external_api::views::Certificate;
@@ -25,6 +26,7 @@ use nexus_types::external_api::views::{
     self, IdentityProvider, Project, SamlIdentityProvider, Silo,
 };
 use nexus_types::external_api::{params, shared};
+use omicron_common::address::{IpRange, Ipv4Range};
 use omicron_common::api::external::ObjectIdentity;
 use omicron_common::api::external::{
     IdentityMetadataCreateParams, LookupType, Name,
@@ -2526,3 +2528,102 @@ async fn test_silo_admin_can_create_certs(cptestctx: &ControlPlaneTestContext) {
     assert_eq!(silo_certs.len(), 1);
     assert_eq!(silo_certs[0].identity.id, cert.identity.id);
 }
+
+// Test that silo delete cleans up associated IP pool links
+#[nexus_test]
+async fn test_silo_delete_cleans_up_ip_pool_links(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    let client = &cptestctx.external_client;
+
+    // Create two silos
+    let silo1 =
+        create_silo(&client, "silo1", true, shared::SiloIdentityMode::SamlJit)
+            .await;
+    let silo2 =
+        create_silo(&client, "silo2", true, shared::SiloIdentityMode::SamlJit)
+            .await;
+
+    // link pool1 to both, link pool2 to silo1 only
+    let range1 = IpRange::V4(
+        Ipv4Range::new(
+            std::net::Ipv4Addr::new(10, 0, 0, 51),
+            std::net::Ipv4Addr::new(10, 0, 0, 52),
+        )
+        .unwrap(),
+    );
+    create_ip_pool(client, "pool1", Some(range1)).await;
+    link_ip_pool(client, "pool1", &silo1.identity.id, true).await;
+    link_ip_pool(client, "pool1", &silo2.identity.id, true).await;
+
+    let range2 = IpRange::V4(
+        Ipv4Range::new(
+            std::net::Ipv4Addr::new(10, 0, 0, 53),
+            std::net::Ipv4Addr::new(10, 0, 0, 54),
+        )
+        .unwrap(),
+    );
+    create_ip_pool(client, "pool2", Some(range2)).await;
+    link_ip_pool(client, "pool2", &silo1.identity.id, false).await;
+
+    // we want to make sure the links are there before we make sure they're gone
+    let url = "/v1/system/ip-pools/pool1/silos";
+    let links =
+        objects_list_page_authz::<views::IpPoolSiloLink>(client, &url).await;
+    assert_eq!(links.items.len(), 2);
+
+    let url = "/v1/system/ip-pools/pool2/silos";
+    let links =
+        objects_list_page_authz::<views::IpPoolSiloLink>(client, &url).await;
+    assert_eq!(links.items.len(), 1);
+
+    // Delete the silo
+    let url = format!("/v1/system/silos/{}", silo1.identity.id);
+    object_delete(client, &url).await;
+
+    // Now make sure the links are gone
+    let url = "/v1/system/ip-pools/pool1/silos";
+    let links =
+        objects_list_page_authz::<views::IpPoolSiloLink>(client, &url).await;
+    assert_eq!(links.items.len(), 1);
+
+    let url = "/v1/system/ip-pools/pool2/silos";
+    let links =
+        objects_list_page_authz::<views::IpPoolSiloLink>(client, &url).await;
+    assert_eq!(links.items.len(), 0);
+
+    // but the pools are of course still there
+    let url = "/v1/system/ip-pools";
+    let pools = objects_list_page_authz::<views::IpPool>(client, &url).await;
+    assert_eq!(pools.items.len(), 2);
+    assert_eq!(pools.items[0].identity.name, "pool1");
+    assert_eq!(pools.items[1].identity.name, "pool2");
+
+    // nothing prevents us from deleting the pools (except the child ranges --
+    // we do have to remove those)
+
+    let url = "/v1/system/ip-pools/pool1/ranges/remove";
+    NexusRequest::new(
+        RequestBuilder::new(client, Method::POST, url)
+            .body(Some(&range1))
+            .expect_status(Some(StatusCode::NO_CONTENT)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .expect("Failed to delete IP range from a pool");
+
+    let url = "/v1/system/ip-pools/pool2/ranges/remove";
+    NexusRequest::new(
+        RequestBuilder::new(client, Method::POST, url)
+            .body(Some(&range2))
+            .expect_status(Some(StatusCode::NO_CONTENT)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .expect("Failed to delete IP range from a pool");
+
+    object_delete(client, "/v1/system/ip-pools/pool1").await;
+    object_delete(client, "/v1/system/ip-pools/pool2").await;
+}
diff --git a/schema/crdb/25.0.0/up.sql b/schema/crdb/25.0.0/up.sql
new file mode 100644
index 0000000000..3c963b9bc6
--- /dev/null
+++ b/schema/crdb/25.0.0/up.sql
@@ -0,0 +1,7 @@
+-- created solely to prevent a table scan when we delete links on silo delete
+CREATE INDEX IF NOT EXISTS ip_pool_resource_id ON omicron.public.ip_pool_resource (
+    resource_id
+);
+CREATE INDEX IF NOT EXISTS ip_pool_resource_ip_pool_id ON omicron.public.ip_pool_resource (
+    ip_pool_id
+);
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index 2105caabef..f3ca5c4b85 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -1604,6 +1604,14 @@ CREATE UNIQUE INDEX IF NOT EXISTS one_default_ip_pool_per_resource ON omicron.pu
 ) where
     is_default = true;
 
+-- created solely to prevent a table scan when we delete links on silo delete
+CREATE INDEX IF NOT EXISTS ip_pool_resource_id ON omicron.public.ip_pool_resource (
+    resource_id
+);
+CREATE INDEX IF NOT EXISTS ip_pool_resource_ip_pool_id ON omicron.public.ip_pool_resource (
+    ip_pool_id
+);
+
 /*
  * IP Pools are made up of a set of IP ranges, which are start/stop addresses.
* Note that these need not be CIDR blocks or well-behaved subnets with a @@ -3258,7 +3266,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '24.0.0', NULL) + ( TRUE, NOW(), NOW(), '25.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 624fbba20d7eb754e2c1a3ca7f1c5676fd2f1e0d Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 23 Jan 2024 19:32:39 +0000 Subject: [PATCH 21/91] Chore: Update OPTE to v0.27.214 (#4868) This PR introduces fixes to TCP state machine handling, which has been causing new TCP flows to hang whenever a source-port is reused under certain circumstances. Most of the relevant details are outlined on oxidecomputer/opte#444, but the gist is that OPTE is far more permissive about TCP packets which are allowed to be sent/received by guests. --- Cargo.lock | 47 ++++++++++++++++++++++++++++++++-------------- Cargo.toml | 4 ++-- tools/opte_version | 2 +- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5211ecab78..a2d9601a38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2739,6 +2739,15 @@ dependencies = [ "byteorder", ] +[[package]] +name = "hash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47d60b12902ba28e2730cd37e95b8c9223af2808df9e902d4df49588d1470606" +dependencies = [ + "byteorder", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -2791,12 +2800,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db04bc24a18b9ea980628ecf00e6c0264f3c1426dac36c00cb49b6fbad8b0743" dependencies = [ "atomic-polyfill", - "hash32", + "hash32 0.2.1", "rustc_version 0.4.0", "spin 0.9.8", "stable_deref_trait", ] +[[package]] +name = "heapless" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" +dependencies = [ + "hash32 0.3.1", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.3.3" @@ -3174,7 +3193,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" +source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" [[package]] name = "illumos-utils" @@ -3582,7 +3601,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" +source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" dependencies = [ "quote", "syn 2.0.46", @@ -5323,7 +5342,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" +source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" dependencies = [ "cfg-if", "dyn-clone", @@ -5332,26 +5351,26 @@ dependencies = [ "opte-api", "postcard", "serde", - "smoltcp 0.10.0", + "smoltcp 0.11.0", "version_check", ] [[package]] name = "opte-api" version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" +source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" dependencies = [ "illumos-sys-hdrs", "ipnetwork", "postcard", "serde", - "smoltcp 0.10.0", + "smoltcp 0.11.0", ] [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" +source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" dependencies = [ "libc", "libnet", @@ -5425,12 +5444,12 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" +source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" dependencies = [ "illumos-sys-hdrs", "opte", "serde", - "smoltcp 0.10.0", + "smoltcp 0.11.0", "zerocopy 0.7.31", ] @@ -8019,21 +8038,21 @@ dependencies = [ "bitflags 1.3.2", "byteorder", "cfg-if", - "heapless", + "heapless 0.7.16", "managed", ] [[package]] name = "smoltcp" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d2e3a36ac8fea7b94e666dfa3871063d6e0a5c9d5d4fec9a1a6b7b6760f0229" +checksum = "5a1a996951e50b5971a2c8c0fa05a381480d70a933064245c4a223ddc87ccc97" dependencies = [ "bitflags 1.3.2", "byteorder", "cfg-if", "defmt", - "heapless", + "heapless 0.8.0", "managed", ] diff --git a/Cargo.toml b/Cargo.toml index 54db531d06..d97236b632 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -267,7 +267,7 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.10.1" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "dd2b7b0306d3f01fa09170b8884d402209e49244", features = [ "api", "std" ] } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0" @@ -275,7 +275,7 @@ openapiv3 = "2.0.0" openssl = "0.10" openssl-sys = "0.9" openssl-probe = "0.1.5" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "dd2b7b0306d3f01fa09170b8884d402209e49244" } oso = "0.27" owo-colors = "3.5.0" oximeter = { path = "oximeter/oximeter" } diff --git a/tools/opte_version b/tools/opte_version index 619a109b35..82d79dcf28 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.27.202 +0.27.214 From 8af3d844deb340bd0018ad6df60210ea957113f2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jan 2024 13:44:30 -0800 Subject: [PATCH 22/91] Bump shlex from 1.1.0 to 1.3.0 (#4865) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a2d9601a38..1b04752e34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7691,9 +7691,9 @@ checksum = 
"24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" [[package]] name = "shlex" -version = "1.1.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook" From beb1d1149a0876a786e94bf24a60568a29911056 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 23 Jan 2024 13:46:07 -0800 Subject: [PATCH 23/91] Update Rust crate hyper-rustls to 0.26.0 (#4814) --- Cargo.lock | 127 ++++++++++++++++++++++++++++++-------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 2 + 3 files changed, 103 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1b04752e34..d66fc3dadc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1920,7 +1920,7 @@ dependencies = [ "futures", "hostname", "http 0.2.11", - "hyper", + "hyper 0.14.27", "indexmap 2.1.0", "multer", "openapiv3", @@ -2091,7 +2091,7 @@ dependencies = [ "base64", "chrono", "http 0.2.11", - "hyper", + "hyper 0.14.27", "omicron-sled-agent", "omicron-test-utils", "omicron-workspace-hack", @@ -2938,6 +2938,16 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-body" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +dependencies = [ + "bytes", + "http 1.0.0", +] + [[package]] name = "http-range" version = "0.1.5" @@ -2968,7 +2978,7 @@ dependencies = [ "form_urlencoded", "futures", "http 0.2.11", - "hyper", + "hyper 0.14.27", "log", "once_cell", "regex", @@ -3055,7 +3065,7 @@ dependencies = [ "futures-util", "h2", "http 0.2.11", - "http-body", + "http-body 0.4.5", "httparse", "httpdate", "itoa", @@ -3067,6 +3077,24 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5aa53871fc917b1a9ed87b683a5d86db645e23acb32c2e0785a353e522fb75" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.0.0", + "http-body 1.0.0", + "httparse", + "itoa", + "pin-project-lite", + "tokio", + "want", +] + [[package]] name = "hyper-rustls" version = "0.24.2" @@ -3075,7 +3103,7 @@ checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http 0.2.11", - "hyper", + "hyper 0.14.27", "rustls 0.21.9", "tokio", "tokio-rustls 0.24.1", @@ -3083,19 +3111,21 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.25.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "399c78f9338483cb7e630c8474b07268983c6bd5acee012e4211f9f7bb21b070" +checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" dependencies = [ "futures-util", - "http 0.2.11", - "hyper", + "http 1.0.0", + "hyper 1.1.0", + "hyper-util", "log", "rustls 0.22.2", "rustls-native-certs", "rustls-pki-types", "tokio", "tokio-rustls 0.25.0", + "tower-service", ] [[package]] @@ -3108,7 +3138,7 @@ dependencies = [ "http 0.2.11", "http-range", "httpdate", - "hyper", + "hyper 0.14.27", "mime_guess", "percent-encoding", "rand 0.8.5", @@ -3124,12 +3154,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper", + "hyper 0.14.27", 
"native-tls", "tokio", "tokio-native-tls", ] +[[package]] +name = "hyper-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdea9aac0dbe5a9240d68cfd9501e2db94222c6dc06843e06640b9e07f0fdc67" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.0.0", + "http-body 1.0.0", + "hyper 1.1.0", + "pin-project-lite", + "socket2 0.5.5", + "tokio", + "tower", + "tower-service", + "tracing", +] + [[package]] name = "iana-time-zone" version = "0.1.57" @@ -3371,7 +3421,7 @@ dependencies = [ "clap 4.4.3", "dropshot", "expectorate", - "hyper", + "hyper 0.14.27", "installinator-common", "omicron-common", "omicron-test-utils", @@ -3427,7 +3477,7 @@ dependencies = [ "dropshot", "expectorate", "futures", - "hyper", + "hyper 0.14.27", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", @@ -4165,8 +4215,8 @@ dependencies = [ "gateway-client", "headers", "http 0.2.11", - "hyper", - "hyper-rustls 0.25.0", + "hyper 0.14.27", + "hyper-rustls 0.26.0", "internal-dns", "ipnetwork", "itertools 0.12.0", @@ -4304,7 +4354,7 @@ dependencies = [ "gateway-test-utils", "headers", "http 0.2.11", - "hyper", + "hyper 0.14.27", "internal-dns", "nexus-db-queries", "nexus-test-interface", @@ -4745,7 +4795,7 @@ dependencies = [ "gateway-test-utils", "hex", "http 0.2.11", - "hyper", + "hyper 0.14.27", "illumos-utils", "ipcc", "omicron-common", @@ -4806,8 +4856,8 @@ dependencies = [ "http 0.2.11", "httptest", "hubtools", - "hyper", - "hyper-rustls 0.25.0", + "hyper 0.14.27", + "hyper-rustls 0.26.0", "illumos-utils", "internal-dns", "ipnetwork", @@ -5020,7 +5070,7 @@ dependencies = [ "guppy", "hex", "http 0.2.11", - "hyper", + "hyper 0.14.27", "hyper-staticfile", "illumos-utils", "installinator-common", @@ -5159,7 +5209,7 @@ dependencies = [ "hashbrown 0.13.2", "hex", "hmac", - "hyper", + "hyper 0.14.27", "indexmap 2.1.0", "inout", "ipnetwork", @@ -5200,6 +5250,7 @@ dependencies = [ "similar", "slog", "snafu", + "socket2 0.5.5", "spin 0.9.8", "string_cache", "subtle", @@ -5427,7 +5478,7 @@ dependencies = [ "chrono", "futures", "http 0.2.11", - "hyper", + "hyper 0.14.27", "omicron-workspace-hack", "progenitor", "rand 0.8.5", @@ -5501,7 +5552,7 @@ dependencies = [ "dropshot", "expectorate", "futures", - "hyper", + "hyper 0.14.27", "internal-dns", "nexus-client", "nexus-types", @@ -6352,7 +6403,7 @@ dependencies = [ "clap 4.4.3", "dropshot", "futures", - "hyper", + "hyper 0.14.27", "progenitor", "propolis_types", "rand 0.8.5", @@ -6787,8 +6838,8 @@ dependencies = [ "futures-util", "h2", "http 0.2.11", - "http-body", - "hyper", + "http-body 0.4.5", + "hyper 0.14.27", "hyper-rustls 0.24.2", "hyper-tls", "ipnet", @@ -9048,6 +9099,28 @@ dependencies = [ "walkdir", ] +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "pin-project", + "pin-project-lite", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" + [[package]] name = "tower-service" version = "0.3.2" @@ -9933,7 +10006,7 @@ dependencies = [ "hex", "http 0.2.11", "hubtools", - "hyper", + "hyper 0.14.27", "illumos-utils", "installinator", "installinator-artifact-client", diff --git a/Cargo.toml 
b/Cargo.toml index d97236b632..43faed9360 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -219,7 +219,7 @@ httptest = "0.15.5" hubtools = { git = "https://github.com/oxidecomputer/hubtools.git", branch = "main" } humantime = "2.1.0" hyper = "0.14" -hyper-rustls = "0.25.0" +hyper-rustls = "0.26.0" hyper-staticfile = "0.9.5" illumos-utils = { path = "illumos-utils" } indexmap = "2.1.0" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index b574a292d1..e4733992bc 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -94,6 +94,7 @@ sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } snafu = { version = "0.7.5", features = ["futures"] } +socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } @@ -198,6 +199,7 @@ sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } snafu = { version = "0.7.5", features = ["futures"] } +socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } From 66afddbd174c02e3da3f47e2519fb82558fec01c Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 23 Jan 2024 17:10:55 -0500 Subject: [PATCH 24/91] Add oxlog tool and library (#4810) --- .github/buildomat/jobs/package.sh | 2 +- Cargo.lock | 14 + Cargo.toml | 3 + dev-tools/oxlog/Cargo.toml | 16 + dev-tools/oxlog/src/bin/oxlog.rs | 121 ++++++++ dev-tools/oxlog/src/lib.rs | 457 ++++++++++++++++++++++++++++ illumos-utils/Cargo.toml | 1 + illumos-utils/src/running_zone.rs | 28 +- package-manifest.toml | 8 + tools/build-global-zone-packages.sh | 7 + workspace-hack/Cargo.toml | 2 + 11 files changed, 631 insertions(+), 28 deletions(-) create mode 100644 dev-tools/oxlog/Cargo.toml create mode 100644 dev-tools/oxlog/src/bin/oxlog.rs create mode 100644 dev-tools/oxlog/src/lib.rs diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index f0bd764feb..b4d10891b9 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -91,7 +91,7 @@ ptime -m cargo run --locked --release --bin omicron-package -- \ -t host target create -i standard -m gimlet -s asic -r multi-sled ptime -m cargo run --locked --release --bin omicron-package -- \ -t host package -stamp_packages omicron-sled-agent mg-ddm-gz propolis-server overlay +stamp_packages omicron-sled-agent mg-ddm-gz propolis-server overlay oxlog # Create global zone package @ /work/global-zone-packages.tar.gz ptime -m ./tools/build-global-zone-packages.sh "$tarball_src_dir" /work diff --git a/Cargo.lock b/Cargo.lock index d66fc3dadc..ec594abb8f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3266,6 +3266,7 @@ dependencies = [ "omicron-workspace-hack", "opte-ioctl", "oxide-vpc", + "oxlog", "regress", "schemars", "serde", @@ -5177,6 +5178,7 @@ dependencies = [ "bstr 1.6.0", "byteorder", "bytes", + "camino", "chrono", "cipher", "clap 4.4.3", @@ -5676,6 +5678,18 @@ dependencies = [ "uuid", ] +[[package]] +name = "oxlog" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "chrono", 
+ "clap 4.4.3", + "omicron-workspace-hack", + "uuid", +] + [[package]] name = "p256" version = "0.13.2" diff --git a/Cargo.toml b/Cargo.toml index 43faed9360..3d0be64380 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ members = [ "dev-tools/crdb-seed", "dev-tools/omdb", "dev-tools/omicron-dev", + "dev-tools/oxlog", "dev-tools/thing-flinger", "dev-tools/xtask", "dns-server", @@ -93,6 +94,7 @@ default-members = [ "dev-tools/crdb-seed", "dev-tools/omdb", "dev-tools/omicron-dev", + "dev-tools/oxlog", "dev-tools/thing-flinger", # Do not include xtask in the list of default members, because this causes # hakari to not work as well and build times to be longer. @@ -252,6 +254,7 @@ nexus-inventory = { path = "nexus/inventory" } omicron-certificates = { path = "certificates" } omicron-passwords = { path = "passwords" } omicron-workspace-hack = "0.1.0" +oxlog = { path = "dev-tools/oxlog" } nexus-test-interface = { path = "nexus/test-interface" } nexus-test-utils-macros = { path = "nexus/test-utils-macros" } nexus-test-utils = { path = "nexus/test-utils" } diff --git a/dev-tools/oxlog/Cargo.toml b/dev-tools/oxlog/Cargo.toml new file mode 100644 index 0000000000..5d7cfaf5c1 --- /dev/null +++ b/dev-tools/oxlog/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "oxlog" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow.workspace = true +camino.workspace = true +chrono.workspace = true +clap.workspace = true +uuid.workspace = true +omicron-workspace-hack.workspace = true + +[[bin]] +name = "oxlog" diff --git a/dev-tools/oxlog/src/bin/oxlog.rs b/dev-tools/oxlog/src/bin/oxlog.rs new file mode 100644 index 0000000000..ef79605dda --- /dev/null +++ b/dev-tools/oxlog/src/bin/oxlog.rs @@ -0,0 +1,121 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Tool for discovering oxide related logfiles on sleds
+
+use clap::{Args, Parser, Subcommand};
+use oxlog::{Filter, LogFile, Zones};
+
+#[derive(Debug, Parser)]
+#[command(version)]
+struct Cli {
+    #[command(subcommand)]
+    command: Commands,
+}
+
+#[derive(Debug, Subcommand)]
+enum Commands {
+    /// List all zones found on the filesystem
+    Zones,
+
+    /// List logs for a given service
+    Logs {
+        /// The name of the zone
+        zone: String,
+
+        /// The name of the service to list logs for
+        service: Option<String>,
+
+        /// Print available metadata
+        #[arg(short, long)]
+        metadata: bool,
+
+        #[command(flatten)]
+        filter: FilterArgs,
+    },
+}
+
+#[derive(Args, Debug)]
+#[group(required = true, multiple = true)]
+struct FilterArgs {
+    /// Print only the current log file
+    #[arg(short, long)]
+    current: bool,
+
+    /// Print only the archived log files
+    #[arg(short, long)]
+    archived: bool,
+
+    /// Print only the extra log files
+    #[arg(short, long)]
+    extra: bool,
+}
+
+fn main() -> Result<(), anyhow::Error> {
+    let cli = Cli::parse();
+
+    match cli.command {
+        Commands::Zones => {
+            for zone in Zones::load()?.zones.keys() {
+                println!("{zone}");
+            }
+            Ok(())
+        }
+        Commands::Logs { zone, service, metadata, filter } => {
+            let zones = Zones::load()?;
+            let filter = Filter {
+                current: filter.current,
+                archived: filter.archived,
+                extra: filter.extra,
+            };
+            let print_metadata = |f: &LogFile| {
+                println!(
+                    "{}\t{}\t{}",
+                    f.path,
+                    f.size.map_or_else(|| "-".to_string(), |s| s.to_string()),
+                    f.modified
+                        .map_or_else(|| "-".to_string(), |s| s.to_rfc3339())
+                );
+            };
+
+            let logs = zones.zone_logs(&zone, filter);
+            for (svc_name, mut svc_logs) in logs {
+                if let Some(service) = &service {
+                    if svc_name != service.as_str() {
+                        continue;
+                    }
+                }
+                svc_logs.archived.sort();
+                if filter.current {
+                    if let Some(current) = &svc_logs.current {
+                        if metadata {
+                            print_metadata(current);
+                        } else {
+                            println!("{}", current.path);
+                        }
+                    }
+                }
+                if filter.archived {
+                    for f in &svc_logs.archived {
+                        if metadata {
+                            print_metadata(f);
+                        } else {
+                            println!("{}", f.path);
+                        }
+                    }
+                }
+                if filter.extra {
+                    for f in &svc_logs.extra {
+                        if metadata {
+                            print_metadata(f);
+                        } else {
+                            println!("{}", f.path);
+                        }
+                    }
+                }
+            }
+            Ok(())
+        }
+    }
+}
diff --git a/dev-tools/oxlog/src/lib.rs b/dev-tools/oxlog/src/lib.rs
new file mode 100644
index 0000000000..589b113928
--- /dev/null
+++ b/dev-tools/oxlog/src/lib.rs
@@ -0,0 +1,457 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! A tool to show oxide related log file paths
+//!
+//! All data is based off of reading the filesystem
+
+use anyhow::Context;
+use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
+use chrono::{DateTime, Utc};
+use std::collections::BTreeMap;
+use std::io;
+use uuid::Uuid;
+
+/// Return a UUID if the `DirEntry` contains a directory that parses into a UUID.
+fn get_uuid_dir(result: io::Result<Utf8DirEntry>) -> Option<Uuid> {
+    let Ok(entry) = result else {
+        return None;
+    };
+    let Ok(file_type) = entry.file_type() else {
+        return None;
+    };
+    if !file_type.is_dir() {
+        return None;
+    }
+    let file_name = entry.file_name();
+    if let Ok(uuid) = file_name.parse() {
+        Some(uuid)
+    } else {
+        None
+    }
+}
+
+#[derive(Debug)]
+pub struct Pools {
+    pub internal: Vec<Uuid>,
+    pub external: Vec<Uuid>,
+}
+
+impl Pools {
+    pub fn read() -> anyhow::Result<Pools> {
+        let internal = Utf8Path::new("/pool/int/")
+            .read_dir_utf8()
+            .context("Failed to read /pool/int")?
+            .filter_map(get_uuid_dir)
+            .collect();
+        let external = Utf8Path::new("/pool/ext/")
+            .read_dir_utf8()
+            .context("Failed to read /pool/ext")?
+            .filter_map(get_uuid_dir)
+            .collect();
+        Ok(Pools { internal, external })
+    }
+}
+
+/// Filter which logs to search for in a given zone
+///
+/// Each field in the filter is additive.
+///
+/// The filter was added to the library and not just the CLI because in some
+/// cases searching for archived logs is pretty expensive.
+#[derive(Clone, Copy, Debug)]
+pub struct Filter {
+    /// The current logfile for a service.
+    /// e.g. `/var/svc/log/oxide-sled-agent:default.log`
+    pub current: bool,
+
+    /// Any rotated log files in the default service directory or archived to
+    /// a debug directory. e.g. `/var/svc/log/oxide-sled-agent:default.log.0`
+    /// or `/pool/ext/021afd19-2f87-4def-9284-ab7add1dd6ae/crypt/debug/global/oxide-sled-agent:default.log.1697509861`
+    pub archived: bool,
+
+    /// Any files of special interest for a given service that don't reside in
+    /// standard paths or don't follow the naming conventions of SMF service
+    /// files. e.g. `/pool/ext/e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/zone/oxz_cockroachdb_8bbea076-ff60-4330-8302-383e18140ef3/root/data/logs/cockroach.log`
+    pub extra: bool,
+}
+
+/// Path and metadata about a logfile
+/// We use options for metadata as retrieval is fallible
+#[derive(Debug, Clone, Eq)]
+pub struct LogFile {
+    pub path: Utf8PathBuf,
+    pub size: Option<u64>,
+    pub modified: Option<DateTime<Utc>>,
+}
+
+impl LogFile {
+    pub fn read_metadata(&mut self, entry: &Utf8DirEntry) {
+        if let Ok(metadata) = entry.metadata() {
+            self.size = Some(metadata.len());
+            if let Ok(modified) = metadata.modified() {
+                self.modified = Some(modified.into());
+            }
+        }
+    }
+}
+
+impl PartialEq for LogFile {
+    fn eq(&self, other: &Self) -> bool {
+        self.path == other.path
+    }
+}
+
+impl PartialOrd for LogFile {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for LogFile {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.path.cmp(&other.path)
+    }
+}
+
+impl LogFile {
+    fn new(path: Utf8PathBuf) -> LogFile {
+        LogFile { path, size: None, modified: None }
+    }
+}
+
+/// All oxide logs for a given service in a given zone
+#[derive(Debug, Clone, Default)]
+pub struct SvcLogs {
+    /// The current logfile for a service.
+    /// e.g. `/var/svc/log/oxide-sled-agent:default.log`
+    pub current: Option<LogFile>,
+
+    /// Any rotated log files in the default service directory or archived to
+    /// a debug directory. e.g. `/var/svc/log/oxide-sled-agent:default.log.0`
+    /// or `/pool/ext/021afd19-2f87-4def-9284-ab7add1dd6ae/crypt/debug/global/oxide-sled-agent:default.log.1697509861`
+    pub archived: Vec<LogFile>,
+
+    /// Any files of special interest for a given service that don't reside in
+    /// standard paths or don't follow the naming conventions of SMF service
+    /// files. e.g. `/pool/ext/e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/zone/oxz_cockroachdb_8bbea076-ff60-4330-8302-383e18140ef3/root/data/logs/cockroach.log`
+    pub extra: Vec<LogFile>,
+}
+
+// These probably don't warrant newtypes. They are just to make the
+// keys in maps a bit easier to read.
+type ZoneName = String;
+type ServiceName = String;
+
+pub struct Paths {
+    /// Links to the location of current and rotated log files for a given service
+    pub primary: Utf8PathBuf,
+
+    /// Links to debug directories containing archived log files
+    pub debug: Vec<Utf8PathBuf>,
+
+    /// Links to directories containing extra files such as cockroachdb logs
+    /// that reside outside our SMF log and debug service log paths.
+    pub extra: Vec<(&'static str, Utf8PathBuf)>,
+}
+
+pub struct Zones {
+    pub zones: BTreeMap<ZoneName, Paths>,
+}
+
+impl Zones {
+    pub fn load() -> Result<Zones, anyhow::Error> {
+        let mut zones = BTreeMap::new();
+
+        // Describe where to find logs for the global zone
+        zones.insert(
+            "global".to_string(),
+            Paths {
+                primary: Utf8PathBuf::from("/var/svc/log"),
+                debug: vec![],
+                extra: vec![],
+            },
+        );
+
+        // Describe where to find logs for the switch zone
+        zones.insert(
+            "oxz_switch".to_string(),
+            Paths {
+                primary: Utf8PathBuf::from("/zone/oxz_switch/root/var/svc/log"),
+                debug: vec![],
+                extra: vec![(
+                    "dendrite",
+                    "/zone/oxz_switch/root/var/dendrite".into(),
+                )],
+            },
+        );
+
+        // Find the directories containing the primary and extra log files
+        // for all zones on external storage pools.
+        let pools = Pools::read()?;
+        for uuid in &pools.external {
+            let zones_path: Utf8PathBuf =
+                ["/pool/ext", &uuid.to_string(), "crypt/zone"].iter().collect();
+            // Find the zones on the given pool
+            let Ok(entries) = zones_path.read_dir_utf8() else {
+                continue;
+            };
+            for entry in entries {
+                let Ok(zone_entry) = entry else {
+                    continue;
+                };
+                let zone = zone_entry.file_name();
+
+                // Add the path to the current logs for the zone
+                let mut dir = zones_path.clone();
+                dir.push(zone);
+                dir.push("root/var/svc/log");
+                let mut paths =
+                    Paths { primary: dir, debug: vec![], extra: vec![] };
+
+                // Add the path to the extra logs for the zone
+                if zone.starts_with("oxz_cockroachdb") {
+                    let mut dir = zones_path.clone();
+                    dir.push(zone);
+                    dir.push("root/data/logs");
+                    paths.extra.push(("cockroachdb", dir));
+                }
+
+                zones.insert(zone.to_string(), paths);
+            }
+        }
+
+        // Find the directories containing the debug log files
+        for uuid in &pools.external {
+            let zones_path: Utf8PathBuf =
+                ["/pool/ext", &uuid.to_string(), "crypt/debug"]
+                    .iter()
+                    .collect();
+            // Find the zones on the given pool
+            let Ok(entries) = zones_path.read_dir_utf8() else {
+                continue;
+            };
+            for entry in entries {
+                let Ok(zone_entry) = entry else {
+                    continue;
+                };
+                let zone = zone_entry.file_name();
+                let mut dir = zones_path.clone();
+                dir.push(zone);
+
+                // We only add debug paths if the zones have primary paths
+                if let Some(paths) = zones.get_mut(zone) {
+                    paths.debug.push(dir);
+                }
+            }
+        }
+
+        Ok(Zones { zones })
+    }
+
+    /// Return log files organized by service name
+    pub fn zone_logs(
+        &self,
+        zone: &str,
+        filter: Filter,
+    ) -> BTreeMap<ServiceName, SvcLogs> {
+        let mut output = BTreeMap::new();
+        let Some(paths) = self.zones.get(zone) else {
+            return BTreeMap::new();
+        };
+        // Some rotated files exist in `paths.primary` that we track as
+        // 'archived'. These files have not yet been migrated into the debug
+        // directory.
+        if filter.current || filter.archived {
+            load_svc_logs(paths.primary.clone(), &mut output);
+        }
+
+        if filter.archived {
+            for dir in paths.debug.clone() {
+                load_svc_logs(dir, &mut output);
+            }
+        }
+        if filter.extra {
+            for (svc_name, dir) in paths.extra.clone() {
+                load_extra_logs(dir, svc_name, &mut output);
+            }
+        }
+        output
+    }
+}
+
+const OX_SMF_PREFIXES: [&str; 2] = ["oxide-", "system-illumos-"];
+
+/// Return true if the provided file name appears to be a valid log file for an
+/// Oxide-managed SMF service.
+///
+/// Note that this operates on the _file name_. Any leading path components will
+/// cause this check to return `false`.
+pub fn is_oxide_smf_log_file(filename: impl AsRef<str>) -> bool {
+    // Log files are named by the SMF services, with the `/` in the FMRI
+    // translated to a `-`.
+    let filename = filename.as_ref();
+    OX_SMF_PREFIXES
+        .iter()
+        .any(|prefix| filename.starts_with(prefix) && filename.contains(".log"))
+}
+
+// Parse an oxide smf log file name and return the name of the underlying
+// service.
+//
+// If parsing fails for some reason, return `None`.
+pub fn oxide_smf_service_name_from_log_file_name(
+    filename: &str,
+) -> Option<&str> {
+    let Some((prefix, _suffix)) = filename.split_once(':') else {
+        // No ':' found
+        return None;
+    };
+
+    for ox_prefix in OX_SMF_PREFIXES {
+        if let Some(svc_name) = prefix.strip_prefix(ox_prefix) {
+            return Some(svc_name);
+        }
+    }
+
+    None
+}
+
+// Given a directory, find all oxide specific SMF service logs and return them
+// mapped to their inferred service name.
+fn load_svc_logs(dir: Utf8PathBuf, logs: &mut BTreeMap<ServiceName, SvcLogs>) {
+    let Ok(entries) = dir.read_dir_utf8() else {
+        return;
+    };
+    for entry in entries {
+        let Ok(entry) = entry else {
+            continue;
+        };
+        let filename = entry.file_name();
+
+        // Is this a log file we care about?
+        if is_oxide_smf_log_file(filename) {
+            let mut path = dir.clone();
+            path.push(filename);
+            let mut logfile = LogFile::new(path);
+
+            let Some(svc_name) =
+                oxide_smf_service_name_from_log_file_name(filename)
+            else {
+                // parsing failed
+                continue;
+            };
+
+            logfile.read_metadata(&entry);
+            if logfile.size == Some(0) {
+                // skip 0 size files
+                continue;
+            }
+
+            let is_current = filename.ends_with(".log");
+
+            let svc_logs =
+                logs.entry(svc_name.to_string()).or_insert(SvcLogs::default());
+
+            if is_current {
+                svc_logs.current = Some(logfile.clone());
+            } else {
+                svc_logs.archived.push(logfile.clone());
+            }
+        }
+    }
+}
+
+// Load any logs in non-standard paths. We grab all logs in `dir` and
+// don't filter based on filename prefix as in `load_svc_logs`.
+fn load_extra_logs(
+    dir: Utf8PathBuf,
+    svc_name: &str,
+    logs: &mut BTreeMap<ServiceName, SvcLogs>,
+) {
+    let Ok(entries) = dir.read_dir_utf8() else {
+        return;
+    };
+
+    let svc_logs =
+        logs.entry(svc_name.to_string()).or_insert(SvcLogs::default());
+
+    for entry in entries {
+        let Ok(entry) = entry else {
+            continue;
+        };
+        let filename = entry.file_name();
+        let mut path = dir.clone();
+        path.push(filename);
+        let mut logfile = LogFile::new(path);
+        logfile.read_metadata(&entry);
+        if logfile.size == Some(0) {
+            // skip 0 size files
+            continue;
+        }
+        svc_logs.extra.push(logfile);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    pub use super::is_oxide_smf_log_file;
+    pub use super::oxide_smf_service_name_from_log_file_name;
+
+    #[test]
+    fn test_is_oxide_smf_log_file() {
+        assert!(is_oxide_smf_log_file("oxide-blah:default.log"));
+        assert!(is_oxide_smf_log_file("oxide-blah:default.log.0"));
+        assert!(is_oxide_smf_log_file("oxide-blah:default.log.1111"));
+        assert!(is_oxide_smf_log_file("system-illumos-blah:default.log"));
+        assert!(is_oxide_smf_log_file("system-illumos-blah:default.log.0"));
+        assert!(!is_oxide_smf_log_file("not-oxide-blah:default.log"));
+        assert!(!is_oxide_smf_log_file("not-system-illumos-blah:default.log"));
+        assert!(!is_oxide_smf_log_file("system-blah:default.log"));
+    }
+
+    #[test]
+    fn test_oxide_smf_service_name_from_log_file_name() {
+        assert_eq!(
+            Some("blah"),
+            oxide_smf_service_name_from_log_file_name("oxide-blah:default.log")
+        );
+        assert_eq!(
+            Some("blah"),
+            oxide_smf_service_name_from_log_file_name(
+                "oxide-blah:default.log.0"
+            )
+        );
+        assert_eq!(
+            Some("blah"),
+            oxide_smf_service_name_from_log_file_name(
+                "oxide-blah:default.log.1111"
+            )
+        );
+        assert_eq!(
+            Some("blah"),
+            oxide_smf_service_name_from_log_file_name(
+                "system-illumos-blah:default.log"
+            )
+        );
+        assert_eq!(
+            Some("blah"),
+            oxide_smf_service_name_from_log_file_name(
+                "system-illumos-blah:default.log.0"
+            )
+        );
+        assert!(oxide_smf_service_name_from_log_file_name(
+            "not-oxide-blah:default.log"
+        )
+        .is_none());
+        assert!(oxide_smf_service_name_from_log_file_name(
+            "not-system-illumos-blah:default.log"
+        )
+        .is_none());
+        assert!(oxide_smf_service_name_from_log_file_name(
+            "system-blah:default.log"
+        )
+        .is_none());
+    }
+}
diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml
index 8296eace5c..e4a99095fd 100644
--- a/illumos-utils/Cargo.toml
+++ b/illumos-utils/Cargo.toml
@@ -20,6 +20,7 @@ libc.workspace = true
 macaddr.workspace = true
 omicron-common.workspace = true
 oxide-vpc.workspace = true
+oxlog.workspace = true
 schemars.workspace = true
 serde.workspace = true
 slog.workspace = true
diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs
index ea80a6d34b..4b4107f529 100644
--- a/illumos-utils/src/running_zone.rs
+++ b/illumos-utils/src/running_zone.rs
@@ -14,6 +14,7 @@ use camino::{Utf8Path, Utf8PathBuf};
 use camino_tempfile::Utf8TempDir;
 use ipnetwork::IpNetwork;
 use omicron_common::backoff;
+pub use oxlog::is_oxide_smf_log_file;
 use slog::{error, info, o, warn, Logger};
 use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
 use std::sync::Arc;
@@ -1411,24 +1412,8 @@ pub fn is_oxide_smf_service(fmri: impl AsRef<str>) -> bool {
     SMF_SERVICE_PREFIXES.iter().any(|prefix| fmri.starts_with(prefix))
 }
 
-/// Return true if the provided file name appears to be a valid log file for an
-/// Oxide-managed SMF service.
-///
-/// Note that this operates on the _file name_. Any leading path components will
-/// cause this check to return `false`.
-pub fn is_oxide_smf_log_file(filename: impl AsRef<str>) -> bool {
-    // Log files are named by the SMF services, with the `/` in the FMRI
-    // translated to a `-`.
-    const PREFIXES: [&str; 2] = ["oxide-", "system-illumos-"];
-    let filename = filename.as_ref();
-    PREFIXES
-        .iter()
-        .any(|prefix| filename.starts_with(prefix) && filename.contains(".log"))
-}
-
 #[cfg(test)]
 mod tests {
-    use super::is_oxide_smf_log_file;
     use super::is_oxide_smf_service;
 
     #[test]
@@ -1438,15 +1423,4 @@
         assert!(!is_oxide_smf_service("svc:/system/blah:default"));
         assert!(!is_oxide_smf_service("svc:/not/oxide/blah:default"));
     }
-
-    #[test]
-    fn test_is_oxide_smf_log_file() {
-        assert!(is_oxide_smf_log_file("oxide-blah:default.log"));
-        assert!(is_oxide_smf_log_file("oxide-blah:default.log.0"));
-        assert!(is_oxide_smf_log_file("oxide-blah:default.log.1111"));
-        assert!(is_oxide_smf_log_file("system-illumos-blah:default.log"));
-        assert!(is_oxide_smf_log_file("system-illumos-blah:default.log.0"));
-        assert!(!is_oxide_smf_log_file("not-oxide-blah:default.log"));
-        assert!(!is_oxide_smf_log_file("not-system-illumos-blah:default.log"));
-    }
 }
diff --git a/package-manifest.toml b/package-manifest.toml
index 7b12583437..fa6bba7a96 100644
--- a/package-manifest.toml
+++ b/package-manifest.toml
@@ -638,3 +638,11 @@ source.rust.binary_names = ["omdb"]
 source.rust.release = true
 output.type = "zone"
 output.intermediate_only = true
+
+[package.oxlog]
+service_name = "oxlog"
+only_for_targets.image = "standard"
+source.type = "local"
+source.rust.binary_names = ["oxlog"]
+source.rust.release = true
+output.type = "tarball"
diff --git a/tools/build-global-zone-packages.sh b/tools/build-global-zone-packages.sh
index fc1ab42ade..b989e6a543 100755
--- a/tools/build-global-zone-packages.sh
+++ b/tools/build-global-zone-packages.sh
@@ -15,6 +15,7 @@ deps=(
   "$tarball_src_dir/mg-ddm-gz.tar"
   "$tarball_src_dir/propolis-server.tar.gz"
   "$tarball_src_dir/overlay.tar.gz"
+  "$tarball_src_dir/oxlog.tar"
 )
 for dep in "${deps[@]}"; do
   if [[ ! -e $dep ]]; then
@@ -48,6 +49,12 @@ mkdir -p "$pkg_dir"
 cd "$pkg_dir"
 tar -xvfz "$tarball_src_dir/mg-ddm-gz.tar"
 cd -
+# Extract the oxlog tarball for re-packaging into the layered GZ archive.
+pkg_dir="$tmp_gz/root/opt/oxide/oxlog"
+mkdir -p "$pkg_dir"
+cd "$pkg_dir"
+tar -xvfz "$tarball_src_dir/oxlog.tar"
+cd -
 
 # propolis should be bundled with this OS: Put the propolis-server zone image
 # under /opt/oxide in the gz.
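The `oxlog` binary added in this patch is a thin CLI over the library half of the crate. As a rough sketch of how the same API could be driven as a library (this is not part of the patch itself; it assumes a consumer crate that depends on `oxlog` and `anyhow`, and the zone name `oxz_switch` is only an illustrative value):

```rust
use oxlog::{Filter, Zones};

fn main() -> Result<(), anyhow::Error> {
    // Build the zone -> log-path index by scanning the filesystem once.
    let zones = Zones::load()?;

    // Only look at current and extra log files; skip the potentially
    // expensive search of the archived/debug directories.
    let filter = Filter { current: true, archived: false, extra: true };

    for (svc_name, svc_logs) in zones.zone_logs("oxz_switch", filter) {
        if let Some(current) = &svc_logs.current {
            println!("{svc_name}: {}", current.path);
        }
        for extra in &svc_logs.extra {
            println!("{svc_name} (extra): {}", extra.path);
        }
    }
    Ok(())
}
```

Because `Filter` lives in the library rather than only in the CLI, a consumer can opt out of the archived-log search, which the doc comment on `Filter` above notes can be expensive.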
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index e4733992bc..b6d61d9ea5 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -25,6 +25,7 @@ bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.6.0" } byteorder = { version = "1.5.0" } bytes = { version = "1.5.0", features = ["serde"] } +camino = { version = "1.1.6", default-features = false, features = ["serde1"] } chrono = { version = "0.4.31", features = ["alloc", "serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.4.3", features = ["cargo", "derive", "env", "wrap_help"] } @@ -130,6 +131,7 @@ bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.6.0" } byteorder = { version = "1.5.0" } bytes = { version = "1.5.0", features = ["serde"] } +camino = { version = "1.1.6", default-features = false, features = ["serde1"] } chrono = { version = "0.4.31", features = ["alloc", "serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.4.3", features = ["cargo", "derive", "env", "wrap_help"] } From 7bb6cd3d9b5b597424e3929f5f5dd8485fe8d454 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Tue, 23 Jan 2024 16:15:19 -0600 Subject: [PATCH 25/91] Bump web console (#4873) https://github.com/oxidecomputer/console/compare/644a45b8...b9013a33 * [b9013a33](https://github.com/oxidecomputer/console/commit/b9013a33) oxidecomputer/console#1898 * [fb9e9ca5](https://github.com/oxidecomputer/console/commit/fb9e9ca5) oxidecomputer/console#1904 * [9ae29498](https://github.com/oxidecomputer/console/commit/9ae29498) oxidecomputer/console#1897 * [6b894ceb](https://github.com/oxidecomputer/console/commit/6b894ceb) oxidecomputer/console#1901 * [d80d2e7c](https://github.com/oxidecomputer/console/commit/d80d2e7c) oxidecomputer/console#1886 * [2a7da0fa](https://github.com/oxidecomputer/console/commit/2a7da0fa) bump vite for security fix * [700e2700](https://github.com/oxidecomputer/console/commit/700e2700) oxidecomputer/console#1893 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index e76c29c9a0..0dc0024f2c 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="644a45b8e4ab673ad51754e372277abc3ddfd036" -SHA2="a059917d826081df04efd44186f6dfeef0099fc53f1e8618796ea990a510f4b0" +COMMIT="b9013a33eaa3f5efdcd5c7d244e36a54e7222295" +SHA2="bebb9800ff94c42897d54faac8c2a3f89b2b0e927ebf75ec74223b6163e4209d" From 0de612278713efe3e48c990dd516125402eedb31 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 05:59:51 +0000 Subject: [PATCH 26/91] Update taiki-e/install-action digest to cf2d7f1 (#4879) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`bd4f144` -> `cf2d7f1`](https://togithub.com/taiki-e/install-action/compare/bd4f144...cf2d7f1) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 
🚦 **Automerge**: Enabled.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update again.

---

 - [ ] If you want to rebase/retry this PR, check this box

---

This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate).

Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com>
---
 .github/workflows/hakari.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml
index a4ebc74843..06da0395a1 100644
--- a/.github/workflows/hakari.yml
+++ b/.github/workflows/hakari.yml
@@ -24,7 +24,7 @@ jobs:
         with:
           toolchain: stable
       - name: Install cargo-hakari
-        uses: taiki-e/install-action@bd4f14420660e33ca2929e5c0306a8367173c1ee # v2
+        uses: taiki-e/install-action@cf2d7f1118304815479579570ad3ec572fe94523 # v2
         with:
           tool: cargo-hakari
       - name: Check workspace-hack Cargo.toml is up-to-date

From a6a74d3efcbb9db3b00d2f1ae03dd1b8f86cbfba Mon Sep 17 00:00:00 2001
From: Rain
Date: Wed, 24 Jan 2024 00:42:54 -0800
Subject: [PATCH 27/91] [nexus] simplify BackgroundTask trait (#4877)

Just using `'a` everywhere is semantically identical to the current
pattern because all the lifetime parameters are covariant.
---
 nexus/src/app/background/common.rs            | 29 ++++++-------------
 nexus/src/app/background/dns_config.rs        | 10 ++-----
 nexus/src/app/background/dns_propagation.rs   | 10 ++-----
 nexus/src/app/background/dns_servers.rs       | 10 ++-----
 .../src/app/background/external_endpoints.rs  | 10 ++-----
 .../app/background/inventory_collection.rs    | 10 ++-----
 nexus/src/app/background/nat_cleanup.rs       | 10 ++-----
 nexus/src/app/background/phantom_disks.rs     | 10 ++-----
 8 files changed, 30 insertions(+), 69 deletions(-)

diff --git a/nexus/src/app/background/common.rs b/nexus/src/app/background/common.rs
index 7b05eab61b..4fcce74714 100644
--- a/nexus/src/app/background/common.rs
+++ b/nexus/src/app/background/common.rs
@@ -153,13 +153,10 @@ use tokio::time::MissedTickBehavior;
 ///
 /// See module-level documentation for details.
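 // (Sketch, not part of the upstream patch: why the collapse is sound.
 // `BoxFuture<'c, T>` is covariant in `'c`, and both input borrows can be
 // reborrowed for any shorter lifetime, so the single-lifetime form
 //
 //     fn activate<'a>(&'a mut self, opctx: &'a OpContext)
 //         -> BoxFuture<'a, serde_json::Value>;
 //
 // accepts exactly the same callers as the three-lifetime form with the
 // `'a: 'c, 'b: 'c` bounds that it replaces below.)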
pub trait BackgroundTask: Send + Sync { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c; + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value>; } /// Drives the execution of background tasks @@ -499,14 +496,10 @@ mod test { } impl BackgroundTask for ReportingTask { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - _: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + _: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let count = self.counter; self.counter += 1; @@ -684,14 +677,10 @@ mod test { } impl BackgroundTask for PausingTask { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - _: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + _: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let count = self.counter; self.counter += 1; diff --git a/nexus/src/app/background/dns_config.rs b/nexus/src/app/background/dns_config.rs index 805ae813fe..959cf1843e 100644 --- a/nexus/src/app/background/dns_config.rs +++ b/nexus/src/app/background/dns_config.rs @@ -43,14 +43,10 @@ impl DnsConfigWatcher { } impl BackgroundTask for DnsConfigWatcher { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { // Set up a logger for this activation that includes metadata about // the current generation. diff --git a/nexus/src/app/background/dns_propagation.rs b/nexus/src/app/background/dns_propagation.rs index 45776df21b..cf7a399999 100644 --- a/nexus/src/app/background/dns_propagation.rs +++ b/nexus/src/app/background/dns_propagation.rs @@ -36,14 +36,10 @@ impl DnsPropagator { } impl BackgroundTask for DnsPropagator { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { // Read the DNS configuration and server list from the other // background tasks that assemble these. Clone them because diff --git a/nexus/src/app/background/dns_servers.rs b/nexus/src/app/background/dns_servers.rs index 3a75c09302..97fb3510b7 100644 --- a/nexus/src/app/background/dns_servers.rs +++ b/nexus/src/app/background/dns_servers.rs @@ -57,14 +57,10 @@ impl DnsServersWatcher { } impl BackgroundTask for DnsServersWatcher { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { // Set up a logger for this activation that includes metadata about // the current generation. 
diff --git a/nexus/src/app/background/external_endpoints.rs b/nexus/src/app/background/external_endpoints.rs index 53401c16de..ed530e0775 100644 --- a/nexus/src/app/background/external_endpoints.rs +++ b/nexus/src/app/background/external_endpoints.rs @@ -42,14 +42,10 @@ impl ExternalEndpointsWatcher { } impl BackgroundTask for ExternalEndpointsWatcher { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; diff --git a/nexus/src/app/background/inventory_collection.rs b/nexus/src/app/background/inventory_collection.rs index 5c52fa519b..044e5a2234 100644 --- a/nexus/src/app/background/inventory_collection.rs +++ b/nexus/src/app/background/inventory_collection.rs @@ -51,14 +51,10 @@ impl InventoryCollector { } impl BackgroundTask for InventoryCollector { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { match inventory_activate( opctx, diff --git a/nexus/src/app/background/nat_cleanup.rs b/nexus/src/app/background/nat_cleanup.rs index 1691d96a4b..5014dc0553 100644 --- a/nexus/src/app/background/nat_cleanup.rs +++ b/nexus/src/app/background/nat_cleanup.rs @@ -32,14 +32,10 @@ impl Ipv4NatGarbageCollector { } impl BackgroundTask for Ipv4NatGarbageCollector { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; diff --git a/nexus/src/app/background/phantom_disks.rs b/nexus/src/app/background/phantom_disks.rs index b038d70ac6..48688838e5 100644 --- a/nexus/src/app/background/phantom_disks.rs +++ b/nexus/src/app/background/phantom_disks.rs @@ -37,14 +37,10 @@ impl PhantomDiskDetector { } impl BackgroundTask for PhantomDiskDetector { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; warn!(&log, "phantom disk task started"); From 95778ebb270929469e208ad4d7052101e372aad2 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 08:50:52 +0000 Subject: [PATCH 28/91] Update Rust crate argon2 to 0.5.3 (#4881) --- Cargo.lock | 8 ++++---- passwords/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec594abb8f..d58b5f6cd9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -188,9 +188,9 @@ checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" [[package]] name = "argon2" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17ba4cac0a46bc1d2912652a751c47f2a9f3a7fe89bcae2275d418f5270402f9" +checksum = "3c3610892ee6e0cbce8ae2700349fcf8f98adb0dbfbee85aec3c9179d29cc072" dependencies = [ "base64ct", "blake2", @@ -1100,9 +1100,9 @@ checksum = "f9236877021b66ad90f833d8a73a7acb702b985b64c5986682d9f1f1a184f0fb" [[package]] name = "cpufeatures" -version = "0.2.9" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" dependencies = [ "libc", ] diff --git a/passwords/Cargo.toml b/passwords/Cargo.toml index 8adcf75a2e..4f3922a7a5 100644 --- a/passwords/Cargo.toml +++ b/passwords/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" license = "MPL-2.0" [dependencies] -argon2 = { version = "0.5.2", features = ["alloc", "password-hash", "rand", "std"] } +argon2 = { version = "0.5.3", features = ["alloc", "password-hash", "rand", "std"] } rand.workspace = true thiserror.workspace = true schemars.workspace = true From d978cd6cedc1653643c7ab2ab529634a10b46c0d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 09:39:53 +0000 Subject: [PATCH 29/91] Update Rust crate tabled to 0.15 (#4847) --- Cargo.lock | 12 ++++++------ Cargo.toml | 2 +- oximeter/db/src/bin/oxdb.rs | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d58b5f6cd9..d179ad4b0f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5726,9 +5726,9 @@ dependencies = [ [[package]] name = "papergrid" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2ccbe15f2b6db62f9a9871642746427e297b0ceb85f9a7f1ee5ff47d184d0c8" +checksum = "9ad43c07024ef767f9160710b3a6773976194758c7919b17e63b863db0bdf7fb" dependencies = [ "bytecount", "fnv", @@ -8528,9 +8528,9 @@ dependencies = [ [[package]] name = "tabled" -version = "0.14.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfe9c3632da101aba5131ed63f9eed38665f8b3c68703a6bb18124835c1a5d22" +checksum = "4c998b0c8b921495196a48aabaf1901ff28be0760136e31604f7967b0792050e" dependencies = [ "papergrid", "tabled_derive", @@ -8539,9 +8539,9 @@ dependencies = [ [[package]] name = "tabled_derive" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99f688a08b54f4f02f0a3c382aefdb7884d3d69609f785bd253dc033243e3fe4" +checksum = "4c138f99377e5d653a371cdad263615634cfc8467685dfe8e73e2b8e98f44b17" dependencies = [ "heck 0.4.1", "proc-macro-error", diff --git a/Cargo.toml b/Cargo.toml index 3d0be64380..2e538656ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -371,7 +371,7 @@ supports-color = "2.1.0" swrite = "0.1.0" libsw = { version = "3.3.1", features = ["tokio"] } syn = { version = "2.0" } -tabled = "0.14" +tabled = "0.15.0" tar = "0.4" tempdir = "0.3" tempfile = "3.9" diff --git a/oximeter/db/src/bin/oxdb.rs b/oximeter/db/src/bin/oxdb.rs index 17f05c24e2..02a8054da0 100644 --- a/oximeter/db/src/bin/oxdb.rs +++ b/oximeter/db/src/bin/oxdb.rs @@ -391,7 +391,7 @@ async fn describe_virtual_table( } let mut builder = tabled::builder::Builder::default(); - builder.set_header(cols); + builder.push_record(cols); // first record is the header builder.push_record(types); println!( "{}", @@ -553,7 +553,7 @@ async fn sql_shell( println!(); let mut builder = tabled::builder::Builder::default(); - builder.set_header(&table.column_names); + builder.push_record(&table.column_names); // first record is the header for row in table.rows.iter() { builder.push_record( row.iter().map(ToString::to_string), From 255cb83822098f80c2f60811152922259ca8e82c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 09:57:33 +0000 Subject: [PATCH 
30/91] Update Rust crate ciborium to 0.2.2 (#4882) --- Cargo.lock | 20 ++++++++++++-------- Cargo.toml | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d179ad4b0f..4d531366f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -892,9 +892,9 @@ dependencies = [ [[package]] name = "ciborium" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", @@ -903,15 +903,15 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", "half", @@ -2726,9 +2726,13 @@ dependencies = [ [[package]] name = "half" -version = "1.8.2" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +dependencies = [ + "cfg-if", + "crunchy", +] [[package]] name = "hash32" diff --git a/Cargo.toml b/Cargo.toml index 2e538656ac..5364f4b4e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -170,7 +170,7 @@ camino = "1.1" camino-tempfile = "1.1.1" cancel-safe-futures = "0.1.5" chacha20poly1305 = "0.10.1" -ciborium = "0.2.1" +ciborium = "0.2.2" cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } clap = { version = "4.4", features = ["cargo", "derive", "env", "wrap_help"] } From 8f1134a911532792cac29fdea7ad42b9403ac233 Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 24 Jan 2024 08:36:41 -0800 Subject: [PATCH 31/91] [wicket] update to ratatui 0.25.0 (#4876) This was mostly a smooth upgrade, with just a small change required to our use of tui-tree-widget. Tested the TUI locally -- works great. 
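For context on the two breaking changes involved: ratatui's `Frame` type is
no longer generic over the terminal backend in this release series, so
wicket's local `Frame` alias over `CrosstermBackend` goes away and call
sites import `ratatui::Frame` directly. And tui-tree-widget 0.16 keys every
tree item by an explicit identifier, which makes the constructors fallible
(duplicate identifiers are rejected). A minimal sketch of the new shape,
using the `usize` identifiers the wicket code picks (the item text here is
illustrative):

    use tui_tree_widget::{Tree, TreeItem, TreeState};

    let items: Vec<TreeItem<'static, usize>> = vec![
        TreeItem::new(0, "SLED 0".to_string(), Vec::new())
            .expect("no children so no duplicate identifiers"),
    ];
    let mut tree_state = TreeState::<usize>::default();
    // select_first now takes the items it is selecting within
    tree_state.select_first(&items);
    let tree = Tree::new(items.clone())
        .expect("tree does not contain duplicate identifiers");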
--- Cargo.lock | 51 +++++++++++++++------ Cargo.toml | 3 +- wicket/Cargo.toml | 2 +- wicket/src/runner.rs | 1 - wicket/src/state/update.rs | 1 + wicket/src/ui/controls/mod.rs | 4 +- wicket/src/ui/main.rs | 3 +- wicket/src/ui/panes/overview.rs | 3 +- wicket/src/ui/panes/rack_setup.rs | 2 +- wicket/src/ui/panes/update.rs | 68 ++++++++++++++++------------ wicket/src/ui/splash.rs | 3 +- wicket/src/ui/widgets/fade.rs | 11 +---- wicket/src/ui/widgets/status_view.rs | 3 +- workspace-hack/Cargo.toml | 6 ++- 14 files changed, 96 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4d531366f6..f5b80f9262 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -74,6 +74,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -2772,6 +2778,10 @@ name = "hashbrown" version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" +dependencies = [ + "ahash", + "allocator-api2", +] [[package]] name = "headers" @@ -3595,15 +3605,6 @@ dependencies = [ "either", ] -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.12.0" @@ -3885,6 +3886,15 @@ dependencies = [ "zerocopy 0.6.4", ] +[[package]] +name = "lru" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2994eeba8ed550fd9b47a0b38f0242bc3344e496483c6180b69139cc2fa5d1d7" +dependencies = [ + "hashbrown 0.14.2", +] + [[package]] name = "lru-cache" version = "0.1.2" @@ -5213,6 +5223,7 @@ dependencies = [ "getrandom 0.2.10", "group", "hashbrown 0.13.2", + "hashbrown 0.14.2", "hex", "hmac", "hyper 0.14.27", @@ -6631,16 +6642,18 @@ dependencies = [ [[package]] name = "ratatui" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e2e4cd95294a85c3b4446e63ef054eea43e0205b1fd60120c16b74ff7ff96ad" +checksum = "a5659e52e4ba6e07b2dad9f1158f578ef84a73762625ddb51536019f34d180eb" dependencies = [ "bitflags 2.4.0", "cassowary", "crossterm", "indoc 2.0.3", - "itertools 0.11.0", + "itertools 0.12.0", + "lru", "paste", + "stability", "strum", "unicode-segmentation", "unicode-width", @@ -8276,6 +8289,16 @@ dependencies = [ "syn 2.0.46", ] +[[package]] +name = "stability" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebd1b177894da2a2d9120208c3386066af06a488255caabc5de8ddca22dbc3ce" +dependencies = [ + "quote", + "syn 1.0.109", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -9351,9 +9374,9 @@ dependencies = [ [[package]] name = "tui-tree-widget" -version = "0.13.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f01f9172fb8f4f925fb1e259c2f411be14af031ab8b35d517fd05cb78c0784d5" +checksum = "136011b328c4f392499a02c4b5b78d509fb297bf9c10f2bda5d11d65cb946e4c" dependencies = [ "ratatui", "unicode-width", diff --git a/Cargo.toml b/Cargo.toml index 5364f4b4e1..d47b5727ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -310,7 +310,7 @@ propolis-mock-server = { git = 
"https://github.com/oxidecomputer/propolis", rev proptest = "1.4.0" quote = "1.0" rand = "0.8.5" -ratatui = "0.23.0" +ratatui = "0.25.0" rayon = "1.8" rcgen = "0.12.0" reedline = "0.28.0" @@ -396,6 +396,7 @@ trust-dns-server = "0.22" trybuild = "1.0.89" tufaceous = { path = "tufaceous" } tufaceous-lib = { path = "tufaceous-lib" } +tui-tree-widget = "0.16.0" unicode-width = "0.1.11" update-common = { path = "update-common" } update-engine = { path = "update-engine" } diff --git a/wicket/Cargo.toml b/wicket/Cargo.toml index efb8e51dff..140c011511 100644 --- a/wicket/Cargo.toml +++ b/wicket/Cargo.toml @@ -37,7 +37,7 @@ tokio = { workspace = true, features = ["full"] } tokio-util.workspace = true toml.workspace = true toml_edit.workspace = true -tui-tree-widget = "0.13.0" +tui-tree-widget.workspace = true unicode-width.workspace = true zeroize.workspace = true diff --git a/wicket/src/runner.rs b/wicket/src/runner.rs index 32fabde53e..e83d321459 100644 --- a/wicket/src/runner.rs +++ b/wicket/src/runner.rs @@ -34,7 +34,6 @@ use crate::{Action, Cmd, Event, KeyHandler, Recorder, State, TICK_INTERVAL}; // We can avoid a bunch of unnecessary type parameters by picking them ahead of time. pub type Term = Terminal>; -pub type Frame<'a> = ratatui::Frame<'a, CrosstermBackend>; const MAX_RECORDED_EVENTS: usize = 10000; diff --git a/wicket/src/state/update.rs b/wicket/src/state/update.rs index 6d8a168614..77bbdd83d2 100644 --- a/wicket/src/state/update.rs +++ b/wicket/src/state/update.rs @@ -333,6 +333,7 @@ impl UpdateItem { } } +#[derive(Debug, Copy, Clone)] pub enum UpdateState { NotStarted, Starting, diff --git a/wicket/src/ui/controls/mod.rs b/wicket/src/ui/controls/mod.rs index 4305fb5809..a2682b8052 100644 --- a/wicket/src/ui/controls/mod.rs +++ b/wicket/src/ui/controls/mod.rs @@ -2,8 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::{Action, Cmd, Frame, State}; -use ratatui::layout::Rect; +use crate::{Action, Cmd, State}; +use ratatui::{layout::Rect, Frame}; /// A [`Control`] is the an item on a screen that can be selected and interacted with. /// Control's render [`ratatui::widgets::Widget`]s when drawn. 
diff --git a/wicket/src/ui/main.rs b/wicket/src/ui/main.rs index 58ea6c1771..379cbd03af 100644 --- a/wicket/src/ui/main.rs +++ b/wicket/src/ui/main.rs @@ -8,11 +8,12 @@ use super::{Control, OverviewPane, RackSetupPane, StatefulList, UpdatePane}; use crate::ui::defaults::colors::*; use crate::ui::defaults::style; use crate::ui::widgets::Fade; -use crate::{Action, Cmd, Frame, State, Term}; +use crate::{Action, Cmd, State, Term}; use ratatui::layout::{Alignment, Constraint, Direction, Layout, Rect}; use ratatui::style::{Modifier, Style}; use ratatui::text::{Line, Span}; use ratatui::widgets::{Block, BorderType, Borders, List, ListItem, Paragraph}; +use ratatui::Frame; use slog::{o, Logger}; use wicketd_client::types::GetLocationResponse; diff --git a/wicket/src/ui/panes/overview.rs b/wicket/src/ui/panes/overview.rs index e8cf50bb32..f2d4d4a7ab 100644 --- a/wicket/src/ui/panes/overview.rs +++ b/wicket/src/ui/panes/overview.rs @@ -16,11 +16,12 @@ use crate::ui::defaults::style; use crate::ui::widgets::IgnitionPopup; use crate::ui::widgets::{BoxConnector, BoxConnectorKind, Rack}; use crate::ui::wrap::wrap_text; -use crate::{Action, Cmd, Frame, State}; +use crate::{Action, Cmd, State}; use ratatui::layout::{Constraint, Direction, Layout, Rect}; use ratatui::style::Style; use ratatui::text::{Line, Span, Text}; use ratatui::widgets::{Block, BorderType, Borders, Paragraph}; +use ratatui::Frame; use wicketd_client::types::RotState; use wicketd_client::types::SpComponentCaboose; use wicketd_client::types::SpComponentInfo; diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs index 086d01ce9d..ab85c63819 100644 --- a/wicket/src/ui/panes/rack_setup.rs +++ b/wicket/src/ui/panes/rack_setup.rs @@ -16,7 +16,6 @@ use crate::ui::widgets::PopupScrollOffset; use crate::Action; use crate::Cmd; use crate::Control; -use crate::Frame; use crate::State; use ratatui::layout::Constraint; use ratatui::layout::Direction; @@ -29,6 +28,7 @@ use ratatui::widgets::Block; use ratatui::widgets::BorderType; use ratatui::widgets::Borders; use ratatui::widgets::Paragraph; +use ratatui::Frame; use std::borrow::Cow; use wicketd_client::types::Baseboard; use wicketd_client::types::CurrentRssUserConfig; diff --git a/wicket/src/ui/panes/update.rs b/wicket/src/ui/panes/update.rs index d14b90dfab..be21984997 100644 --- a/wicket/src/ui/panes/update.rs +++ b/wicket/src/ui/panes/update.rs @@ -17,7 +17,7 @@ use crate::ui::widgets::{ PopupScrollOffset, StatusView, }; use crate::ui::wrap::wrap_text; -use crate::{Action, Cmd, Frame, State}; +use crate::{Action, Cmd, State}; use indexmap::IndexMap; use omicron_common::api::internal::nexus::KnownArtifactKind; use ratatui::layout::{Alignment, Constraint, Direction, Layout, Rect}; @@ -26,6 +26,7 @@ use ratatui::widgets::{ Block, BorderType, Borders, Cell, List, ListItem, ListState, Paragraph, Row, Table, }; +use ratatui::Frame; use slog::{info, o, Logger}; use tui_tree_widget::{Tree, TreeItem, TreeState}; use update_engine::{ @@ -148,8 +149,11 @@ pub struct UpdatePane { /// TODO: Move following state into global `State` so that recorder snapshots /// capture all state. - tree_state: TreeState, - items: Vec>, + /// + /// TODO: The generic parameter is carried over from earlier versions + /// of tui-tree-widget, but there's likely a better index type. + tree_state: TreeState, + items: Vec>, // Per-component update state that isn't serializable. 
component_state: BTreeMap, @@ -175,14 +179,20 @@ impl UpdatePane { pub fn new(log: &Logger) -> UpdatePane { let log = log.new(o!("component" => "UpdatePane")); let mut tree_state = TreeState::default(); - tree_state.select_first(); + let items = ALL_COMPONENT_IDS + .iter() + .enumerate() + .map(|(index, id)| { + TreeItem::new(index, id.to_string_uppercase(), vec![]) + .expect("no children so no duplicate identifiers") + }) + .collect::>(); + tree_state.select_first(&items); + UpdatePane { log, tree_state, - items: ALL_COMPONENT_IDS - .iter() - .map(|id| TreeItem::new(id.to_string_uppercase(), vec![])) - .collect(), + items, help: vec![ ("Expand", ""), ("Collapse", ""), @@ -826,7 +836,8 @@ impl UpdatePane { .update_state .items .iter() - .map(|(id, states)| { + .enumerate() + .map(|(index, (id, states))| { let children: Vec<_> = states .iter() .flat_map(|(component, s)| { @@ -834,9 +845,8 @@ impl UpdatePane { artifact_version(id, component, &versions); let installed_versions = all_installed_versions(id, component, inventory); - let contents_rect = self.contents_rect; installed_versions.into_iter().map(move |v| { - let spans = vec![ + vec![ Span::styled(v.title, style::selected()), Span::styled(v.version, style::selected_line()), Span::styled( @@ -844,17 +854,20 @@ impl UpdatePane { style::selected(), ), Span::styled(s.to_string(), s.style()), - ]; - TreeItem::new_leaf(align_by( - 0, - MAX_COLUMN_WIDTH, - contents_rect, - spans, - )) + ] }) }) + .enumerate() + .map(|(leaf_index, spans)| { + let contents_rect = self.contents_rect; + TreeItem::new_leaf( + leaf_index, + align_by(0, MAX_COLUMN_WIDTH, contents_rect, spans), + ) + }) .collect(); - TreeItem::new(id.to_string_uppercase(), children) + TreeItem::new(index, id.to_string_uppercase(), children) + .expect("tree does not contain duplicate identifiers") }) .collect(); } @@ -1365,6 +1378,7 @@ impl UpdatePane { // Draw the contents let tree = Tree::new(self.items.clone()) + .expect("tree does not have duplicate identifiers") .block(block.clone().borders(Borders::LEFT | Borders::RIGHT)) .style(style::plain_text()) .highlight_style(style::highlighted()); @@ -1421,12 +1435,11 @@ impl UpdatePane { Constraint::Length(cell_width), Constraint::Length(cell_width), ]; - let header_table = Table::new(std::iter::empty()) + let header_table = Table::new(std::iter::empty(), &width_constraints) .header( Row::new(vec!["COMPONENT", "VERSION", "TARGET", "STATUS"]) .style(header_style), ) - .widths(&width_constraints) .block(block.clone().title("OVERVIEW (* = active)")); frame.render_widget(header_table, self.table_headers_rect); @@ -1458,12 +1471,11 @@ impl UpdatePane { ]) }) }); - let version_table = - Table::new(version_rows).widths(&width_constraints).block( - block - .clone() - .borders(Borders::LEFT | Borders::RIGHT | Borders::BOTTOM), - ); + let version_table = Table::new(version_rows, &width_constraints).block( + block + .clone() + .borders(Borders::LEFT | Borders::RIGHT | Borders::BOTTOM), + ); frame.render_widget(version_table, self.status_view_version_rect); // Ensure the version table is connected to the table headers @@ -2413,7 +2425,7 @@ impl Control for UpdatePane { Some(Action::Redraw) } Cmd::GotoTop => { - self.tree_state.select_first(); + self.tree_state.select_first(&self.items); state.rack_state.selected = ALL_COMPONENT_IDS[0]; Some(Action::Redraw) } diff --git a/wicket/src/ui/splash.rs b/wicket/src/ui/splash.rs index cc8ab0bff8..9da9fa8648 100644 --- a/wicket/src/ui/splash.rs +++ b/wicket/src/ui/splash.rs @@ -10,9 +10,10 @@ use 
super::defaults::colors::*; use super::defaults::dimensions::RectExt; use super::defaults::style; use super::widgets::{Logo, LogoState, LOGO_HEIGHT, LOGO_WIDTH}; -use crate::{Cmd, Frame, Term}; +use crate::{Cmd, Term}; use ratatui::style::Style; use ratatui::widgets::Block; +use ratatui::Frame; const TOTAL_FRAMES: usize = 100; diff --git a/wicket/src/ui/widgets/fade.rs b/wicket/src/ui/widgets/fade.rs index d1669cd5b7..5462a4ecf2 100644 --- a/wicket/src/ui/widgets/fade.rs +++ b/wicket/src/ui/widgets/fade.rs @@ -9,15 +9,6 @@ pub struct Fade {} impl Widget for Fade { fn render(self, area: Rect, buf: &mut Buffer) { - for x in area.left()..area.right() { - for y in area.top()..area.bottom() { - buf.set_string( - x, - y, - buf.get(x, y).symbol.clone(), - style::faded_background(), - ); - } - } + buf.set_style(area, style::faded_background()); } } diff --git a/wicket/src/ui/widgets/status_view.rs b/wicket/src/ui/widgets/status_view.rs index 7418fed512..b9e981c9bc 100644 --- a/wicket/src/ui/widgets/status_view.rs +++ b/wicket/src/ui/widgets/status_view.rs @@ -6,10 +6,9 @@ use ratatui::{ layout::{Alignment, Rect}, text::Text, widgets::{Block, Borders, List, Paragraph, StatefulWidget, Widget}, + Frame, }; -use crate::Frame; - use super::{BoxConnector, BoxConnectorKind}; /// A displayer for the status view. diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index b6d61d9ea5..a15b972554 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -54,7 +54,8 @@ gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway- generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } -hashbrown = { version = "0.13.2" } +hashbrown-582f2526e08bb6a0 = { package = "hashbrown", version = "0.14.2", features = ["raw"] } +hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13.2" } hex = { version = "0.4.3", features = ["serde"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.27", features = ["full"] } @@ -160,7 +161,8 @@ gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway- generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } -hashbrown = { version = "0.13.2" } +hashbrown-582f2526e08bb6a0 = { package = "hashbrown", version = "0.14.2", features = ["raw"] } +hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13.2" } hex = { version = "0.4.3", features = ["serde"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.27", features = ["full"] } From e85af5b1e51bba3f8d0c8eea633307b90a2cb2a8 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 08:37:19 -0800 Subject: [PATCH 32/91] Update Rust crate uuid to 1.7.0 (#4885) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f5b80f9262..2c58c7c33c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9705,9 +9705,9 @@ checksum = 
"711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ "getrandom 0.2.10", "serde", diff --git a/Cargo.toml b/Cargo.toml index d47b5727ad..f5e5a13e0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -401,7 +401,7 @@ unicode-width = "0.1.11" update-common = { path = "update-common" } update-engine = { path = "update-engine" } usdt = "0.3" -uuid = { version = "1.6.1", features = ["serde", "v4"] } +uuid = { version = "1.7.0", features = ["serde", "v4"] } walkdir = "2.4" wicket = { path = "wicket" } wicket-common = { path = "wicket-common" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index a15b972554..33434766d3 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -114,7 +114,7 @@ trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } -uuid = { version = "1.6.1", features = ["serde", "v4"] } +uuid = { version = "1.7.0", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.31", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } @@ -222,7 +222,7 @@ trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } -uuid = { version = "1.6.1", features = ["serde", "v4"] } +uuid = { version = "1.7.0", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.31", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } From 7bd6ea60e504c288a63db79a0deb44944897e84c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 08:37:41 -0800 Subject: [PATCH 33/91] Update Rust crate serde_with to 3.5.1 (#4884) --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2c58c7c33c..bdb288be18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7693,9 +7693,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.4.0" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64cd236ccc1b7a29e7e2739f27c0b2dd199804abc4290e32f59f3b68d6405c23" +checksum = "f5c9fdb6b00a489875b22efd4b78fe2b363b72265cc5f6eb2e2b9ee270e6140c" dependencies = [ "base64", "chrono", @@ -7710,9 +7710,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.4.0" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93634eb5f75a2323b16de4748022ac4297f9e76b6dced2be287a099f41b5e788" +checksum = "dbff351eb4b33600a2e138dfa0b10b65a238ea8ff8fb2387c422c5022a3e8298" dependencies = [ "darling 0.20.3", "proc-macro2", diff --git a/Cargo.toml b/Cargo.toml index f5e5a13e0c..fc8afecc9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -336,7 +336,7 @@ serde_json = "1.0.111" serde_path_to_error = "0.1.15" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" -serde_with = "3.4.0" +serde_with = "3.5.1" sha2 = 
"0.10.8" sha3 = "0.10.8" shell-words = "1.1.0" From c392c76f0cc8a0bdaca5cf7690c6d95dd071e7db Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 08:38:34 -0800 Subject: [PATCH 34/91] Update Rust crate regex to 1.10.3 (#4883) --- Cargo.lock | 13 +++++++------ Cargo.toml | 2 +- workspace-hack/Cargo.toml | 10 ++++++---- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bdb288be18..2451404a1e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5182,6 +5182,7 @@ name = "omicron-workspace-hack" version = "0.1.0" dependencies = [ "ahash", + "aho-corasick", "anyhow", "base16ct", "bit-set", @@ -5254,7 +5255,7 @@ dependencies = [ "rand 0.8.5", "rand_chacha 0.3.1", "regex", - "regex-automata 0.4.3", + "regex-automata 0.4.4", "regex-syntax 0.8.2", "reqwest", "ring 0.17.7", @@ -6780,13 +6781,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.3", + "regex-automata 0.4.4", "regex-syntax 0.8.2", ] @@ -6804,9 +6805,9 @@ checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "3b7fa1134405e2ec9353fd416b17f8dacd46c473d7d3fd1cf202706a14eb792a" dependencies = [ "aho-corasick", "memchr", diff --git a/Cargo.toml b/Cargo.toml index fc8afecc9c..fb4327f575 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -315,7 +315,7 @@ rayon = "1.8" rcgen = "0.12.0" reedline = "0.28.0" ref-cast = "1.0" -regex = "1.10.2" +regex = "1.10.3" regress = "0.7.1" reqwest = { version = "0.11", default-features = false } ring = "0.17.7" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 33434766d3..cda4426c9b 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -15,6 +15,7 @@ publish = false ### BEGIN HAKARI SECTION [dependencies] ahash = { version = "0.8.6" } +aho-corasick = { version = "1.0.4" } anyhow = { version = "1.0.75", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } bit-set = { version = "0.5.3" } @@ -83,8 +84,8 @@ predicates = { version = "3.1.0" } proc-macro2 = { version = "1.0.74" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } -regex = { version = "1.10.2" } -regex-automata = { version = "0.4.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } +regex = { version = "1.10.3" } +regex-automata = { version = "0.4.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.17.7", features = ["std"] } @@ -122,6 +123,7 @@ zip = { version = "0.6.6", default-features = false, features = ["bzip2", "defla [build-dependencies] ahash = { version = "0.8.6" } +aho-corasick = { version = "1.0.4" 
} anyhow = { version = "1.0.75", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } bit-set = { version = "0.5.3" } @@ -190,8 +192,8 @@ predicates = { version = "3.1.0" } proc-macro2 = { version = "1.0.74" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } -regex = { version = "1.10.2" } -regex-automata = { version = "0.4.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } +regex = { version = "1.10.3" } +regex-automata = { version = "0.4.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.17.7", features = ["std"] } From 9ac047e54985d581a4e6b3f77077bd69d5ff20f5 Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Wed, 24 Jan 2024 12:50:26 -0800 Subject: [PATCH 35/91] updates for tunnel routing (#3859) --- .github/buildomat/jobs/deploy.sh | 21 +++- Cargo.lock | 19 ++- Cargo.toml | 4 +- clients/ddm-admin-client/src/lib.rs | 18 ++- common/src/api/external/mod.rs | 2 +- docs/boundary-services-a-to-z.adoc | 117 ++---------------- docs/networking.adoc | 10 +- illumos-utils/src/opte/mod.rs | 20 --- illumos-utils/src/opte/port_manager.rs | 3 - nexus/src/app/rack.rs | 76 +----------- .../app/sagas/switch_port_settings_apply.rs | 84 +++++++++++++ .../app/sagas/switch_port_settings_clear.rs | 112 ++++++++++++++++- .../app/sagas/switch_port_settings_common.rs | 41 +----- package-manifest.toml | 12 +- sled-agent/src/bootstrap/early_networking.rs | 94 ++++++-------- smf/sled-agent/non-gimlet/config-rss.toml | 2 +- tools/ci_check_opte_ver.sh | 5 + tools/install_opte.sh | 10 ++ tools/maghemite_ddm_openapi_version | 4 +- tools/maghemite_mg_openapi_version | 4 +- tools/maghemite_mgd_checksums | 4 +- tools/opte_version | 2 +- tools/opte_version_override | 5 + 23 files changed, 342 insertions(+), 327 deletions(-) create mode 100644 tools/opte_version_override diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index f4f1e0a999..e69cfb0078 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -2,7 +2,7 @@ #: #: name = "helios / deploy" #: variety = "basic" -#: target = "lab-2.0-opte-0.27" +#: target = "lab-2.0-opte-0.28" #: output_rules = [ #: "%/var/svc/log/oxide-sled-agent:default.log*", #: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/oxide-*.log*", @@ -33,6 +33,9 @@ _exit_trap() { local status=$? [[ $status -eq 0 ]] && exit 0 + # XXX paranoia + pfexec cp /tmp/opteadm /opt/oxide/opte/bin/opteadm + set +o errexit set -o xtrace banner evidence @@ -50,6 +53,7 @@ _exit_trap() { standalone \ dump-state pfexec /opt/oxide/opte/bin/opteadm list-ports + pfexec /opt/oxide/opte/bin/opteadm dump-v2b z_swadm link ls z_swadm addr list z_swadm route list @@ -97,6 +101,19 @@ z_swadm () { pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm $@ } +# XXX remove. This is just to test against a development branch of OPTE in CI. 
+set +x +OPTE_COMMIT="73d4669ea213d0b7aca35c4babb6fd09ed51d29e" +curl -sSfOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde +pfexec rem_drv xde || true +pfexec mv xde /kernel/drv/amd64/xde +pfexec add_drv xde || true +curl -sSfOL https://buildomat.eng.oxide.computer/wg/0/artefact/01HM09S4M15WNXB2B2MX8R1GBT/yLalJU5vT4S4IEpwSeY4hPuspxw3JcINokZmlfNU14npHkzG/01HM09SJ2RQSFGW7MVKC9JKZ8D/01HM0A58D888AJ7YP6N1Q6T6ZD/opteadm +chmod +x opteadm +cp opteadm /tmp/opteadm +pfexec mv opteadm /opt/oxide/opte/bin/opteadm +set -x + # # XXX work around 14537 (UFS should not allow directories to be unlinked) which # is probably not yet fixed in xde branch? Once the xde branch merges from @@ -236,7 +253,7 @@ infra_ip_last = \"$UPLINK_IP\" /^routes/c\\ routes = \\[{nexthop = \"$GATEWAY_IP\", destination = \"0.0.0.0/0\"}\\] /^addresses/c\\ -addresses = \\[\"$UPLINK_IP/32\"\\] +addresses = \\[\"$UPLINK_IP/24\"\\] } " pkg/config-rss.toml diff -u pkg/config-rss.toml{~,} || true diff --git a/Cargo.lock b/Cargo.lock index 2451404a1e..0c3eb15179 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3257,7 +3257,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" [[package]] name = "illumos-utils" @@ -3657,7 +3657,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "quote", "syn 2.0.46", @@ -5411,7 +5411,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "cfg-if", "dyn-clone", @@ -5427,7 +5427,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "illumos-sys-hdrs", "ipnetwork", @@ -5439,7 +5439,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "libc", "libnet", @@ -5513,10 +5513,12 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ + 
"cfg-if", "illumos-sys-hdrs", "opte", + "poptrie", "serde", "smoltcp 0.11.0", "zerocopy 0.7.31", @@ -6153,6 +6155,11 @@ dependencies = [ "universal-hash", ] +[[package]] +name = "poptrie" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/poptrie?branch=multipath#ca52bef3f87ff1a67d81b3c6e601dcb5fdbcc165" + [[package]] name = "portable-atomic" version = "1.4.3" diff --git a/Cargo.toml b/Cargo.toml index fb4327f575..093e972b42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -270,7 +270,7 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.10.1" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "dd2b7b0306d3f01fa09170b8884d402209e49244", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "1d29ef60a18179babfb44f0f7a3c2fe71034a2c1", features = [ "api", "std" ] } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0" @@ -278,7 +278,7 @@ openapiv3 = "2.0.0" openssl = "0.10" openssl-sys = "0.9" openssl-probe = "0.1.5" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "dd2b7b0306d3f01fa09170b8884d402209e49244" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" } oso = "0.27" owo-colors = "3.5.0" oximeter = { path = "oximeter/oximeter" } diff --git a/clients/ddm-admin-client/src/lib.rs b/clients/ddm-admin-client/src/lib.rs index 93248c73a1..c32345d1dc 100644 --- a/clients/ddm-admin-client/src/lib.rs +++ b/clients/ddm-admin-client/src/lib.rs @@ -20,7 +20,7 @@ pub use inner::types; pub use inner::Error; use either::Either; -use inner::types::Ipv6Prefix; +use inner::types::{Ipv6Prefix, TunnelOrigin}; use inner::Client as InnerClient; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; @@ -108,6 +108,22 @@ impl Client { }); } + pub fn advertise_tunnel_endpoint(&self, endpoint: TunnelOrigin) { + let me = self.clone(); + tokio::spawn(async move { + retry_notify(retry_policy_internal_service_aggressive(), || async { + me.inner.advertise_tunnel_endpoints(&vec![endpoint.clone()]).await?; + Ok(()) + }, |err, duration| { + info!( + me.log, + "Failed to notify ddmd of tunnel endpoint (retry in {duration:?}"; + "err" => %err, + ); + }).await.unwrap(); + }); + } + /// Returns the addresses of connected sleds. /// /// Note: These sleds have not yet been verified. diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 68fcb0f9fa..a8aff00afa 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -3253,7 +3253,7 @@ mod test { let net_des = serde_json::from_str::(&ser).unwrap(); assert_eq!(net, net_des); - let net_str = "fd00:99::1/64"; + let net_str = "fd00:47::1/64"; let net = IpNet::from_str(net_str).unwrap(); let ser = serde_json::to_string(&net).unwrap(); diff --git a/docs/boundary-services-a-to-z.adoc b/docs/boundary-services-a-to-z.adoc index 6f4f2fcea6..e4c47ac7f9 100644 --- a/docs/boundary-services-a-to-z.adoc +++ b/docs/boundary-services-a-to-z.adoc @@ -1,115 +1,18 @@ = Boundary Services A-Z -NOTE: The instructions for _deploying_ SoftNPU with Omicron have been folded into xref:how-to-run.adoc[the main how-to-run docs]. +NOTE: The instructions for _deploying_ SoftNPU with Omicron have been folded +into xref:how-to-run.adoc[the main how-to-run docs]. 
-The virtual hardware making up SoftNPU is a bit different than what was previously used. What we now have looks like this. +The virtual hardware making up SoftNPU is depicted in the diagram below. image::plumbing.png[] -The `softnpu` zone will be configured and launched during the `create_virtual_hardware.sh` script. +The `softnpu` zone will be configured and launched during the +`create_virtual_hardware.sh` script. Once the control plane is running, `softnpu` can be configured via `dendrite` -using the `swadm` binary located in the `oxz_switch` zone. -An example script is provided in `tools/scrimlet/softnpu-init.sh`. -This script should work without modification for basic development setups, -but feel free to tweak it as needed. - ----- -$ ./tools/scrimlet/softnpu-init.sh -++ netstat -rn -f inet -++ grep default -++ awk -F ' ' '{print $2}' -+ GATEWAY_IP=10.85.0.1 -+ echo 'Using 10.85.0.1 as gateway ip' -Using 10.85.0.1 as gateway ip -++ arp 10.85.0.1 -++ awk -F ' ' '{print $4}' -+ GATEWAY_MAC=68:d7:9a:1f:77:a1 -+ echo 'Using 68:d7:9a:1f:77:a1 as gateway mac' -Using 68:d7:9a:1f:77:a1 as gateway mac -+ z_swadm link create rear0 --speed 100G --fec RS -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm link create rear0 --speed 100G --fec RS -+ z_swadm link create qsfp0 --speed 100G --fec RS -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm link create qsfp0 --speed 100G --fec RS -+ z_swadm addr add rear0/0 fe80::aae1:deff:fe01:701c -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr add rear0/0 fe80::aae1:deff:fe01:701c -+ z_swadm addr add qsfp0/0 fe80::aae1:deff:fe01:701d -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr add qsfp0/0 fe80::aae1:deff:fe01:701d -+ z_swadm addr add rear0/0 fd00:99::1 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr add rear0/0 fd00:99::1 -+ z_swadm route add fd00:1122:3344:0101::/64 rear0/0 fe80::aae1:deff:fe00:1 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm route add fd00:1122:3344:0101::/64 rear0/0 fe80::aae1:deff:fe00:1 -+ z_swadm arp add fe80::aae1:deff:fe00:1 a8:e1:de:00:00:01 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm arp add fe80::aae1:deff:fe00:1 a8:e1:de:00:00:01 -+ z_swadm arp add 10.85.0.1 68:d7:9a:1f:77:a1 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm arp add 10.85.0.1 68:d7:9a:1f:77:a1 -+ z_swadm route add 0.0.0.0/0 qsfp0/0 10.85.0.1 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm route add 0.0.0.0/0 qsfp0/0 10.85.0.1 -+ z_swadm link ls -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm link ls -Port/Link Media Speed FEC Enabled Link MAC -rear0/0 Copper 100G RS true Up a8:40:25:46:55:e3 -qsfp0/0 Copper 100G RS true Up a8:40:25:46:55:e4 -+ z_swadm addr list -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr list -Link IPv4 IPv6 -rear0/0 fe80::aae1:deff:fe01:701c - fd00:99::1 -qsfp0/0 fe80::aae1:deff:fe01:701d -+ z_swadm route list -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm route list -Subnet Port Link Gateway -0.0.0.0/0 qsfp0 0 10.85.0.1 -fd00:1122:3344:101::/64 rear0 0 fe80::aae1:deff:fe00:1 -+ z_swadm arp list -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm arp list -host mac age -10.85.0.1 68:d7:9a:1f:77:a1 0s -fe80::aae1:deff:fe00:1 a8:e1:de:00:00:01 0s ----- - -While following -https://github.com/oxidecomputer/omicron/blob/main/docs/how-to-run.adoc[how-to-run.adoc] -to set up IPs, images, disks, instances etc, pay particular attention to the -following. 
- -- The address range in the IP pool should be on a subnet in your local network that - can NAT out to the Internet. -- Be sure to set up an external IP for the instance you create. - -You will need to set up `proxy-arp` if your VM external IP addresses are on the -same L2 network as the router or other non-oxide hosts: ----- -pfexec /opt/oxide/softnpu/stuff/scadm \ - --server /opt/oxide/softnpu/stuff/server \ - --client /opt/oxide/softnpu/stuff/client \ - standalone \ - add-proxy-arp \ - $ip_pool_start \ - $ip_pool_end \ - $softnpu_mac ----- - -By the end, we have an instance up and running with external connectivity -configured via boundary services: ----- -ry@korgano:~/omicron$ ~/propolis/target/release/propolis-cli --server fd00:1122:3344:101::c serial - -debian login: root -Linux debian 5.10.0-9-amd64 #1 SMP Debian 5.10.70-1 (2021-09-30) x86_64 - -The programs included with the Debian GNU/Linux system are free software; -the exact distribution terms for each program are described in the -individual files in /usr/share/doc/*/copyright. - -Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent -permitted by applicable law. -root@debian:~# host oxide.computer -oxide.computer has address 76.76.21.61 -oxide.computer has address 76.76.21.22 -oxide.computer mail is handled by 5 alt2.aspmx.l.google.com. -oxide.computer mail is handled by 1 aspmx.l.google.com. -oxide.computer mail is handled by 10 aspmx3.googlemail.com. -oxide.computer mail is handled by 5 alt1.aspmx.l.google.com. -oxide.computer mail is handled by 10 aspmx2.googlemail.com. ----- +using the `swadm` binary located in the `oxz_switch` zone. This is not necessary +under normal operation, as the switch state will be managed automatically by the +control plane and networking daemons. An example script is provided in +`tools/scrimlet/softnpu-init.sh`. This script should work without modification +for basic development setups, but feel free to tweak it as needed. diff --git a/docs/networking.adoc b/docs/networking.adoc index 2ebad97842..84c95832c0 100644 --- a/docs/networking.adoc +++ b/docs/networking.adoc @@ -669,13 +669,13 @@ fdb0:a840:2504:352::/64 fe80::aa40:25ff:fe05:c UG 2 640 cxgbe0 fd00:1122:3344:1::/64 fe80::aa40:25ff:fe05:c UG 2 2401 cxgbe0 fd00:1122:3344:1::/64 fe80::aa40:25ff:fe05:40c UG 2 51 cxgbe1 fdb0:a840:2504:352::/64 fe80::aa40:25ff:fe05:40c UG 2 11090 cxgbe1 -fd00:99::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 +fdb2:ceeb:3ab7:8c9d::1/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fdb0:a840:2504:1d1::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fdb0:a840:2504:393::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fdb0:a840:2504:191::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fdb0:a840:2504:353::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fd00:1122:3344:101::/64 fe80::aa40:25ff:fe05:c UG 2 634578 cxgbe0 -fd00:99::/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1 +fd96:354:c1dc:606d::1/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1 fd00:1122:3344:101::/64 fe80::aa40:25ff:fe05:40c UG 2 14094545 cxgbe1 fdb0:a840:2504:1d1::/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1 fdb0:a840:2504:353::/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1 @@ -733,7 +733,11 @@ fd00:1122:3344:3::/64 fe80::aa40:25ff:fe05:c UG 2 2437 cxgbe0 Recall that cxgbe0 and cxgbe1 are connected to separate switches in the rack. So we're seeing the prefixes for the other sleds in this deployment. We have two routes to reach each sled: one through each switch. The gateway is the link-local address _of each switch_ on the corresponding link. 
One notable exception: the route for this same sled (`fd00:1122:3344:104::/64`) points to `underlay0`, the GZ's VNIC on the sled's underlay network. In this way, traffic leaving the GZ (whether it originated in this GZ or arrived from one of the switches) is directed to the sled's underlay network etherstub and from there to the right zone VNIC. -(Questions: Why does 107 only have one route? What are the `fd00:99::` routes?) +(Questions: Why does 107 only have one route?) + +The `fdb2:ceeb:3ab7:8c9d::1/64` and `fd96:354:c1dc:606d::1/64` routes are +randomly generated boundary services tunnel endpoint addresses. See RFD 404 for +more details. There are similar routes for other sleds' prefixes on the bootstrap network. diff --git a/illumos-utils/src/opte/mod.rs b/illumos-utils/src/opte/mod.rs index 710e783181..d06b6b26e5 100644 --- a/illumos-utils/src/opte/mod.rs +++ b/illumos-utils/src/opte/mod.rs @@ -29,26 +29,6 @@ pub use oxide_vpc::api::DhcpCfg; pub use oxide_vpc::api::Vni; use std::net::IpAddr; -fn default_boundary_services() -> BoundaryServices { - use oxide_vpc::api::Ipv6Addr; - use oxide_vpc::api::MacAddr; - // TODO-completeness: Don't hardcode any of these values. - // - // Boundary Services will be started on several Sidecars during rack - // setup, and those addresses and VNIs will need to be propagated here. - // See https://github.com/oxidecomputer/omicron/issues/1382 - let ip = Ipv6Addr::from([0xfd00, 0x99, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01]); - - // This MAC address is entirely irrelevant to the functionality of OPTE and - // the Oxide VPC. It's never used to actually forward packets. It only - // represents the "logical" destination of Boundary Services as a - // destination that OPTE as a virtual gateway forwards packets to as its - // next hop. - let mac = MacAddr::from_const([0xa8, 0x40, 0x25, 0xf9, 0x99, 0x99]); - let vni = Vni::new(99_u32).unwrap(); - BoundaryServices { ip, mac, vni } -} - /// Information about the gateway for an OPTE port #[derive(Debug, Clone, Copy)] #[allow(dead_code)] diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 3558ef1c78..c472996598 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -4,7 +4,6 @@ //! Manager for all OPTE ports on a Helios system -use crate::opte::default_boundary_services; use crate::opte::opte_firewall_rules; use crate::opte::params::DeleteVirtualNetworkInterfaceHost; use crate::opte::params::SetVirtualNetworkInterfaceHost; @@ -110,7 +109,6 @@ impl PortManager { let subnet = IpNetwork::from(nic.subnet); let vpc_subnet = IpCidr::from(subnet); let gateway = Gateway::from_subnet(&subnet); - let boundary_services = default_boundary_services(); // Describe the external IP addresses for this port. macro_rules! ip_cfg { @@ -219,7 +217,6 @@ impl PortManager { gateway_mac: MacAddr::from(gateway.mac.into_array()), vni, phys_ip: self.inner.underlay_ip.into(), - boundary_services, }; // Create the xde device. 
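 // (Aside, not part of the patch: with `default_boundary_services()` and
 // its hardcoded `fd00:99::1` next hop gone, the port config no longer
 // names a static boundary-services destination; boundary traffic instead
 // follows the randomly generated tunnel endpoints that ddmd advertises
 // via `advertise_tunnel_endpoint`, added earlier in this patch -- see
 // also the networking.adoc note about RFD 404 above.)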
diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 23ee39415f..17e7a17444 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -33,7 +33,7 @@ use nexus_types::external_api::params::RouteConfig; use nexus_types::external_api::params::SwitchPortConfigCreate; use nexus_types::external_api::params::UninitializedSledId; use nexus_types::external_api::params::{ - AddressLotCreate, BgpPeerConfig, LoopbackAddressCreate, Route, SiloCreate, + AddressLotCreate, BgpPeerConfig, Route, SiloCreate, SwitchPortSettingsCreate, }; use nexus_types::external_api::shared::Baseboard; @@ -375,24 +375,7 @@ impl super::Nexus { let ipv4_block = AddressLotBlockCreate { first_address, last_address }; - let first_address = - IpAddr::from_str("fd00:99::1").map_err(|e| { - Error::internal_error(&format!( - "failed to parse `fd00:99::1` as `IpAddr`: {e}" - )) - })?; - - let last_address = - IpAddr::from_str("fd00:99::ffff").map_err(|e| { - Error::internal_error(&format!( - "failed to parse `fd00:99::ffff` as `IpAddr`: {e}" - )) - })?; - - let ipv6_block = - AddressLotBlockCreate { first_address, last_address }; - - let blocks = vec![ipv4_block, ipv6_block]; + let blocks = vec![ipv4_block]; let address_lot_params = AddressLotCreate { identity, kind, blocks }; @@ -412,24 +395,6 @@ impl super::Nexus { }, }?; - let address_lot_lookup = self - .address_lot_lookup( - &opctx, - NameOrId::Name(address_lot_name.clone()), - ) - .map_err(|e| { - Error::internal_error(&format!( - "unable to lookup infra address_lot: {e}" - )) - })?; - - let (.., authz_address_lot) = address_lot_lookup - .lookup_for(authz::Action::Modify) - .await - .map_err(|e| { - Error::internal_error(&format!("unable to retrieve authz_address_lot for infra address_lot: {e}")) - })?; - let mut bgp_configs = HashMap::new(); for bgp_config in &rack_network_config.bgp { @@ -542,43 +507,6 @@ impl super::Nexus { )) })?; - // TODO: #3603 Use separate address lots for loopback addresses and infra ips - let loopback_address_params = LoopbackAddressCreate { - address_lot: NameOrId::Name(address_lot_name.clone()), - rack_id, - switch_location: switch_location.clone(), - address: first_address, - mask: 64, - anycast: true, - }; - - if self - .loopback_address_lookup( - &opctx, - rack_id, - switch_location.clone().into(), - ipnetwork::IpNetwork::new( - loopback_address_params.address, - loopback_address_params.mask, - ) - .map_err(|_| { - Error::invalid_request("invalid loopback address") - })? - .into(), - )? - .lookup_for(authz::Action::Read) - .await - .is_err() - { - self.db_datastore - .loopback_address_create( - opctx, - &loopback_address_params, - None, - &authz_address_lot, - ) - .await?; - } let uplink_name = format!("default-uplink{idx}"); let name = Name::from_str(&uplink_name).unwrap(); diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index 0d6bb52421..979ec54afd 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -15,6 +15,10 @@ use crate::app::sagas::{ use anyhow::Error; use db::datastore::SwitchPortSettingsCombinedResult; use dpd_client::types::PortId; +use mg_admin_client::types::{ + AddStaticRoute4Request, DeleteStaticRoute4Request, Prefix4, StaticRoute4, + StaticRoute4List, +}; use nexus_db_model::NETWORK_KEY; use nexus_db_queries::db::datastore::UpdatePrecondition; use nexus_db_queries::{authn, db}; @@ -52,6 +56,10 @@ declare_saga_actions! 
{ + spa_ensure_switch_port_settings - spa_undo_ensure_switch_port_settings } + ENSURE_SWITCH_ROUTES -> "ensure_switch_routes" { + + spa_ensure_switch_routes + - spa_undo_ensure_switch_routes + } ENSURE_SWITCH_PORT_UPLINK -> "ensure_switch_port_uplink" { + spa_ensure_switch_port_uplink - spa_undo_ensure_switch_port_uplink @@ -210,6 +218,82 @@ async fn spa_ensure_switch_port_settings( Ok(()) } +async fn spa_ensure_switch_routes( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let settings = sagactx + .lookup::("switch_port_settings")?; + + let mut rq = AddStaticRoute4Request { + routes: StaticRoute4List { list: Vec::new() }, + }; + for r in settings.routes { + let nexthop = match r.gw.ip() { + IpAddr::V4(v4) => v4, + IpAddr::V6(_) => continue, + }; + let prefix = match r.gw.ip() { + IpAddr::V4(v4) => Prefix4 { value: v4, length: r.gw.prefix() }, + IpAddr::V6(_) => continue, + }; + let sr = StaticRoute4 { nexthop, prefix }; + rq.routes.list.push(sr); + } + + let mg_client: Arc = + select_mg_client(&sagactx, &opctx, params.switch_port_id).await?; + + mg_client.inner.static_add_v4_route(&rq).await.map_err(|e| { + ActionError::action_failed(format!("mgd static route add {e}")) + })?; + + Ok(()) +} + +async fn spa_undo_ensure_switch_routes( + sagactx: NexusActionContext, +) -> Result<(), Error> { + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + let settings = sagactx + .lookup::("switch_port_settings")?; + + let mut rq = DeleteStaticRoute4Request { + routes: StaticRoute4List { list: Vec::new() }, + }; + + for r in settings.routes { + let nexthop = match r.gw.ip() { + IpAddr::V4(v4) => v4, + IpAddr::V6(_) => continue, + }; + let prefix = match r.gw.ip() { + IpAddr::V4(v4) => Prefix4 { value: v4, length: r.gw.prefix() }, + IpAddr::V6(_) => continue, + }; + let sr = StaticRoute4 { nexthop, prefix }; + rq.routes.list.push(sr); + } + + let mg_client: Arc = + select_mg_client(&sagactx, &opctx, params.switch_port_id).await?; + + mg_client.inner.static_remove_v4_route(&rq).await.map_err(|e| { + ActionError::action_failed(format!("mgd static route remove {e}")) + })?; + + Ok(()) +} + async fn spa_undo_ensure_switch_port_settings( sagactx: NexusActionContext, ) -> Result<(), Error> { diff --git a/nexus/src/app/sagas/switch_port_settings_clear.rs b/nexus/src/app/sagas/switch_port_settings_clear.rs index 0d876f8159..ff79de8e8e 100644 --- a/nexus/src/app/sagas/switch_port_settings_clear.rs +++ b/nexus/src/app/sagas/switch_port_settings_clear.rs @@ -15,12 +15,16 @@ use crate::app::sagas::{ }; use anyhow::Error; use dpd_client::types::PortId; -use mg_admin_client::types::DeleteNeighborRequest; +use mg_admin_client::types::{ + AddStaticRoute4Request, DeleteNeighborRequest, DeleteStaticRoute4Request, + Prefix4, StaticRoute4, StaticRoute4List, +}; use nexus_db_model::NETWORK_KEY; use nexus_db_queries::authn; use nexus_db_queries::db::datastore::UpdatePrecondition; use omicron_common::api::external::{self, NameOrId, SwitchLocation}; use serde::{Deserialize, Serialize}; +use std::net::IpAddr; use std::str::FromStr; use std::sync::Arc; use steno::ActionError; @@ -43,6 +47,10 @@ declare_saga_actions! 
{ + spa_clear_switch_port_settings - spa_undo_clear_switch_port_settings } + CLEAR_SWITCH_PORT_ROUTES -> "clear_switch_port_routes" { + + spa_clear_switch_port_routes + - spa_undo_clear_switch_port_routes + } CLEAR_SWITCH_PORT_UPLINK -> "clear_switch_port_uplink" { + spa_clear_switch_port_uplink - spa_undo_clear_switch_port_uplink @@ -351,6 +359,108 @@ async fn spa_undo_clear_switch_port_bgp_settings( .await?) } +async fn spa_clear_switch_port_routes( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let nexus = osagactx.nexus(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let orig_port_settings_id = + sagactx.lookup::>("original_switch_port_settings_id")?; + + let id = match orig_port_settings_id { + Some(id) => id, + None => return Ok(()), + }; + + let settings = nexus + .switch_port_settings_get(&opctx, &NameOrId::Id(id)) + .await + .map_err(ActionError::action_failed)?; + + let mut rq = DeleteStaticRoute4Request { + routes: StaticRoute4List { list: Vec::new() }, + }; + + for r in settings.routes { + let nexthop = match r.gw.ip() { + IpAddr::V4(v4) => v4, + IpAddr::V6(_) => continue, + }; + let prefix = match r.gw.ip() { + IpAddr::V4(v4) => Prefix4 { value: v4, length: r.gw.prefix() }, + IpAddr::V6(_) => continue, + }; + let sr = StaticRoute4 { nexthop, prefix }; + rq.routes.list.push(sr); + } + + let mg_client: Arc = + select_mg_client(&sagactx, &opctx, params.switch_port_id).await?; + + mg_client.inner.static_remove_v4_route(&rq).await.map_err(|e| { + ActionError::action_failed(format!("mgd static route remove {e}")) + })?; + + Ok(()) +} + +async fn spa_undo_clear_switch_port_routes( + sagactx: NexusActionContext, +) -> Result<(), Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let nexus = osagactx.nexus(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let orig_port_settings_id = + sagactx.lookup::>("original_switch_port_settings_id")?; + + let id = match orig_port_settings_id { + Some(id) => id, + None => return Ok(()), + }; + + let settings = nexus + .switch_port_settings_get(&opctx, &NameOrId::Id(id)) + .await + .map_err(ActionError::action_failed)?; + + let mut rq = AddStaticRoute4Request { + routes: StaticRoute4List { list: Vec::new() }, + }; + + for r in settings.routes { + let nexthop = match r.gw.ip() { + IpAddr::V4(v4) => v4, + IpAddr::V6(_) => continue, + }; + let prefix = match r.gw.ip() { + IpAddr::V4(v4) => Prefix4 { value: v4, length: r.gw.prefix() }, + IpAddr::V6(_) => continue, + }; + let sr = StaticRoute4 { nexthop, prefix }; + rq.routes.list.push(sr); + } + + let mg_client: Arc = + select_mg_client(&sagactx, &opctx, params.switch_port_id).await?; + + mg_client.inner.static_add_v4_route(&rq).await.map_err(|e| { + ActionError::action_failed(format!("mgd static route remove {e}")) + })?; + + Ok(()) +} + async fn spa_clear_switch_port_bootstore_network_settings( sagactx: NexusActionContext, ) -> Result<(), ActionError> { diff --git a/nexus/src/app/sagas/switch_port_settings_common.rs b/nexus/src/app/sagas/switch_port_settings_common.rs index 9ef23ebf44..9c710d837d 100644 --- a/nexus/src/app/sagas/switch_port_settings_common.rs +++ b/nexus/src/app/sagas/switch_port_settings_common.rs @@ -1,12 +1,14 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + use super::NexusActionContext; use crate::app::map_switch_zone_addrs; use crate::Nexus; use db::datastore::SwitchPortSettingsCombinedResult; use dpd_client::types::{ LinkCreate, LinkId, LinkSettings, PortFec, PortSettings, PortSpeed, - RouteSettingsV4, RouteSettingsV6, }; -use dpd_client::{Ipv4Cidr, Ipv6Cidr}; use internal_dns::ServiceName; use ipnetwork::IpNetwork; use mg_admin_client::types::Prefix4; @@ -85,41 +87,6 @@ pub(crate) fn api_to_dpd_port_settings( ); } - for r in &settings.routes { - match &r.dst { - IpNetwork::V4(n) => { - let gw = match r.gw.ip() { - IpAddr::V4(gw) => gw, - IpAddr::V6(_) => { - return Err( - "IPv4 destination cannot have IPv6 nexthop".into() - ) - } - }; - dpd_port_settings.v4_routes.insert( - Ipv4Cidr { prefix: n.ip(), prefix_len: n.prefix() } - .to_string(), - vec![RouteSettingsV4 { link_id: link_id.0, nexthop: gw }], - ); - } - IpNetwork::V6(n) => { - let gw = match r.gw.ip() { - IpAddr::V6(gw) => gw, - IpAddr::V4(_) => { - return Err( - "IPv6 destination cannot have IPv4 nexthop".into() - ) - } - }; - dpd_port_settings.v6_routes.insert( - Ipv6Cidr { prefix: n.ip(), prefix_len: n.prefix() } - .to_string(), - vec![RouteSettingsV6 { link_id: link_id.0, nexthop: gw }], - ); - } - } - } - Ok(dpd_port_settings) } diff --git a/package-manifest.toml b/package-manifest.toml index fa6bba7a96..3525b121e4 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -438,10 +438,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "2fd39b75df696961e5ea190c7d74dd91f4849cd3" +source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt -source.sha256 = "38851c79c85d53e997db748520fb27c82299ce7e58a550e35646a548498f1271" +source.sha256 = "1cf9cb514d11275d93c4e4760500539a778f23039374508ca07528fcaf0ba3f8" output.type = "tarball" [package.mg-ddm] @@ -454,10 +454,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "2fd39b75df696961e5ea190c7d74dd91f4849cd3" +source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "8cd94e9a6f6175081ce78f0281085a08a5306cde453d8e21deb28050945b1d88" +source.sha256 = "a9b959b4287ac2ec7b45ed99ccd00e1f134b8e3d501099cd669cee5de9525ae3" output.type = "zone" output.intermediate_only = true @@ -469,10 +469,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "2fd39b75df696961e5ea190c7d74dd91f4849cd3" +source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "802636775fa77dc6eec193e65fde87e403f6a11531745d47ef5e7ff13b242890" +source.sha256 = "ab882fbeab54987645492872e67f3351f8d14629a041465cc845ac8583a7002b" output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 75958a2f37..acad2b8d3c 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -6,21 +6,23 @@ use anyhow::{anyhow, Context}; use bootstore::schemes::v0 as bootstore; -use ddm_admin_client::{Client as DdmAdminClient, DdmError}; -use dpd_client::types::{Ipv6Entry, RouteSettingsV6}; +use ddm_admin_client::DdmError; use dpd_client::types::{ - LinkCreate, LinkId, LinkSettings, PortId, PortSettings, RouteSettingsV4, + LinkCreate, LinkId, LinkSettings, PortId, PortSettings, }; use dpd_client::Client as DpdClient; use futures::future; use gateway_client::Client as MgsClient; use internal_dns::resolver::{ResolveError, Resolver as DnsResolver}; use internal_dns::ServiceName; -use ipnetwork::{IpNetwork, Ipv6Network}; -use mg_admin_client::types::{ApplyRequest, BgpPeerConfig, Prefix4}; +use ipnetwork::Ipv6Network; +use mg_admin_client::types::{ + AddStaticRoute4Request, ApplyRequest, BgpPeerConfig, Prefix4, StaticRoute4, + StaticRoute4List, +}; use mg_admin_client::Client as MgdClient; -use omicron_common::address::{Ipv6Subnet, MGD_PORT, MGS_PORT}; -use omicron_common::address::{DDMD_PORT, DENDRITE_PORT}; +use omicron_common::address::DENDRITE_PORT; +use omicron_common::address::{MGD_PORT, MGS_PORT}; use omicron_common::api::internal::shared::{ BgpConfig, PortConfigV1, PortFec, PortSpeed, RackNetworkConfig, RackNetworkConfigV1, SwitchLocation, UplinkConfig, @@ -38,7 +40,6 @@ use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; use std::time::{Duration, Instant}; use thiserror::Error; -static BOUNDARY_SERVICES_ADDR: &str = "fd00:99::1"; const BGP_SESSION_RESOLUTION: u64 = 100; /// Errors that can occur during early network setup @@ -421,22 +422,11 @@ impl<'a> EarlyNetworkSetup<'a> { // configure uplink for each requested uplink in configuration that // matches our switch_location for port_config in &our_ports { - let (ipv6_entry, dpd_port_settings, port_id) = + let (dpd_port_settings, port_id) = self.build_port_config(port_config)?; self.wait_for_dendrite(&dpd).await; - info!( - self.log, - "Configuring boundary services loopback address on switch"; - "config" => #?ipv6_entry - ); - dpd.loopback_ipv6_create(&ipv6_entry).await.map_err(|e| { - EarlyNetworkSetupError::Dendrite(format!( - "unable to create inital switch loopback address: {e}" - )) - })?; - info!( self.log, "Configuring default uplink on switch"; @@ -453,13 +443,6 @@ impl<'a> EarlyNetworkSetup<'a> { "unable to apply uplink port configuration: {e}" )) })?; - - info!(self.log, "advertising boundary services loopback address"); - - let ddmd_addr = - SocketAddrV6::new(switch_zone_underlay_ip, DDMD_PORT, 0, 0); - let ddmd_client = DdmAdminClient::new(&self.log, ddmd_addr)?; - ddmd_client.advertise_prefix(Ipv6Subnet::new(ipv6_entry.addr)); } let mgd = MgdClient::new( @@ -548,22 +531,40 @@ impl<'a> EarlyNetworkSetup<'a> { } } + // Iterate through ports and apply static routing config. 
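+        // All IPv4 routes across the uplink ports are batched into a
+        // single AddStaticRoute4Request below; IPv6 routes are skipped
+        // via `continue` because this request type only carries v4
+        // prefixes.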
+ let mut rq = AddStaticRoute4Request { + routes: StaticRoute4List { list: Vec::new() }, + }; + for port in &our_ports { + for r in &port.routes { + let nexthop = match r.nexthop { + IpAddr::V4(v4) => v4, + IpAddr::V6(_) => continue, + }; + let prefix = match r.destination.ip() { + IpAddr::V4(v4) => { + Prefix4 { value: v4, length: r.destination.prefix() } + } + IpAddr::V6(_) => continue, + }; + let sr = StaticRoute4 { nexthop, prefix }; + rq.routes.list.push(sr); + } + } + mgd.inner.static_add_v4_route(&rq).await.map_err(|e| { + EarlyNetworkSetupError::BgpConfigurationError(format!( + "static routing configuration failed: {e}", + )) + })?; + Ok(our_ports) } fn build_port_config( &self, port_config: &PortConfigV1, - ) -> Result<(Ipv6Entry, PortSettings, PortId), EarlyNetworkSetupError> { + ) -> Result<(PortSettings, PortId), EarlyNetworkSetupError> { info!(self.log, "Building Port Configuration"); - let ipv6_entry = Ipv6Entry { - addr: BOUNDARY_SERVICES_ADDR.parse().map_err(|e| { - EarlyNetworkSetupError::BadConfig(format!( - "failed to parse `BOUNDARY_SERVICES_ADDR` as `Ipv6Addr`: {e}" - )) - })?, - tag: OMICRON_DPD_TAG.into(), - }; let mut dpd_port_settings = PortSettings { links: HashMap::new(), v4_routes: HashMap::new(), @@ -600,26 +601,7 @@ impl<'a> EarlyNetworkSetup<'a> { )) })?; - for r in &port_config.routes { - if let (IpNetwork::V4(dst), IpAddr::V4(nexthop)) = - (r.destination, r.nexthop) - { - dpd_port_settings.v4_routes.insert( - dst.to_string(), - vec![RouteSettingsV4 { link_id: link_id.0, nexthop }], - ); - } - if let (IpNetwork::V6(dst), IpAddr::V6(nexthop)) = - (r.destination, r.nexthop) - { - dpd_port_settings.v6_routes.insert( - dst.to_string(), - vec![RouteSettingsV6 { link_id: link_id.0, nexthop }], - ); - } - } - - Ok((ipv6_entry, dpd_port_settings, port_id)) + Ok((dpd_port_settings, port_id)) } async fn wait_for_dendrite(&self, dpd: &DpdClient) { diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index fdc81c0f8f..12cb2afd24 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -103,7 +103,7 @@ bgp = [] # Routes associated with this port. routes = [{nexthop = "192.168.1.199", destination = "0.0.0.0/0"}] # Addresses associated with this port. -addresses = ["192.168.1.30/32"] +addresses = ["192.168.1.30/24"] # Name of the uplink port. This should always be "qsfp0" when using softnpu. port = "qsfp0" # The speed of this port. diff --git a/tools/ci_check_opte_ver.sh b/tools/ci_check_opte_ver.sh index 26382690e1..7f05ec1f36 100755 --- a/tools/ci_check_opte_ver.sh +++ b/tools/ci_check_opte_ver.sh @@ -1,6 +1,11 @@ #!/bin/bash set -euo pipefail +source tools/opte_version_override +if [[ "x$OPTE_COMMIT" != "x" ]]; then + exit 0 +fi + # Grab all the oxidecomputer/opte dependencies' revisions readarray -t opte_deps_revs < <(toml get Cargo.toml workspace.dependencies | jq -r 'to_entries | .[] | select(.value.git? | contains("oxidecomputer/opte")?) | .value.rev') OPTE_REV="${opte_deps_revs[0]}" diff --git a/tools/install_opte.sh b/tools/install_opte.sh index 20a33b05a5..b572c305a7 100755 --- a/tools/install_opte.sh +++ b/tools/install_opte.sh @@ -97,3 +97,13 @@ if [[ "$RC" -ne 0 ]]; then echo "The \`opteadm\` administration tool is not on your path." echo "You may add \"/opt/oxide/opte/bin\" to your path to access it." 
fi + +source $OMICRON_TOP/tools/opte_version_override + +if [[ "x$OPTE_COMMIT" != "x" ]]; then + set +x + curl -fOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde + pfexec rem_drv xde || true + pfexec mv xde /kernel/drv/amd64/xde + pfexec add_drv xde || true +fi diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 37c099d7f5..be8772b7e6 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="2fd39b75df696961e5ea190c7d74dd91f4849cd3" -SHA2="9737906555a60911636532f00f1dc2866dc7cd6553beb106e9e57beabad41cdf" +COMMIT="869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +SHA2="0b0dbc2f8bbc5d2d9be92d64c4865f8f9335355aae62f7de9f67f81dfb3f1803" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 329c05fc42..6bf1999c61 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="2fd39b75df696961e5ea190c7d74dd91f4849cd3" -SHA2="931efa310d972b1f8afba2308751fc6a2035afbaebba77b3a40a8358c123ba3c" +COMMIT="869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +SHA2="7618511f905d26394ef7c552339dd78835ce36a6def0d85b05b6d1e363a5e7b4" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 1d3cf98f94..b5fe84b662 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="802636775fa77dc6eec193e65fde87e403f6a11531745d47ef5e7ff13b242890" -MGD_LINUX_SHA256="1bcadfd700902e3640843e0bb53d3defdbcd8d86c3279efa0953ae8d6437e2b0" \ No newline at end of file +CIDL_SHA256="ab882fbeab54987645492872e67f3351f8d14629a041465cc845ac8583a7002b" +MGD_LINUX_SHA256="93331c1001e3aa506a8c1b83346abba1995e489910bff2c94a86730b96617a34" \ No newline at end of file diff --git a/tools/opte_version b/tools/opte_version index 82d79dcf28..0a04873e11 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.27.214 +0.28.215 diff --git a/tools/opte_version_override b/tools/opte_version_override new file mode 100644 index 0000000000..80a6529b24 --- /dev/null +++ b/tools/opte_version_override @@ -0,0 +1,5 @@ +#!/bin/bash + +# only set this if you want to override the version of opte/xde installed by the +# install_opte.sh script +OPTE_COMMIT="" From cc643045191d92e539faa839d662a4198d81d718 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 24 Jan 2024 21:05:37 +0000 Subject: [PATCH 36/91] Live attach/detach of external IPs (#4694) This PR adds new endpoints to attach and detach external IPs to/from an individual instance at runtime, when instances are either stopped or started. These new endpoints are: * POST `/v1/floating-ips/{floating_ip}/attach` * POST `/v1/floating-ips/{floating_ip}/detach` * POST `/v1/instances/{instance}/external-ips/ephemeral` * DELETE `/v1/instances/{instance}/external-ips/ephemeral` These follow and enforce the same rules as external IPs registered during instance creation: at most one ephemeral IP, and at most 32 external IPs total. `/v1/floating-ips/{floating_ip}/attach` includes a `kind` field to account for future API resources which a FIP may be bound to -- such as internet gateways, load balancers, and services. ## Interaction with other instance lifecycle changes and sagas Both external IP modify sagas begin with an atomic update to external IP attach state conditioned on $\mathit{state}\in[ \mathit{started},\mathit{stopped}]$. 
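Concretely, that gate can be pictured as a compare-and-set on the IP's attach state, conditioned on the instance being stable. A self-contained sketch follows (the real version is a single conditional database update over the instance and external IP tables; these enums and the helper function are stand-ins, not the PR's code):

----
#[derive(Clone, Copy, PartialEq)]
enum InstanceState { Started, Stopped, Starting, Stopping }

#[derive(Clone, Copy, PartialEq)]
enum IpAttachState { Detached, Attaching, Attached, Detaching }

/// Move a detached IP to `Attaching`, but only while the parent
/// instance is in a stable (started/stopped) state; reject the
/// transition otherwise so concurrent sagas cannot interleave.
fn begin_attach(
    instance: InstanceState,
    ip: IpAttachState,
) -> Option<IpAttachState> {
    let stable =
        matches!(instance, InstanceState::Started | InstanceState::Stopped);
    (stable && ip == IpAttachState::Detached)
        .then_some(IpAttachState::Attaching)
}
----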
As a result, we know that an external IP saga can only ever begin before any other instance state change occurs. We then only need to consider how those other sagas and events must behave when they run *during* an attach/detach, keeping in mind that these are worst-case orderings: attach/detach are likely to complete quickly.

### Instance start & migrate

Both of these sagas alter an instance's functional sled ID, which controls whether NAT entry insertion and OPTE port state updates are performed. If an IP attach/detach is incomplete when either saga reaches `instance_ensure_dpd_config` or `instance_ensure_registered` (i.e., any IP associated with the target instance is in the attaching/detaching state), the start/migrate will unwind with an HTTP 503. Generally, neither should undo in practice since IP attach/detach are fast operations -- particularly when the instance was previously stopped. This unwinding exists solely to guarantee that only one saga accesses a given external IP at a time, and that the update target remains unchanged.

### Instance stop & delete

These operations are either not sagaized (stop) or cannot unwind (delete), so we cannot block them using IP attach state. IP attach/detach will unwind if a given sled-agent is no longer responsible for an instance. Instance delete will force-detach IP addresses bound to an instance, and if IP attach observes such a force-detach it deliberately unwinds so that it can potentially clean up NAT state. OPTE/DPD undo operations are best-effort in such a case to prevent stuck sagas.

Instance stop and IP attach may interleave such that the latter adds additional NAT entries after other network state is cleared. Because we cannot unwind in this case, `instance_ensure_dpd_config` will now attempt to remove leftover conflicting RPW entries if they are detected, since we know they are a deviation from the intended state.

## Additional/supporting changes

* Pool/floating IP specifiers in instance create now take `NameOrId`; parameter names have been changed to match.
* External IP create/bind in instance create no longer double-resolves the name on saga unwind.
* `views::ExternalIp` can now contain a `FloatingIp` body.
* DPD NAT insert/remove functions now perform a single rule update via ID instead of indexing into the EIP list -- the index-based approach was unstable under live addition/removal.
* NAT RPW ensure is now more authoritative, and will remove conflicting entries if an initial insert fails.
* Pool `NameOrId` resolution for floating IP allocation has been pulled up from `Datastore` into `Nexus`.

---

Closes #4630 and #4628.
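For reference, the "at most 32 external IPs" rule called out above is enforced at attach time, with one extra slot reserved for the instance's implicit SNAT allocation. A minimal sketch of the bound (the constant names mirror the new datastore code; the helper itself is illustrative):

----
const MAX_EXTERNAL_IPS_PER_INSTANCE: u32 = 32;
const MAX_EXTERNAL_IPS_PLUS_SNAT: u32 = MAX_EXTERNAL_IPS_PER_INSTANCE + 1;

/// `attached_count` counts every external IP row bound to the
/// instance, including its SNAT entry, hence the +1 bound.
fn may_attach(attached_count: u32) -> bool {
    attached_count < MAX_EXTERNAL_IPS_PLUS_SNAT
}
----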
--- dev-tools/omdb/src/bin/omdb/db.rs | 3 + end-to-end-tests/src/instance_launch.rs | 13 +- illumos-utils/src/opte/illumos.rs | 10 + illumos-utils/src/opte/non_illumos.rs | 10 + illumos-utils/src/opte/port_manager.rs | 116 +++ nexus/db-model/src/external_ip.rs | 120 ++- nexus/db-model/src/instance.rs | 17 +- nexus/db-model/src/instance_state.rs | 6 + nexus/db-model/src/ipv4_nat_entry.rs | 3 +- nexus/db-model/src/macaddr.rs | 13 +- nexus/db-model/src/schema.rs | 1 + nexus/db-queries/src/db/datastore/disk.rs | 4 +- .../src/db/datastore/external_ip.rs | 707 +++++++++++++++--- nexus/db-queries/src/db/datastore/instance.rs | 102 +-- .../src/db/datastore/ipv4_nat_entry.rs | 29 +- nexus/db-queries/src/db/datastore/mod.rs | 26 +- nexus/db-queries/src/db/pool_connection.rs | 1 + .../db-queries/src/db/queries/external_ip.rs | 131 +++- nexus/src/app/external_ip.rs | 84 ++- nexus/src/app/instance.rs | 78 ++ nexus/src/app/instance_network.rs | 384 ++++++---- nexus/src/app/mod.rs | 4 +- nexus/src/app/sagas/instance_common.rs | 336 ++++++++- nexus/src/app/sagas/instance_create.rs | 141 +++- nexus/src/app/sagas/instance_delete.rs | 2 +- nexus/src/app/sagas/instance_ip_attach.rs | 583 +++++++++++++++ nexus/src/app/sagas/instance_ip_detach.rs | 551 ++++++++++++++ nexus/src/app/sagas/instance_start.rs | 27 +- nexus/src/app/sagas/mod.rs | 8 + nexus/src/external_api/http_entrypoints.rs | 140 ++++ nexus/test-utils/src/resource_helpers.rs | 4 +- nexus/tests/integration_tests/disks.rs | 1 + nexus/tests/integration_tests/endpoints.rs | 62 +- nexus/tests/integration_tests/external_ips.rs | 490 +++++++++++- nexus/tests/integration_tests/instances.rs | 49 +- .../integration_tests/subnet_allocation.rs | 1 + nexus/tests/output/nexus_tags.txt | 4 + nexus/types/src/external_api/params.rs | 45 +- nexus/types/src/external_api/shared.rs | 4 +- nexus/types/src/external_api/views.rs | 53 +- openapi/nexus.json | 363 ++++++++- openapi/sled-agent.json | 115 +++ schema/crdb/25.0.0/up01.sql | 6 + schema/crdb/25.0.0/up02.sql | 4 + schema/crdb/25.0.0/up03.sql | 7 + schema/crdb/25.0.0/up04.sql | 7 + schema/crdb/25.0.0/up05.sql | 2 + schema/crdb/25.0.0/up06.sql | 4 + schema/crdb/25.0.0/up07.sql | 4 + schema/crdb/25.0.0/up08.sql | 2 + schema/crdb/25.0.0/up09.sql | 4 + schema/crdb/dbinit.sql | 38 +- sled-agent/src/http_entrypoints.rs | 36 +- sled-agent/src/instance.rs | 154 +++- sled-agent/src/instance_manager.rs | 37 + sled-agent/src/params.rs | 10 + sled-agent/src/sim/http_entrypoints.rs | 41 +- sled-agent/src/sim/sled_agent.rs | 64 +- sled-agent/src/sled_agent.rs | 40 +- 59 files changed, 4776 insertions(+), 525 deletions(-) create mode 100644 nexus/src/app/sagas/instance_ip_attach.rs create mode 100644 nexus/src/app/sagas/instance_ip_detach.rs create mode 100644 schema/crdb/25.0.0/up01.sql create mode 100644 schema/crdb/25.0.0/up02.sql create mode 100644 schema/crdb/25.0.0/up03.sql create mode 100644 schema/crdb/25.0.0/up04.sql create mode 100644 schema/crdb/25.0.0/up05.sql create mode 100644 schema/crdb/25.0.0/up06.sql create mode 100644 schema/crdb/25.0.0/up07.sql create mode 100644 schema/crdb/25.0.0/up08.sql create mode 100644 schema/crdb/25.0.0/up09.sql diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 23e9206506..a465183351 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -44,6 +44,7 @@ use nexus_db_model::ExternalIp; use nexus_db_model::HwBaseboardId; use nexus_db_model::Instance; use nexus_db_model::InvCollection; +use 
nexus_db_model::IpAttachState; use nexus_db_model::Project; use nexus_db_model::Region; use nexus_db_model::RegionSnapshot; @@ -1705,6 +1706,7 @@ async fn cmd_db_eips( ip: ipnetwork::IpNetwork, ports: PortRange, kind: String, + state: IpAttachState, owner: Owner, } @@ -1789,6 +1791,7 @@ async fn cmd_db_eips( first: ip.first_port.into(), last: ip.last_port.into(), }, + state: ip.state, kind: format!("{:?}", ip.kind), owner, }; diff --git a/end-to-end-tests/src/instance_launch.rs b/end-to-end-tests/src/instance_launch.rs index b3d1406070..2efd66bf91 100644 --- a/end-to-end-tests/src/instance_launch.rs +++ b/end-to-end-tests/src/instance_launch.rs @@ -5,9 +5,9 @@ use anyhow::{ensure, Context as _, Result}; use async_trait::async_trait; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{ - ByteCount, DiskCreate, DiskSource, ExternalIpCreate, InstanceCpuCount, - InstanceCreate, InstanceDiskAttachment, InstanceNetworkInterfaceAttachment, - SshKeyCreate, + ByteCount, DiskCreate, DiskSource, ExternalIp, ExternalIpCreate, + InstanceCpuCount, InstanceCreate, InstanceDiskAttachment, + InstanceNetworkInterfaceAttachment, SshKeyCreate, }; use oxide_client::{ClientDisksExt, ClientInstancesExt, ClientSessionExt}; use russh::{ChannelMsg, Disconnect}; @@ -70,7 +70,7 @@ async fn instance_launch() -> Result<()> { name: disk_name.clone(), }], network_interfaces: InstanceNetworkInterfaceAttachment::Default, - external_ips: vec![ExternalIpCreate::Ephemeral { pool_name: None }], + external_ips: vec![ExternalIpCreate::Ephemeral { pool: None }], user_data: String::new(), start: true, }) @@ -87,7 +87,10 @@ async fn instance_launch() -> Result<()> { .items .first() .context("no external IPs")? - .ip; + .clone(); + let ExternalIp::Ephemeral { ip: ip_addr } = ip_addr else { + anyhow::bail!("IP bound to instance was not ephemeral as required.") + }; eprintln!("instance external IP: {}", ip_addr); // poll serial for login prompt, waiting 5 min max diff --git a/illumos-utils/src/opte/illumos.rs b/illumos-utils/src/opte/illumos.rs index 88e8d343b1..527172b976 100644 --- a/illumos-utils/src/opte/illumos.rs +++ b/illumos-utils/src/opte/illumos.rs @@ -11,6 +11,7 @@ use omicron_common::api::internal::shared::NetworkInterfaceKind; use opte_ioctl::OpteHdl; use slog::info; use slog::Logger; +use std::net::IpAddr; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -46,6 +47,15 @@ pub enum Error { #[error("Tried to release non-existent port ({0}, {1:?})")] ReleaseMissingPort(uuid::Uuid, NetworkInterfaceKind), + + #[error("Tried to update external IPs on non-existent port ({0}, {1:?})")] + ExternalIpUpdateMissingPort(uuid::Uuid, NetworkInterfaceKind), + + #[error("Could not find Primary NIC")] + NoPrimaryNic, + + #[error("Can't attach new ephemeral IP {0}, currently have {1}")] + ImplicitEphemeralIpDetach(IpAddr, IpAddr), } /// Delete all xde devices on the system. 
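The new `ImplicitEphemeralIpDetach` variant above encodes the rule that attaching an ephemeral IP must never silently replace a different one already bound to a port. A stand-alone sketch of that check (stand-in signature, not the real port-manager logic):

----
use std::net::IpAddr;

/// Accept an ephemeral IP only if none is currently bound, or if the
/// same address is being re-attached (the idempotent case).
fn check_ephemeral(
    current: Option<IpAddr>,
    new: IpAddr,
) -> Result<IpAddr, String> {
    match current {
        None => Ok(new),
        Some(cur) if cur == new => Ok(cur),
        Some(cur) => Err(format!(
            "Can't attach new ephemeral IP {new}, currently have {cur}"
        )),
    }
}
----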
diff --git a/illumos-utils/src/opte/non_illumos.rs b/illumos-utils/src/opte/non_illumos.rs index ccd4990d5f..bf61249fb1 100644 --- a/illumos-utils/src/opte/non_illumos.rs +++ b/illumos-utils/src/opte/non_illumos.rs @@ -8,6 +8,7 @@ use slog::Logger; use crate::addrobj::AddrObject; use omicron_common::api::internal::shared::NetworkInterfaceKind; +use std::net::IpAddr; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -16,6 +17,15 @@ pub enum Error { #[error("Tried to release non-existent port ({0}, {1:?})")] ReleaseMissingPort(uuid::Uuid, NetworkInterfaceKind), + + #[error("Tried to update external IPs on non-existent port ({0}, {1:?})")] + ExternalIpUpdateMissingPort(uuid::Uuid, NetworkInterfaceKind), + + #[error("Could not find Primary NIC")] + NoPrimaryNic, + + #[error("Can't attach new ephemeral IP {0}, currently have {1}")] + ImplicitEphemeralIpDetach(IpAddr, IpAddr), } pub fn initialize_xde_driver( diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index c472996598..2b2f622070 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -28,6 +28,7 @@ use oxide_vpc::api::MacAddr; use oxide_vpc::api::RouterTarget; use oxide_vpc::api::SNat4Cfg; use oxide_vpc::api::SNat6Cfg; +use oxide_vpc::api::SetExternalIpsReq; use oxide_vpc::api::VpcCfg; use slog::debug; use slog::error; @@ -398,6 +399,121 @@ impl PortManager { Ok((port, ticket)) } + /// Ensure external IPs for an OPTE port are up to date. + #[cfg_attr(not(target_os = "illumos"), allow(unused_variables))] + pub fn external_ips_ensure( + &self, + nic_id: Uuid, + nic_kind: NetworkInterfaceKind, + source_nat: Option, + ephemeral_ip: Option, + floating_ips: &[IpAddr], + ) -> Result<(), Error> { + let ports = self.inner.ports.lock().unwrap(); + let port = ports.get(&(nic_id, nic_kind)).ok_or_else(|| { + Error::ExternalIpUpdateMissingPort(nic_id, nic_kind) + })?; + + // XXX: duplicates parts of macro logic in `create_port`. + macro_rules! ext_ip_cfg { + ($ip:expr, $log_prefix:literal, $ip_t:path, $cidr_t:path, + $ipcfg_e:path, $ipcfg_t:ident, $snat_t:ident) => {{ + let snat = match source_nat { + Some(snat) => { + let $ip_t(snat_ip) = snat.ip else { + error!( + self.inner.log, + concat!($log_prefix, " SNAT config"); + "snat_ip" => ?snat.ip, + ); + return Err(Error::InvalidPortIpConfig); + }; + let ports = snat.first_port..=snat.last_port; + Some($snat_t { external_ip: snat_ip.into(), ports }) + } + None => None, + }; + let ephemeral_ip = match ephemeral_ip { + Some($ip_t(ip)) => Some(ip.into()), + Some(_) => { + error!( + self.inner.log, + concat!($log_prefix, " ephemeral IP"); + "ephemeral_ip" => ?ephemeral_ip, + ); + return Err(Error::InvalidPortIpConfig); + } + None => None, + }; + let floating_ips: Vec<_> = floating_ips + .iter() + .copied() + .map(|ip| match ip { + $ip_t(ip) => Ok(ip.into()), + _ => { + error!( + self.inner.log, + concat!($log_prefix, " ephemeral IP"); + "ephemeral_ip" => ?ephemeral_ip, + ); + Err(Error::InvalidPortIpConfig) + } + }) + .collect::, _>>()?; + + ExternalIpCfg { + ephemeral_ip, + snat, + floating_ips, + } + }} + } + + // TODO-completeness: support dual-stack. We'll need to explicitly store + // a v4 and a v6 ephemeral IP + SNat + gateway + ... in `InstanceInner` + // to have enough info to build both. 
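+        // Until then, the port's gateway address family decides which
+        // single stack gets an ExternalIpCfg: a v4 gateway populates
+        // `external_ips_v4` below, and a v6 gateway `external_ips_v6`.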
+ let mut v4_cfg = None; + let mut v6_cfg = None; + match port.gateway().ip { + IpAddr::V4(_) => { + v4_cfg = Some(ext_ip_cfg!( + ip, + "Expected IPv4", + IpAddr::V4, + IpCidr::Ip4, + IpCfg::Ipv4, + Ipv4Cfg, + SNat4Cfg + )) + } + IpAddr::V6(_) => { + v6_cfg = Some(ext_ip_cfg!( + ip, + "Expected IPv6", + IpAddr::V6, + IpCidr::Ip6, + IpCfg::Ipv6, + Ipv6Cfg, + SNat6Cfg + )) + } + } + + let req = SetExternalIpsReq { + port_name: port.name().into(), + external_ips_v4: v4_cfg, + external_ips_v6: v6_cfg, + }; + + #[cfg(target_os = "illumos")] + let hdl = opte_ioctl::OpteHdl::open(opte_ioctl::OpteHdl::XDE_CTL)?; + + #[cfg(target_os = "illumos")] + hdl.set_external_ips(&req)?; + + Ok(()) + } + #[cfg(target_os = "illumos")] pub fn firewall_rules_ensure( &self, diff --git a/nexus/db-model/src/external_ip.rs b/nexus/db-model/src/external_ip.rs index e95185658f..1e9def4182 100644 --- a/nexus/db-model/src/external_ip.rs +++ b/nexus/db-model/src/external_ip.rs @@ -23,6 +23,7 @@ use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadata; use serde::Deserialize; use serde::Serialize; +use sled_agent_client::types::InstanceExternalIpBody; use std::convert::TryFrom; use std::net::IpAddr; use uuid::Uuid; @@ -32,7 +33,7 @@ impl_enum_type!( #[diesel(postgres_type(name = "ip_kind", schema = "public"))] pub struct IpKindEnum; - #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Deserialize, Serialize)] #[diesel(sql_type = IpKindEnum)] pub enum IpKind; @@ -41,6 +42,42 @@ impl_enum_type!( Floating => b"floating" ); +impl_enum_type!( + #[derive(SqlType, Debug, Clone, Copy, QueryId)] + #[diesel(postgres_type(name = "ip_attach_state"))] + pub struct IpAttachStateEnum; + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Deserialize, Serialize)] + #[diesel(sql_type = IpAttachStateEnum)] + pub enum IpAttachState; + + Detached => b"detached" + Attached => b"attached" + Detaching => b"detaching" + Attaching => b"attaching" +); + +impl std::fmt::Display for IpAttachState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + IpAttachState::Detached => "Detached", + IpAttachState::Attached => "Attached", + IpAttachState::Detaching => "Detaching", + IpAttachState::Attaching => "Attaching", + }) + } +} + +impl std::fmt::Display for IpKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + IpKind::Floating => "floating", + IpKind::Ephemeral => "ephemeral", + IpKind::SNat => "SNAT", + }) + } +} + /// The main model type for external IP addresses for instances /// and externally-facing services. /// @@ -51,7 +88,9 @@ impl_enum_type!( /// addresses and port ranges, while source NAT IPs are not discoverable in the /// API at all, and only provide outbound connectivity to instances, not /// inbound. 
-#[derive(Debug, Clone, Selectable, Queryable, Insertable)] +#[derive( + Debug, Clone, Selectable, Queryable, Insertable, Deserialize, Serialize, +)] #[diesel(table_name = external_ip)] pub struct ExternalIp { pub id: Uuid, @@ -76,6 +115,7 @@ pub struct ExternalIp { pub last_port: SqlU16, // Only Some(_) for instance Floating IPs pub project_id: Option, + pub state: IpAttachState, } /// A view type constructed from `ExternalIp` used to represent Floating IP @@ -125,6 +165,7 @@ pub struct IncompleteExternalIp { parent_id: Option, pool_id: Uuid, project_id: Option, + state: IpAttachState, // Optional address requesting that a specific IP address be allocated. explicit_ip: Option, // Optional range when requesting a specific SNAT range be allocated. @@ -137,34 +178,38 @@ impl IncompleteExternalIp { instance_id: Uuid, pool_id: Uuid, ) -> Self { + let kind = IpKind::SNat; Self { id, name: None, description: None, time_created: Utc::now(), - kind: IpKind::SNat, + kind, is_service: false, parent_id: Some(instance_id), pool_id, project_id: None, explicit_ip: None, explicit_port_range: None, + state: kind.initial_state(), } } - pub fn for_ephemeral(id: Uuid, instance_id: Uuid, pool_id: Uuid) -> Self { + pub fn for_ephemeral(id: Uuid, pool_id: Uuid) -> Self { + let kind = IpKind::Ephemeral; Self { id, name: None, description: None, time_created: Utc::now(), - kind: IpKind::Ephemeral, + kind, is_service: false, - parent_id: Some(instance_id), + parent_id: None, pool_id, project_id: None, explicit_ip: None, explicit_port_range: None, + state: kind.initial_state(), } } @@ -175,18 +220,20 @@ impl IncompleteExternalIp { project_id: Uuid, pool_id: Uuid, ) -> Self { + let kind = IpKind::Floating; Self { id, name: Some(name.clone()), description: Some(description.to_string()), time_created: Utc::now(), - kind: IpKind::Floating, + kind, is_service: false, parent_id: None, pool_id, project_id: Some(project_id), explicit_ip: None, explicit_port_range: None, + state: kind.initial_state(), } } @@ -198,18 +245,20 @@ impl IncompleteExternalIp { explicit_ip: IpAddr, pool_id: Uuid, ) -> Self { + let kind = IpKind::Floating; Self { id, name: Some(name.clone()), description: Some(description.to_string()), time_created: Utc::now(), - kind: IpKind::Floating, + kind, is_service: false, parent_id: None, pool_id, project_id: Some(project_id), explicit_ip: Some(explicit_ip.into()), explicit_port_range: None, + state: kind.initial_state(), } } @@ -233,6 +282,7 @@ impl IncompleteExternalIp { project_id: None, explicit_ip: Some(IpNetwork::from(address)), explicit_port_range: None, + state: IpAttachState::Attached, } } @@ -250,18 +300,20 @@ impl IncompleteExternalIp { NUM_SOURCE_NAT_PORTS, ); let explicit_port_range = Some((first_port.into(), last_port.into())); + let kind = IpKind::SNat; Self { id, name: None, description: None, time_created: Utc::now(), - kind: IpKind::SNat, + kind, is_service: true, parent_id: Some(service_id), pool_id, project_id: None, explicit_ip: Some(IpNetwork::from(address)), explicit_port_range, + state: kind.initial_state(), } } @@ -272,34 +324,38 @@ impl IncompleteExternalIp { service_id: Uuid, pool_id: Uuid, ) -> Self { + let kind = IpKind::Floating; Self { id, name: Some(name.clone()), description: Some(description.to_string()), time_created: Utc::now(), - kind: IpKind::Floating, + kind, is_service: true, parent_id: Some(service_id), pool_id, project_id: None, explicit_ip: None, explicit_port_range: None, + state: IpAttachState::Attached, } } pub fn for_service_snat(id: Uuid, service_id: Uuid, 
pool_id: Uuid) -> Self { + let kind = IpKind::SNat; Self { id, name: None, description: None, time_created: Utc::now(), - kind: IpKind::SNat, + kind, is_service: true, parent_id: Some(service_id), pool_id, project_id: None, explicit_ip: None, explicit_port_range: None, + state: kind.initial_state(), } } @@ -339,6 +395,10 @@ impl IncompleteExternalIp { &self.project_id } + pub fn state(&self) -> &IpAttachState { + &self.state + } + pub fn explicit_ip(&self) -> &Option { &self.explicit_ip } @@ -348,6 +408,18 @@ impl IncompleteExternalIp { } } +impl IpKind { + /// The initial state which a new non-service IP should + /// be allocated in. + pub fn initial_state(&self) -> IpAttachState { + match &self { + IpKind::SNat => IpAttachState::Attached, + IpKind::Ephemeral => IpAttachState::Detached, + IpKind::Floating => IpAttachState::Detached, + } + } +} + impl TryFrom for shared::IpKind { type Error = Error; @@ -371,8 +443,15 @@ impl TryFrom for views::ExternalIp { "Service IPs should not be exposed in the API", )); } - let kind = ip.kind.try_into()?; - Ok(views::ExternalIp { kind, ip: ip.ip.ip() }) + match ip.kind { + IpKind::Floating => Ok(views::ExternalIp::Floating(ip.try_into()?)), + IpKind::Ephemeral => { + Ok(views::ExternalIp::Ephemeral { ip: ip.ip.ip() }) + } + IpKind::SNat => Err(Error::internal_error( + "SNAT IP addresses should not be exposed in the API", + )), + } } } @@ -450,3 +529,18 @@ impl From for views::FloatingIp { } } } + +impl TryFrom for InstanceExternalIpBody { + type Error = Error; + + fn try_from(value: ExternalIp) -> Result { + let ip = value.ip.ip(); + match value.kind { + IpKind::Ephemeral => Ok(InstanceExternalIpBody::Ephemeral(ip)), + IpKind::Floating => Ok(InstanceExternalIpBody::Floating(ip)), + IpKind::SNat => Err(Error::invalid_request( + "cannot dynamically add/remove SNAT allocation", + )), + } + } +} diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index 9252926547..e10f8c2603 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -2,9 +2,11 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use super::{ByteCount, Disk, Generation, InstanceCpuCount, InstanceState}; +use super::{ + ByteCount, Disk, ExternalIp, Generation, InstanceCpuCount, InstanceState, +}; use crate::collection::DatastoreAttachTargetConfig; -use crate::schema::{disk, instance}; +use crate::schema::{disk, external_ip, instance}; use chrono::{DateTime, Utc}; use db_macros::Resource; use nexus_types::external_api::params; @@ -101,6 +103,17 @@ impl DatastoreAttachTargetConfig for Instance { type ResourceTimeDeletedColumn = disk::dsl::time_deleted; } +impl DatastoreAttachTargetConfig for Instance { + type Id = Uuid; + + type CollectionIdColumn = instance::dsl::id; + type CollectionTimeDeletedColumn = instance::dsl::time_deleted; + + type ResourceIdColumn = external_ip::dsl::id; + type ResourceCollectionIdColumn = external_ip::dsl::parent_id; + type ResourceTimeDeletedColumn = external_ip::dsl::time_deleted; +} + /// Runtime state of the Instance, including the actual running state and minimal /// metadata /// diff --git a/nexus/db-model/src/instance_state.rs b/nexus/db-model/src/instance_state.rs index 7b98850b43..dca809758f 100644 --- a/nexus/db-model/src/instance_state.rs +++ b/nexus/db-model/src/instance_state.rs @@ -65,3 +65,9 @@ impl From for sled_agent_client::types::InstanceState { } } } + +impl From for InstanceState { + fn from(state: external::InstanceState) -> Self { + Self::new(state) + } +} diff --git a/nexus/db-model/src/ipv4_nat_entry.rs b/nexus/db-model/src/ipv4_nat_entry.rs index 570a46b5e9..b0fa2b8eb9 100644 --- a/nexus/db-model/src/ipv4_nat_entry.rs +++ b/nexus/db-model/src/ipv4_nat_entry.rs @@ -5,6 +5,7 @@ use crate::{schema::ipv4_nat_entry, Ipv4Net, Ipv6Net, SqlU16, Vni}; use chrono::{DateTime, Utc}; use omicron_common::api::external; use schemars::JsonSchema; +use serde::Deserialize; use serde::Serialize; use uuid::Uuid; @@ -21,7 +22,7 @@ pub struct Ipv4NatValues { } /// Database representation of an Ipv4 NAT Entry. -#[derive(Queryable, Debug, Clone, Selectable)] +#[derive(Queryable, Debug, Clone, Selectable, Serialize, Deserialize)] #[diesel(table_name = ipv4_nat_entry)] pub struct Ipv4NatEntry { pub id: Uuid, diff --git a/nexus/db-model/src/macaddr.rs b/nexus/db-model/src/macaddr.rs index dceb8acf48..b3329598bd 100644 --- a/nexus/db-model/src/macaddr.rs +++ b/nexus/db-model/src/macaddr.rs @@ -8,8 +8,19 @@ use diesel::pg::Pg; use diesel::serialize::{self, ToSql}; use diesel::sql_types; use omicron_common::api::external; +use serde::Deserialize; +use serde::Serialize; -#[derive(Clone, Copy, Debug, PartialEq, AsExpression, FromSqlRow)] +#[derive( + Clone, + Copy, + Debug, + PartialEq, + AsExpression, + FromSqlRow, + Serialize, + Deserialize, +)] #[diesel(sql_type = sql_types::BigInt)] pub struct MacAddr(pub external::MacAddr); diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 2e7493716e..11cdf87f6c 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -574,6 +574,7 @@ table! { last_port -> Int4, project_id -> Nullable, + state -> crate::IpAttachStateEnum, } } diff --git a/nexus/db-queries/src/db/datastore/disk.rs b/nexus/db-queries/src/db/datastore/disk.rs index 2055287e62..390376e627 100644 --- a/nexus/db-queries/src/db/datastore/disk.rs +++ b/nexus/db-queries/src/db/datastore/disk.rs @@ -206,7 +206,7 @@ impl DataStore { let (instance, disk) = query.attach_and_get_result_async(&*self.pool_connection_authorized(opctx).await?) 
.await - .or_else(|e| { + .or_else(|e: AttachError| { match e { AttachError::CollectionNotFound => { Err(Error::not_found_by_id( @@ -348,7 +348,7 @@ impl DataStore { ) .detach_and_get_result_async(&*self.pool_connection_authorized(opctx).await?) .await - .or_else(|e| { + .or_else(|e: DetachError| { match e { DetachError::CollectionNotFound => { Err(Error::not_found_by_id( diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index 02ce950118..9d4d947476 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -9,6 +9,10 @@ use crate::authz; use crate::authz::ApiResource; use crate::context::OpContext; use crate::db; +use crate::db::collection_attach::AttachError; +use crate::db::collection_attach::DatastoreAttachTarget; +use crate::db::collection_detach::DatastoreDetachTarget; +use crate::db::collection_detach::DetachError; use crate::db::error::public_error_from_diesel; use crate::db::error::retryable; use crate::db::error::ErrorHandler; @@ -22,11 +26,17 @@ use crate::db::model::Name; use crate::db::pagination::paginated; use crate::db::pool::DbConnection; use crate::db::queries::external_ip::NextExternalIp; +use crate::db::queries::external_ip::MAX_EXTERNAL_IPS_PER_INSTANCE; +use crate::db::queries::external_ip::SAFE_TO_ATTACH_INSTANCE_STATES; +use crate::db::queries::external_ip::SAFE_TO_ATTACH_INSTANCE_STATES_CREATING; +use crate::db::queries::external_ip::SAFE_TRANSIENT_INSTANCE_STATES; use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_db_model::Instance; +use nexus_db_model::IpAttachState; use nexus_types::external_api::params; use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -35,13 +45,14 @@ use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; -use omicron_common::api::external::NameOrId; use omicron_common::api::external::ResourceType; use omicron_common::api::external::UpdateResult; use ref_cast::RefCast; use std::net::IpAddr; use uuid::Uuid; +const MAX_EXTERNAL_IPS_PLUS_SNAT: u32 = MAX_EXTERNAL_IPS_PER_INSTANCE + 1; + impl DataStore { /// Create an external IP address for source NAT for an instance. pub async fn allocate_instance_snat_ip( @@ -60,23 +71,43 @@ impl DataStore { } /// Create an Ephemeral IP address for an instance. + /// + /// For consistency between instance create and External IP attach/detach + /// operations, this IP will be created in the `Attaching` state to block + /// concurrent access. + /// Callers must call `external_ip_complete_op` on saga completion to move + /// the IP to `Attached`. + /// + /// To better handle idempotent attachment, this method returns an + /// additional bool: + /// - true: EIP was detached or attaching. proceed with saga. + /// - false: EIP was attached. No-op for remainder of saga. 
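+    /// (Callers therefore treat a duplicate attach as success rather
+    /// than an error: when `false` is returned they simply skip the
+    /// remaining DPD/OPTE programming steps.)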
pub async fn allocate_instance_ephemeral_ip( &self, opctx: &OpContext, ip_id: Uuid, instance_id: Uuid, - pool_name: Option, - ) -> CreateResult { - let pool = match pool_name { - Some(name) => { - let (.., authz_pool, pool) = LookupPath::new(opctx, &self) - .ip_pool_name(&name) + pool: Option, + creating_instance: bool, + ) -> CreateResult<(ExternalIp, bool)> { + // This is slightly hacky: we need to create an unbound ephemeral IP, and + // then attempt to bind it to respect two separate constraints: + // - At most one Ephemeral IP per instance + // - At most MAX external IPs per instance + // Naturally, we now *need* to destroy the ephemeral IP if the newly alloc'd + // IP was not attached, including on idempotent success. + let pool = match pool { + Some(authz_pool) => { + let (.., pool) = LookupPath::new(opctx, &self) + .ip_pool_id(authz_pool.id()) // any authenticated user can CreateChild on an IP pool. this is // meant to represent allocating an IP .fetch_for(authz::Action::CreateChild) .await?; // If this pool is not linked to the current silo, 404 + // As name resolution happens one layer up, we need to use the *original* + // authz Pool. if self.ip_pool_fetch_link(opctx, pool.id()).await.is_err() { return Err(authz_pool.not_found()); } @@ -91,9 +122,49 @@ impl DataStore { }; let pool_id = pool.identity.id; - let data = - IncompleteExternalIp::for_ephemeral(ip_id, instance_id, pool_id); - self.allocate_external_ip(opctx, data).await + let data = IncompleteExternalIp::for_ephemeral(ip_id, pool_id); + + // We might not be able to acquire a new IP, but in the event of an + // idempotent or double attach this failure is allowed. + let temp_ip = self.allocate_external_ip(opctx, data).await; + if let Err(e) = temp_ip { + let eip = self + .instance_lookup_ephemeral_ip(opctx, instance_id) + .await? + .ok_or(e)?; + + return Ok((eip, false)); + } + let temp_ip = temp_ip?; + + match self + .begin_attach_ip( + opctx, + temp_ip.id, + instance_id, + IpKind::Ephemeral, + creating_instance, + ) + .await + { + Err(e) => { + self.deallocate_external_ip(opctx, temp_ip.id).await?; + Err(e) + } + // Idempotent case: attach failed due to a caught UniqueViolation. + Ok(None) => { + self.deallocate_external_ip(opctx, temp_ip.id).await?; + let eip = self + .instance_lookup_ephemeral_ip(opctx, instance_id) + .await? + .ok_or_else(|| Error::internal_error( + "failed to lookup current ephemeral IP for idempotent attach" + ))?; + let do_saga = eip.state != IpAttachState::Attached; + Ok((eip, do_saga)) + } + Ok(Some(v)) => Ok(v), + } } /// Allocates an IP address for internal service usage. @@ -140,33 +211,34 @@ impl DataStore { opctx: &OpContext, project_id: Uuid, params: params::FloatingIpCreate, + pool: Option, ) -> CreateResult { let ip_id = Uuid::new_v4(); - // TODO: NameOrId resolution should happen a level higher, in the nexus function - let (.., authz_pool, pool) = match params.pool { - Some(NameOrId::Name(name)) => { - LookupPath::new(opctx, self) - .ip_pool_name(&Name(name)) - .fetch_for(authz::Action::Read) - .await? + // This implements the same pattern as in `allocate_instance_ephemeral_ip` to + // check that a chosen pool is valid from within the current silo. 
+ let pool = match pool { + Some(authz_pool) => { + let (.., pool) = LookupPath::new(opctx, &self) + .ip_pool_id(authz_pool.id()) + .fetch_for(authz::Action::CreateChild) + .await?; + + if self.ip_pool_fetch_link(opctx, pool.id()).await.is_err() { + return Err(authz_pool.not_found()); + } + + pool } - Some(NameOrId::Id(id)) => { - LookupPath::new(opctx, self) - .ip_pool_id(id) - .fetch_for(authz::Action::Read) - .await? + // If no name given, use the default logic + None => { + let (.., pool) = self.ip_pools_fetch_default(&opctx).await?; + pool } - None => self.ip_pools_fetch_default(opctx).await?, }; let pool_id = pool.id(); - // If this pool is not linked to the current silo, 404 - if self.ip_pool_fetch_link(opctx, pool_id).await.is_err() { - return Err(authz_pool.not_found()); - } - let data = if let Some(ip) = params.address { IncompleteExternalIp::for_floating_explicit( ip_id, @@ -228,6 +300,7 @@ impl DataStore { ) } } + // Floating IP: name conflict DatabaseError(UniqueViolation, ..) if name.is_some() => { TransactionError::CustomError(public_error_from_diesel( e, @@ -299,7 +372,266 @@ impl DataStore { self.allocate_external_ip(opctx, data).await } - /// Deallocate the external IP address with the provided ID. + /// Attempt to move a target external IP from detached to attaching, + /// checking that its parent instance does not have too many addresses + /// and is in a valid state. + /// + /// Returns the `ExternalIp` which was modified, where possible. This + /// is only nullable when trying to double-attach ephemeral IPs. + /// To better handle idempotent attachment, this method returns an + /// additional bool: + /// - true: EIP was detached or attaching. proceed with saga. + /// - false: EIP was attached. No-op for remainder of saga. + async fn begin_attach_ip( + &self, + opctx: &OpContext, + ip_id: Uuid, + instance_id: Uuid, + kind: IpKind, + creating_instance: bool, + ) -> Result, Error> { + use db::schema::external_ip::dsl; + use db::schema::external_ip::table; + use db::schema::instance::dsl as inst_dsl; + use db::schema::instance::table as inst_table; + use diesel::result::DatabaseErrorKind::UniqueViolation; + use diesel::result::Error::DatabaseError; + + let safe_states = if creating_instance { + &SAFE_TO_ATTACH_INSTANCE_STATES_CREATING[..] + } else { + &SAFE_TO_ATTACH_INSTANCE_STATES[..] + }; + + let query = Instance::attach_resource( + instance_id, + ip_id, + inst_table + .into_boxed() + .filter(inst_dsl::state.eq_any(safe_states)) + .filter(inst_dsl::migration_id.is_null()), + table + .into_boxed() + .filter(dsl::state.eq(IpAttachState::Detached)) + .filter(dsl::kind.eq(kind)) + .filter(dsl::parent_id.is_null()), + MAX_EXTERNAL_IPS_PLUS_SNAT, + diesel::update(dsl::external_ip).set(( + dsl::parent_id.eq(Some(instance_id)), + dsl::time_modified.eq(Utc::now()), + dsl::state.eq(IpAttachState::Attaching), + )), + ); + + let mut do_saga = true; + query.attach_and_get_result_async(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .map(|(_, resource)| Some(resource)) + .or_else(|e: AttachError| match e { + AttachError::CollectionNotFound => { + Err(Error::not_found_by_id( + ResourceType::Instance, + &instance_id, + )) + }, + AttachError::ResourceNotFound => { + Err(if kind == IpKind::Ephemeral { + Error::internal_error("call-scoped ephemeral IP was lost") + } else { + Error::not_found_by_id( + ResourceType::FloatingIp, + &ip_id, + ) + }) + }, + AttachError::NoUpdate { attached_count, resource, collection } => { + match resource.state { + // Idempotent errors: attach is in progress or complete for the same resource pair -- this is fine. + IpAttachState::Attaching if resource.parent_id == Some(instance_id) => + return Ok(Some(resource)), + IpAttachState::Attached if resource.parent_id == Some(instance_id) => { + do_saga = false; + return Ok(Some(resource)) + }, + IpAttachState::Attached => + return Err(Error::invalid_request(&format!( + "{kind} IP cannot be attached to one \ + instance while still attached to another" + ))), + // User can reattempt depending on how the current saga unfolds. + // NB: only a floating IP can return this case; an ephemeral IP + // will return a UniqueViolation. + IpAttachState::Attaching | IpAttachState::Detaching + => return Err(Error::unavail(&format!( + "tried to attach {kind} IP mid-attach/detach: \ + attach will be safe to retry once operation on \ + same IP resource completes" + ))), + + IpAttachState::Detached => {}, + } + + if collection.runtime_state.migration_id.is_some() { + return Err(Error::unavail(&format!( + "tried to attach {kind} IP while instance was migrating: \ + attach will be safe to retry once migrate completes" + ))) + } + + Err(match &collection.runtime_state.nexus_state { + state if SAFE_TRANSIENT_INSTANCE_STATES.contains(&state) + => Error::unavail(&format!( + "tried to attach {kind} IP while instance was changing state: \ + attach will be safe to retry once start/stop completes" + )), + state if SAFE_TO_ATTACH_INSTANCE_STATES.contains(&state) => { + if attached_count >= MAX_EXTERNAL_IPS_PLUS_SNAT as i64 { + Error::invalid_request(&format!( + "an instance may not have more than \ + {MAX_EXTERNAL_IPS_PER_INSTANCE} external IP addresses", + )) + } else { + Error::internal_error(&format!("failed to attach {kind} IP")) + } + }, + state => Error::invalid_request(&format!( + "cannot attach {kind} IP to instance in {state} state" + )), + }) + }, + // This case occurs for both currently attaching and attached ephemeral IPs: + AttachError::DatabaseError(DatabaseError(UniqueViolation, ..)) + if kind == IpKind::Ephemeral => { + Ok(None) + }, + AttachError::DatabaseError(e) => { + Err(public_error_from_diesel(e, ErrorHandler::Server)) + }, + }) + .map(|eip| eip.map(|v| (v, do_saga))) + } + + /// Attempt to move a target external IP from attached to detaching, + /// checking that its parent instance is in a valid state. + /// + /// Returns the `ExternalIp` which was modified, where possible. This + /// is only nullable when trying to double-detach ephemeral IPs. + /// To better handle idempotent detachment, this method returns an + /// additional bool: + /// - true: EIP was attached or detaching. proceed with saga. + /// - false: EIP was detached. No-op for remainder of saga.
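+ ///
+ /// A sketch of the intended call pattern (illustrative only; assumes a
+ /// saga action with `opctx`, `ip_id`, and `instance_id` in scope):
+ /// ```ignore
+ /// match datastore
+ ///     .begin_detach_ip(&opctx, ip_id, instance_id, IpKind::Floating, false)
+ ///     .await?
+ /// {
+ ///     Some((eip, true)) => { /* proceed: tear down DPD/NAT state for `eip` */ }
+ ///     Some((_, false)) | None => { /* already detached: nothing to do */ }
+ /// }
+ /// ```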
+ async fn begin_detach_ip( + &self, + opctx: &OpContext, + ip_id: Uuid, + instance_id: Uuid, + kind: IpKind, + creating_instance: bool, + ) -> UpdateResult<Option<(ExternalIp, bool)>> { + use db::schema::external_ip::dsl; + use db::schema::external_ip::table; + use db::schema::instance::dsl as inst_dsl; + use db::schema::instance::table as inst_table; + + let safe_states = if creating_instance { + &SAFE_TO_ATTACH_INSTANCE_STATES_CREATING[..] + } else { + &SAFE_TO_ATTACH_INSTANCE_STATES[..] + }; + + let query = Instance::detach_resource( + instance_id, + ip_id, + inst_table + .into_boxed() + .filter(inst_dsl::state.eq_any(safe_states)) + .filter(inst_dsl::migration_id.is_null()), + table + .into_boxed() + .filter(dsl::state.eq(IpAttachState::Attached)) + .filter(dsl::kind.eq(kind)), + diesel::update(dsl::external_ip).set(( + dsl::time_modified.eq(Utc::now()), + dsl::state.eq(IpAttachState::Detaching), + )), + ); + + let mut do_saga = true; + query.detach_and_get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map(Some) + .or_else(|e: DetachError| Err(match e { + DetachError::CollectionNotFound => { + Error::not_found_by_id( + ResourceType::Instance, + &instance_id, + ) + }, + DetachError::ResourceNotFound => { + if kind == IpKind::Ephemeral { + return Ok(None); + } else { + Error::not_found_by_id( + ResourceType::FloatingIp, + &ip_id, + ) + } + }, + DetachError::NoUpdate { resource, collection } => { + let parent_match = resource.parent_id == Some(instance_id); + match resource.state { + // Idempotent cases: already detached OR detaching from same instance. + IpAttachState::Detached => { + do_saga = false; + return Ok(Some(resource)) + }, + IpAttachState::Detaching if parent_match => return Ok(Some(resource)), + IpAttachState::Attached if !parent_match + => return Err(Error::invalid_request(&format!( + "{kind} IP is not attached to the target instance", + ))), + // User can reattempt depending on how the current saga unfolds. + IpAttachState::Attaching + | IpAttachState::Detaching => return Err(Error::unavail(&format!( + "tried to detach {kind} IP mid-attach/detach: \ + detach will be safe to retry once operation on \ + same IP resource completes" + ))), + IpAttachState::Attached => {}, + } + + if collection.runtime_state.migration_id.is_some() { + return Err(Error::unavail(&format!( + "tried to detach {kind} IP while instance was migrating: \ + detach will be safe to retry once migrate completes" + ))) + } + + match collection.runtime_state.nexus_state { + state if SAFE_TRANSIENT_INSTANCE_STATES.contains(&state) => Error::unavail(&format!( + "tried to detach {kind} IP while instance was changing state: \ + detach will be safe to retry once start/stop completes" + )), + state if SAFE_TO_ATTACH_INSTANCE_STATES.contains(&state) => { + Error::internal_error(&format!("failed to detach {kind} IP")) + }, + state => Error::invalid_request(&format!( + "cannot detach {kind} IP from instance in {state} state" + )), + } + }, + DetachError::DatabaseError(e) => { + public_error_from_diesel(e, ErrorHandler::Server) + }, + + })) + .map(|eip| eip.map(|v| (v, do_saga))) + } + + /// Deallocate the external IP address with the provided ID. This is a complete + /// removal of the IP entry, in contrast with `begin_deallocate_ephemeral_ip`, + /// and should only be used for SNAT entries or cleanup of short-lived ephemeral + /// IPs on failure. /// /// To support idempotency, such as in saga operations, this method returns /// an extra boolean, rather than the usual `DeleteResult`.
The meaning of @@ -329,7 +661,34 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } - /// Delete all external IP addresses associated with the provided instance + /// Moves an instance's ephemeral IP from 'Attached' to 'Detaching'. + /// + /// To support idempotency, this method will succeed if the instance + /// has no ephemeral IP or one is actively being removed. As a result, + /// information on an actual `ExternalIp` is best-effort. + pub async fn begin_deallocate_ephemeral_ip( + &self, + opctx: &OpContext, + ip_id: Uuid, + instance_id: Uuid, + ) -> Result<Option<ExternalIp>, Error> { + let _ = LookupPath::new(&opctx, self) + .instance_id(instance_id) + .lookup_for(authz::Action::Modify) + .await?; + + self.begin_detach_ip( + opctx, + ip_id, + instance_id, + IpKind::Ephemeral, + false, + ) + .await + .map(|res| res.map(|(ip, _do_saga)| ip)) + } + + /// Delete all non-floating IP addresses associated with the provided instance /// ID. /// /// This method returns the number of records deleted, rather than the usual @@ -347,16 +706,22 @@ impl DataStore { .filter(dsl::is_service.eq(false)) .filter(dsl::parent_id.eq(instance_id)) .filter(dsl::kind.ne(IpKind::Floating)) - .set(dsl::time_deleted.eq(now)) + .set(( + dsl::time_deleted.eq(now), + dsl::state.eq(IpAttachState::Detached), + )) .execute_async(&*self.pool_connection_authorized(opctx).await?) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } - /// Detach an individual Floating IP address from its parent instance. + /// Detach all Floating IP addresses from their parent instance. /// /// As in `deallocate_external_ip_by_instance_id`, this method returns the /// number of records altered, rather than an `UpdateResult`. + /// + /// This method ignores ongoing state transitions, and is only safely + /// usable from within the instance_delete saga. pub async fn detach_floating_ips_by_instance_id( &self, opctx: &OpContext, @@ -368,13 +733,18 @@ impl DataStore { .filter(dsl::is_service.eq(false)) .filter(dsl::parent_id.eq(instance_id)) .filter(dsl::kind.eq(IpKind::Floating)) - .set(dsl::parent_id.eq(Option::<Uuid>::None)) + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::parent_id.eq(Option::<Uuid>::None), + dsl::state.eq(IpAttachState::Detached), + )) .execute_async(&*self.pool_connection_authorized(opctx).await?) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } /// Fetch all external IP addresses of any kind for the provided instance + /// in all attachment states. pub async fn instance_lookup_external_ips( &self, opctx: &OpContext, @@ -391,6 +761,20 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Fetch the ephemeral IP address assigned to the provided instance, if this + /// has been configured. + pub async fn instance_lookup_ephemeral_ip( + &self, + opctx: &OpContext, + instance_id: Uuid, + ) -> LookupResult<Option<ExternalIp>> { + Ok(self + .instance_lookup_external_ips(opctx, instance_id) + .await? + .into_iter() + .find(|v| v.kind == IpKind::Ephemeral)) + } + /// Fetch all Floating IP addresses for the provided project. pub async fn floating_ips_list( &self, @@ -425,26 +809,20 @@ impl DataStore { &self, opctx: &OpContext, authz_fip: &authz::FloatingIp, - db_fip: &FloatingIp, ) -> DeleteResult { use db::schema::external_ip::dsl; - // Verify this FIP is not attached to any instances/services.
- if db_fip.parent_id.is_some() { - return Err(Error::invalid_request( - "Floating IP cannot be deleted while attached to an instance", - )); - } - opctx.authorize(authz::Action::Delete, authz_fip).await?; let now = Utc::now(); - let updated_rows = diesel::update(dsl::external_ip) - .filter(dsl::id.eq(db_fip.id())) + let result = diesel::update(dsl::external_ip) + .filter(dsl::id.eq(authz_fip.id())) .filter(dsl::time_deleted.is_null()) .filter(dsl::parent_id.is_null()) + .filter(dsl::state.eq(IpAttachState::Detached)) .set(dsl::time_deleted.eq(now)) - .execute_async(&*self.pool_connection_authorized(opctx).await?) + .check_if_exists::(authz_fip.id()) + .execute_and_check(&*self.pool_connection_authorized(opctx).await?) .await .map_err(|e| { public_error_from_diesel( @@ -453,103 +831,208 @@ impl DataStore { ) })?; - if updated_rows == 0 { - return Err(Error::invalid_request( - "deletion failed due to concurrent modification", - )); + match result.status { + // Verify this FIP is not attached to any instances/services. + UpdateStatus::NotUpdatedButExists if result.found.parent_id.is_some() => Err(Error::invalid_request( + "Floating IP cannot be deleted while attached to an instance", + )), + // Only remaining cause of `NotUpdated` is earlier soft-deletion. + // Return success in this case to maintain idempotency. + UpdateStatus::Updated | UpdateStatus::NotUpdatedButExists => Ok(()), } - Ok(()) } /// Attaches a Floating IP address to an instance. - pub async fn floating_ip_attach( + /// + /// This moves a floating IP into the 'attaching' state. Callers are + /// responsible for calling `external_ip_complete_op` to finalise the + /// IP in 'attached' state at saga completion. + /// + /// To better handle idempotent attachment, this method returns an + /// additional bool: + /// - true: EIP was detached or attaching. proceed with saga. + /// - false: EIP was attached. No-op for remainder of saga. + pub async fn floating_ip_begin_attach( &self, opctx: &OpContext, authz_fip: &authz::FloatingIp, - db_fip: &FloatingIp, instance_id: Uuid, - ) -> UpdateResult { - use db::schema::external_ip::dsl; - - // Verify this FIP is not attached to any instances/services. - if db_fip.parent_id.is_some() { - return Err(Error::invalid_request( - "Floating IP cannot be attached to one instance while still attached to another", - )); - } - - let (.., authz_instance, _db_instance) = LookupPath::new(&opctx, self) + creating_instance: bool, + ) -> UpdateResult<(ExternalIp, bool)> { + let (.., authz_instance) = LookupPath::new(&opctx, self) .instance_id(instance_id) - .fetch_for(authz::Action::Modify) + .lookup_for(authz::Action::Modify) .await?; opctx.authorize(authz::Action::Modify, authz_fip).await?; opctx.authorize(authz::Action::Modify, &authz_instance).await?; - diesel::update(dsl::external_ip) - .filter(dsl::id.eq(db_fip.id())) - .filter(dsl::kind.eq(IpKind::Floating)) - .filter(dsl::time_deleted.is_null()) - .filter(dsl::parent_id.is_null()) - .set(( - dsl::parent_id.eq(Some(instance_id)), - dsl::time_modified.eq(Utc::now()), - )) - .returning(ExternalIp::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) 
- .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByResource(authz_fip), + self.begin_attach_ip( + opctx, + authz_fip.id(), + instance_id, + IpKind::Floating, + creating_instance, + ) + .await + .and_then(|v| { + v.ok_or_else(|| { + Error::internal_error( + "floating IP should never return `None` from begin_attach", ) }) - .and_then(|r| FloatingIp::try_from(r)) - .map_err(|e| Error::internal_error(&format!("{e}"))) + }) } /// Detaches a Floating IP address from an instance. - pub async fn floating_ip_detach( + /// + /// This moves a floating IP into the 'detaching' state. Callers are + /// responsible for calling `external_ip_complete_op` to finalise the + /// IP in 'detached' state at saga completion. + /// + /// To better handle idempotent detachment, this method returns an + /// additional bool: + /// - true: EIP was attached or detaching. proceed with saga. + /// - false: EIP was detached. No-op for remainder of saga. + pub async fn floating_ip_begin_detach( &self, opctx: &OpContext, authz_fip: &authz::FloatingIp, - db_fip: &FloatingIp, - ) -> UpdateResult { - use db::schema::external_ip::dsl; - - let Some(instance_id) = db_fip.parent_id else { - return Err(Error::invalid_request( - "Floating IP is not attached to an instance", - )); - }; - - let (.., authz_instance, _db_instance) = LookupPath::new(&opctx, self) + instance_id: Uuid, + creating_instance: bool, + ) -> UpdateResult<(ExternalIp, bool)> { + let (.., authz_instance) = LookupPath::new(&opctx, self) .instance_id(instance_id) - .fetch_for(authz::Action::Modify) + .lookup_for(authz::Action::Modify) .await?; opctx.authorize(authz::Action::Modify, authz_fip).await?; opctx.authorize(authz::Action::Modify, &authz_instance).await?; - diesel::update(dsl::external_ip) - .filter(dsl::id.eq(db_fip.id())) - .filter(dsl::kind.eq(IpKind::Floating)) - .filter(dsl::time_deleted.is_null()) - .filter(dsl::parent_id.eq(instance_id)) - .set(( - dsl::parent_id.eq(Option::::None), - dsl::time_modified.eq(Utc::now()), - )) - .returning(ExternalIp::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByResource(authz_fip), + self.begin_detach_ip( + opctx, + authz_fip.id(), + instance_id, + IpKind::Floating, + creating_instance, + ) + .await + .and_then(|v| { + v.ok_or_else(|| { + Error::internal_error( + "floating IP should never return `None` from begin_detach", ) }) - .and_then(|r| FloatingIp::try_from(r)) - .map_err(|e| Error::internal_error(&format!("{e}"))) + }) + } + + /// Move an external IP from a transitional state (attaching, detaching) + /// to its intended end state. + /// + /// Returns the number of rows modified, this may be zero on: + /// - instance delete by another saga + /// - saga action rerun + /// + /// This is valid in both cases for idempotency. 
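+ ///
+ /// A sketch of the expected call at the end of an attach saga
+ /// (illustrative only):
+ /// ```ignore
+ /// let rows = datastore
+ ///     .external_ip_complete_op(
+ ///         &opctx,
+ ///         ip_id,
+ ///         IpKind::Floating,
+ ///         IpAttachState::Attaching,
+ ///         IpAttachState::Attached,
+ ///     )
+ ///     .await?;
+ /// // `rows == 0` is expected on saga action rerun.
+ /// ```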
+ pub async fn external_ip_complete_op( + &self, + opctx: &OpContext, + ip_id: Uuid, + ip_kind: IpKind, + expected_state: IpAttachState, + target_state: IpAttachState, + ) -> Result { + use db::schema::external_ip::dsl; + + if matches!( + expected_state, + IpAttachState::Attached | IpAttachState::Detached + ) { + return Err(Error::internal_error(&format!( + "{expected_state:?} is not a valid transition state for attach/detach" + ))); + } + + let part_out = diesel::update(dsl::external_ip) + .filter(dsl::id.eq(ip_id)) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::state.eq(expected_state)); + + let now = Utc::now(); + let conn = self.pool_connection_authorized(opctx).await?; + match (ip_kind, expected_state, target_state) { + (IpKind::SNat, _, _) => return Err(Error::internal_error( + "SNAT should not be removed via `external_ip_complete_op`, \ + use `deallocate_external_ip`", + )), + + (IpKind::Ephemeral, _, IpAttachState::Detached) => { + part_out + .set(( + dsl::parent_id.eq(Option::::None), + dsl::time_modified.eq(now), + dsl::time_deleted.eq(now), + dsl::state.eq(target_state), + )) + .execute_async(&*conn) + .await + } + + (IpKind::Floating, _, IpAttachState::Detached) => { + part_out + .set(( + dsl::parent_id.eq(Option::::None), + dsl::time_modified.eq(now), + dsl::state.eq(target_state), + )) + .execute_async(&*conn) + .await + } + + // Attaching->Attached gets separate logic because we choose to fail + // and unwind on instance delete. This covers two cases: + // - External IP is deleted. + // - Floating IP is suddenly `detached`. + (_, IpAttachState::Attaching, IpAttachState::Attached) => { + return part_out + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::state.eq(target_state), + )) + .check_if_exists::(ip_id) + .execute_and_check( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + }) + .and_then(|r| match r.status { + UpdateStatus::Updated => Ok(1), + UpdateStatus::NotUpdatedButExists + if r.found.state == IpAttachState::Detached + || r.found.time_deleted.is_some() => + { + Err(Error::internal_error( + "unwinding due to concurrent instance delete", + )) + } + UpdateStatus::NotUpdatedButExists => Ok(0), + }) + } + + // Unwind from failed detach. 
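+ // (A failed detach saga has already moved this IP to 'detaching';
+ // restoring 'attached' on unwind keeps the instance's binding intact.)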
+ (_, _, IpAttachState::Attached) => { + part_out + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::state.eq(target_state), + )) + .execute_async(&*conn) + .await + } + _ => return Err(Error::internal_error("unreachable")), + } + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } } diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index 188f5c30c9..c01f40e791 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -11,6 +11,7 @@ use crate::context::OpContext; use crate::db; use crate::db::collection_detach_many::DatastoreDetachManyTarget; use crate::db::collection_detach_many::DetachManyError; +use crate::db::collection_detach_many::DetachManyFromCollectionStatement; use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; use crate::db::error::public_error_from_diesel; @@ -28,6 +29,7 @@ use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_db_model::Disk; use nexus_db_model::VmmRuntimeState; use omicron_common::api; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -405,59 +407,63 @@ impl DataStore { let ok_to_detach_disk_state_labels: Vec<_> = ok_to_detach_disk_states.iter().map(|s| s.label()).collect(); - let _instance = Instance::detach_resources( - authz_instance.id(), - instance::table.into_boxed().filter( - instance::dsl::state - .eq_any(ok_to_delete_instance_states) - .and(instance::dsl::active_propolis_id.is_null()), - ), - disk::table.into_boxed().filter( - disk::dsl::disk_state.eq_any(ok_to_detach_disk_state_labels), - ), - diesel::update(instance::dsl::instance).set(( - instance::dsl::state.eq(destroyed), - instance::dsl::time_deleted.eq(Utc::now()), - )), - diesel::update(disk::dsl::disk).set(( - disk::dsl::disk_state.eq(detached_label), - disk::dsl::attach_instance_id.eq(Option::::None), - disk::dsl::slot.eq(Option::::None), - )), - ) - .detach_and_get_result_async( - &*self.pool_connection_authorized(opctx).await?, - ) - .await - .map_err(|e| match e { - DetachManyError::CollectionNotFound => Error::not_found_by_id( - ResourceType::Instance, - &authz_instance.id(), - ), - DetachManyError::NoUpdate { collection } => { - if collection.runtime_state.propolis_id.is_some() { - return Error::invalid_request( + let stmt: DetachManyFromCollectionStatement = + Instance::detach_resources( + authz_instance.id(), + instance::table.into_boxed().filter( + instance::dsl::state + .eq_any(ok_to_delete_instance_states) + .and(instance::dsl::active_propolis_id.is_null()), + ), + disk::table.into_boxed().filter( + disk::dsl::disk_state + .eq_any(ok_to_detach_disk_state_labels), + ), + diesel::update(instance::dsl::instance).set(( + instance::dsl::state.eq(destroyed), + instance::dsl::time_deleted.eq(Utc::now()), + )), + diesel::update(disk::dsl::disk).set(( + disk::dsl::disk_state.eq(detached_label), + disk::dsl::attach_instance_id.eq(Option::::None), + disk::dsl::slot.eq(Option::::None), + )), + ); + + let _instance = stmt + .detach_and_get_result_async( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .map_err(|e| match e { + DetachManyError::CollectionNotFound => Error::not_found_by_id( + ResourceType::Instance, + &authz_instance.id(), + ), + DetachManyError::NoUpdate { collection } => { + if collection.runtime_state.propolis_id.is_some() { + return Error::invalid_request( "cannot delete 
instance: instance is running or has \ not yet fully stopped", ); - } - let instance_state = - collection.runtime_state.nexus_state.state(); - match instance_state { - api::external::InstanceState::Stopped - | api::external::InstanceState::Failed => { - Error::internal_error("cannot delete instance") } - _ => Error::invalid_request(&format!( - "instance cannot be deleted in state \"{}\"", - instance_state, - )), + let instance_state = + collection.runtime_state.nexus_state.state(); + match instance_state { + api::external::InstanceState::Stopped + | api::external::InstanceState::Failed => { + Error::internal_error("cannot delete instance") + } + _ => Error::invalid_request(&format!( + "instance cannot be deleted in state \"{}\"", + instance_state, + )), + } } - } - DetachManyError::DatabaseError(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } - })?; + DetachManyError::DatabaseError(e) => { + public_error_from_diesel(e, ErrorHandler::Server) + } + })?; Ok(()) } diff --git a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs index a44fed4cdf..655a267fe1 100644 --- a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs +++ b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs @@ -23,12 +23,14 @@ impl DataStore { &self, opctx: &OpContext, nat_entry: Ipv4NatValues, - ) -> CreateResult<()> { + ) -> CreateResult { use db::schema::ipv4_nat_entry::dsl; use diesel::sql_types; // Look up any NAT entries that already have the exact parameters // we're trying to INSERT. + // We want to return any existing entry, but not to mask the UniqueViolation + // when trying to use an existing IP + port range with a different target. let matching_entry_subquery = dsl::ipv4_nat_entry .filter(dsl::external_address.eq(nat_entry.external_address)) .filter(dsl::first_port.eq(nat_entry.first_port)) @@ -58,7 +60,7 @@ impl DataStore { )) .filter(diesel::dsl::not(diesel::dsl::exists(matching_entry_subquery))); - diesel::insert_into(dsl::ipv4_nat_entry) + let out = diesel::insert_into(dsl::ipv4_nat_entry) .values(new_entry_subquery) .into_columns(( dsl::external_address, @@ -68,11 +70,24 @@ impl DataStore { dsl::vni, dsl::mac, )) - .execute_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - - Ok(()) + .returning(Ipv4NatEntry::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await; + + match out { + Ok(o) => Ok(o), + Err(diesel::result::Error::NotFound) => { + // Idempotent ensure. Annoyingly, we can't easily extract + // the existing row as part of the insert query: + // - (SELECT ..) UNION (INSERT INTO .. RETURNING ..) isn't + // allowed by crdb. + // - Can't ON CONFLICT with a partial constraint, so we can't + // do a no-op write and return the row that way either. + // So, we do another lookup. 
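+ // Any row found by this by-values lookup necessarily matches what we
+ // just tried to insert, so returning it preserves idempotency.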
+ self.ipv4_nat_find_by_values(opctx, nat_entry).await + } + Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)), + } } pub async fn ipv4_nat_delete( diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index d61ff15a3d..5fd16e2633 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -408,6 +408,7 @@ mod test { use chrono::{Duration, Utc}; use futures::stream; use futures::StreamExt; + use nexus_db_model::IpAttachState; use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::params; use omicron_common::api::external::DataPageParams; @@ -1625,7 +1626,8 @@ mod test { // Create a few records. let now = Utc::now(); let instance_id = Uuid::new_v4(); - let ips = (0..4) + let kinds = [IpKind::SNat, IpKind::Ephemeral]; + let ips = (0..2) .map(|i| ExternalIp { id: Uuid::new_v4(), name: None, @@ -1638,12 +1640,13 @@ mod test { project_id: None, is_service: false, parent_id: Some(instance_id), - kind: IpKind::Ephemeral, + kind: kinds[i as usize], ip: ipnetwork::IpNetwork::from(IpAddr::from(Ipv4Addr::new( 10, 0, 0, i, ))), first_port: crate::db::model::SqlU16(0), last_port: crate::db::model::SqlU16(10), + state: nexus_db_model::IpAttachState::Attached, }) .collect::>(); diesel::insert_into(dsl::external_ip) @@ -1705,6 +1708,7 @@ mod test { ))), first_port: crate::db::model::SqlU16(0), last_port: crate::db::model::SqlU16(10), + state: nexus_db_model::IpAttachState::Attached, }; diesel::insert_into(dsl::external_ip) .values(ip.clone()) @@ -1775,6 +1779,7 @@ mod test { ip: addresses.next().unwrap().into(), first_port: crate::db::model::SqlU16(0), last_port: crate::db::model::SqlU16(10), + state: nexus_db_model::IpAttachState::Attached, }; // Combinations of NULL and non-NULL for: @@ -1782,6 +1787,7 @@ mod test { // - description // - parent (instance / service) UUID // - project UUID + // - attach state let names = [None, Some("foo")]; let descriptions = [None, Some("foo".to_string())]; let parent_ids = [None, Some(Uuid::new_v4())]; @@ -1822,6 +1828,12 @@ mod test { continue; } + let state = if parent_id.is_some() { + IpAttachState::Attached + } else { + IpAttachState::Detached + }; + let new_ip = ExternalIp { id: Uuid::new_v4(), name: name_local.clone(), @@ -1830,6 +1842,7 @@ mod test { is_service, parent_id: *parent_id, project_id: *project_id, + state, ..ip }; @@ -1902,6 +1915,11 @@ mod test { let name_local = name.map(|v| { db::model::Name(Name::try_from(v.to_string()).unwrap()) }); + let state = if parent_id.is_some() { + IpAttachState::Attached + } else { + IpAttachState::Detached + }; let new_ip = ExternalIp { id: Uuid::new_v4(), name: name_local, @@ -1911,6 +1929,7 @@ mod test { is_service, parent_id: *parent_id, project_id: *project_id, + state, ..ip }; let res = diesel::insert_into(dsl::external_ip) @@ -1918,9 +1937,10 @@ mod test { .execute_async(&*conn) .await; let ip_type = if is_service { "Service" } else { "Instance" }; + let null_snat_parent = parent_id.is_none() && kind == IpKind::SNat; if name.is_none() && description.is_none() - && parent_id.is_some() + && !null_snat_parent && project_id.is_none() { // Name/description must be NULL, instance ID cannot diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs index 090c6865b7..e8ef721e98 100644 --- a/nexus/db-queries/src/db/pool_connection.rs +++ b/nexus/db-queries/src/db/pool_connection.rs @@ -47,6 +47,7 @@ static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[ 
"hw_rot_slot", "identity_type", "instance_state", + "ip_attach_state", "ip_kind", "ip_pool_resource_type", "network_interface_kind", diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 49403aac61..8114b9e363 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -26,10 +26,42 @@ use diesel::Column; use diesel::Expression; use diesel::QueryResult; use diesel::RunQueryDsl; +use nexus_db_model::InstanceState as DbInstanceState; +use nexus_db_model::IpAttachState; +use nexus_db_model::IpAttachStateEnum; use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_common::api::external; +use omicron_common::api::external::InstanceState as ApiInstanceState; use uuid::Uuid; +// Broadly, we want users to be able to attach/detach at will +// once an instance is created and functional. +pub const SAFE_TO_ATTACH_INSTANCE_STATES_CREATING: [DbInstanceState; 3] = [ + DbInstanceState(ApiInstanceState::Stopped), + DbInstanceState(ApiInstanceState::Running), + DbInstanceState(ApiInstanceState::Creating), +]; +pub const SAFE_TO_ATTACH_INSTANCE_STATES: [DbInstanceState; 2] = [ + DbInstanceState(ApiInstanceState::Stopped), + DbInstanceState(ApiInstanceState::Running), +]; +// If we're in a state which will naturally resolve to either +// stopped/running, we want users to know that the request can be +// retried safely via Error::unavail. +// TODO: We currently stop if there's a migration or other state change. +// There may be a good case for RPWing +// external_ip_state -> { NAT RPW, sled-agent } in future. +pub const SAFE_TRANSIENT_INSTANCE_STATES: [DbInstanceState; 5] = [ + DbInstanceState(ApiInstanceState::Starting), + DbInstanceState(ApiInstanceState::Stopping), + DbInstanceState(ApiInstanceState::Creating), + DbInstanceState(ApiInstanceState::Rebooting), + DbInstanceState(ApiInstanceState::Migrating), +]; + +/// The maximum number of disks that can be attached to an instance. +pub const MAX_EXTERNAL_IPS_PER_INSTANCE: u32 = 32; + type FromClause = diesel::internal::table_macro::StaticQueryFragmentInstance; type IpPoolRangeFromClause = FromClause; @@ -99,7 +131,8 @@ const MAX_PORT: u16 = u16::MAX; /// candidate_ip AS ip, /// CAST(candidate_first_port AS INT4) AS first_port, /// CAST(candidate_last_port AS INT4) AS last_port, -/// AS project_id +/// AS project_id, +/// AS state /// FROM /// SELECT * FROM ( /// -- Select all IP addresses by pool and range. @@ -378,6 +411,14 @@ impl NextExternalIp { out.push_bind_param::, Option>(self.ip.project_id())?; out.push_sql(" AS "); out.push_identifier(dsl::project_id::NAME)?; + out.push_sql(", "); + + // Initial state, mainly needed by Ephemeral/Floating IPs. 
+ out.push_bind_param::( + self.ip.state(), + )?; + out.push_sql(" AS "); + out.push_identifier(dsl::state::NAME)?; out.push_sql(" FROM ("); self.push_address_sequence_subquery(out.reborrow())?; @@ -822,10 +863,12 @@ impl RunQueryDsl for NextExternalIp {} #[cfg(test)] mod tests { + use crate::authz; use crate::context::OpContext; use crate::db::datastore::DataStore; use crate::db::datastore::SERVICE_IP_POOL_NAME; use crate::db::identity::Resource; + use crate::db::lookup::LookupPath; use crate::db::model::IpKind; use crate::db::model::IpPool; use crate::db::model::IpPoolRange; @@ -833,9 +876,13 @@ mod tests { use async_bb8_diesel::AsyncRunQueryDsl; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use dropshot::test_util::LogContext; + use nexus_db_model::ByteCount; + use nexus_db_model::Instance; + use nexus_db_model::InstanceCpuCount; use nexus_db_model::IpPoolResource; use nexus_db_model::IpPoolResourceType; use nexus_test_utils::db::test_setup_database; + use nexus_types::external_api::params::InstanceCreate; use nexus_types::external_api::shared::IpRange; use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_common::api::external::Error; @@ -878,7 +925,7 @@ mod tests { name: &str, range: IpRange, is_default: bool, - ) { + ) -> authz::IpPool { let pool = IpPool::new(&IdentityMetadataCreateParams { name: String::from(name).parse().unwrap(), description: format!("ip pool {}", name), @@ -902,6 +949,13 @@ mod tests { .expect("Failed to associate IP pool with silo"); self.initialize_ip_pool(name, range).await; + + LookupPath::new(&self.opctx, &self.db_datastore) + .ip_pool_id(pool.id()) + .lookup_for(authz::Action::Read) + .await + .unwrap() + .0 } async fn initialize_ip_pool(&self, name: &str, range: IpRange) { @@ -937,6 +991,37 @@ mod tests { .expect("Failed to create IP Pool range"); } + async fn create_instance(&self, name: &str) -> Uuid { + let instance_id = Uuid::new_v4(); + let project_id = Uuid::new_v4(); + let instance = Instance::new(instance_id, project_id, &InstanceCreate { + identity: IdentityMetadataCreateParams { name: String::from(name).parse().unwrap(), description: format!("instance {}", name) }, + ncpus: InstanceCpuCount(omicron_common::api::external::InstanceCpuCount(1)).into(), + memory: ByteCount(omicron_common::api::external::ByteCount::from_gibibytes_u32(1)).into(), + hostname: "test".into(), + user_data: vec![], + network_interfaces: Default::default(), + external_ips: vec![], + disks: vec![], + start: false, + }); + + let conn = self + .db_datastore + .pool_connection_authorized(&self.opctx) + .await + .unwrap(); + + use crate::db::schema::instance::dsl as instance_dsl; + diesel::insert_into(instance_dsl::instance) + .values(instance.clone()) + .execute_async(&*conn) + .await + .expect("Failed to create Instance"); + + instance_id + } + async fn default_pool_id(&self) -> Uuid { let (.., pool) = self .db_datastore @@ -1021,7 +1106,7 @@ mod tests { // Allocate an Ephemeral IP, which should take the entire port range of // the only address in the pool. 
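// (An ephemeral IP claims ports [0, 65535] on its address, unlike SNAT
// allocations, which share an address in NUM_SOURCE_NAT_PORTS-sized chunks.)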
- let instance_id = Uuid::new_v4(); + let instance_id = context.create_instance("for-eph").await; let ephemeral_ip = context .db_datastore .allocate_instance_ephemeral_ip( @@ -1029,16 +1114,18 @@ mod tests { Uuid::new_v4(), instance_id, /* pool_name = */ None, + true, ) .await - .expect("Failed to allocate Ephemeral IP when there is space"); + .expect("Failed to allocate Ephemeral IP when there is space") + .0; assert_eq!(ephemeral_ip.ip.ip(), range.last_address()); assert_eq!(ephemeral_ip.first_port.0, 0); assert_eq!(ephemeral_ip.last_port.0, super::MAX_PORT); // At this point, we should be able to allocate neither a new Ephemeral // nor any SNAT IPs. - let instance_id = Uuid::new_v4(); + let instance_id = context.create_instance("for-snat").await; let res = context .db_datastore .allocate_instance_snat_ip( @@ -1069,6 +1156,7 @@ mod tests { Uuid::new_v4(), instance_id, /* pool_name = */ None, + true, ) .await; assert!( @@ -1203,7 +1291,7 @@ mod tests { .unwrap(); context.create_ip_pool("default", range, true).await; - let instance_id = Uuid::new_v4(); + let instance_id = context.create_instance("all-the-ports").await; let id = Uuid::new_v4(); let pool_name = None; @@ -1214,9 +1302,11 @@ mod tests { id, instance_id, pool_name, + true, ) .await - .expect("Failed to allocate instance ephemeral IP address"); + .expect("Failed to allocate instance ephemeral IP address") + .0; assert_eq!(ip.kind, IpKind::Ephemeral); assert_eq!(ip.ip.ip(), range.first_address()); assert_eq!(ip.first_port.0, 0); @@ -1729,13 +1819,12 @@ mod tests { Ipv4Addr::new(10, 0, 0, 6), )) .unwrap(); - context.create_ip_pool("p1", second_range, false).await; + let p1 = context.create_ip_pool("p1", second_range, false).await; // Allocating an address on an instance in the second pool should be // respected, even though there are IPs available in the first. - let instance_id = Uuid::new_v4(); + let instance_id = context.create_instance("test").await; let id = Uuid::new_v4(); - let pool_name = Some(Name("p1".parse().unwrap())); let ip = context .db_datastore @@ -1743,10 +1832,12 @@ mod tests { &context.opctx, id, instance_id, - pool_name, + Some(p1), + true, ) .await - .expect("Failed to allocate instance ephemeral IP address"); + .expect("Failed to allocate instance ephemeral IP address") + .0; assert_eq!(ip.kind, IpKind::Ephemeral); assert_eq!(ip.ip.ip(), second_range.first_address()); assert_eq!(ip.first_port.0, 0); @@ -1772,24 +1863,26 @@ mod tests { let last_address = Ipv4Addr::new(10, 0, 0, 6); let second_range = IpRange::try_from((first_address, last_address)).unwrap(); - context.create_ip_pool("p1", second_range, false).await; + let p1 = context.create_ip_pool("p1", second_range, false).await; // Allocate all available addresses in the second pool. 
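// Each iteration creates a fresh instance, since an instance can hold
// at most one ephemeral IP at a time.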
- let instance_id = Uuid::new_v4(); - let pool_name = Some(Name("p1".parse().unwrap())); let first_octet = first_address.octets()[3]; let last_octet = last_address.octets()[3]; for octet in first_octet..=last_octet { + let instance_id = + context.create_instance(&format!("o{octet}")).await; let ip = context .db_datastore .allocate_instance_ephemeral_ip( &context.opctx, Uuid::new_v4(), instance_id, - pool_name.clone(), + Some(p1.clone()), + true, ) .await - .expect("Failed to allocate instance ephemeral IP address"); + .expect("Failed to allocate instance ephemeral IP address") + .0; println!("{ip:#?}"); if let IpAddr::V4(addr) = ip.ip.ip() { assert_eq!(addr.octets()[3], octet); @@ -1799,13 +1892,15 @@ mod tests { } // Allocating another address should _fail_, and not use the first pool. + let instance_id = context.create_instance("final").await; context .db_datastore .allocate_instance_ephemeral_ip( &context.opctx, Uuid::new_v4(), instance_id, - pool_name, + Some(p1), + true, ) .await .expect_err("Should not use IP addresses from a different pool"); diff --git a/nexus/src/app/external_ip.rs b/nexus/src/app/external_ip.rs index 404f597288..45b05fbb0b 100644 --- a/nexus/src/app/external_ip.rs +++ b/nexus/src/app/external_ip.rs @@ -4,14 +4,18 @@ //! External IP addresses for instances +use std::sync::Arc; + use crate::external_api::views::ExternalIp; use crate::external_api::views::FloatingIp; +use nexus_db_model::IpAttachState; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::lookup; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::IpKind; use nexus_types::external_api::params; +use nexus_types::external_api::views; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; @@ -19,6 +23,7 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; +use omicron_common::api::external::UpdateResult; impl super::Nexus { pub(crate) async fn instance_list_external_ips( @@ -34,7 +39,9 @@ impl super::Nexus { .await? .into_iter() .filter_map(|ip| { - if ip.kind == IpKind::SNat { + if ip.kind == IpKind::SNat + || ip.state != IpAttachState::Attached + { None } else { Some(ip.try_into().unwrap()) @@ -102,9 +109,19 @@ impl super::Nexus { let (.., authz_project) = project_lookup.lookup_for(authz::Action::CreateChild).await?; + let pool = match ¶ms.pool { + Some(pool) => Some( + self.ip_pool_lookup(opctx, pool)? + .lookup_for(authz::Action::Read) + .await? + .0, + ), + None => None, + }; + Ok(self .db_datastore - .allocate_floating_ip(opctx, authz_project.id(), params) + .allocate_floating_ip(opctx, authz_project.id(), params, pool) .await? 
.try_into() .unwrap()) @@ -115,9 +132,68 @@ impl super::Nexus { opctx: &OpContext, ip_lookup: lookup::FloatingIp<'_>, ) -> DeleteResult { + let (.., authz_fip) = + ip_lookup.lookup_for(authz::Action::Delete).await?; + + self.db_datastore.floating_ip_delete(opctx, &authz_fip).await + } + + pub(crate) async fn floating_ip_attach( + self: &Arc, + opctx: &OpContext, + fip_selector: params::FloatingIpSelector, + target: params::FloatingIpAttach, + ) -> UpdateResult { + match target.kind { + params::FloatingIpParentKind::Instance => { + let instance_selector = params::InstanceSelector { + project: fip_selector.project, + instance: target.parent, + }; + let instance = + self.instance_lookup(opctx, instance_selector)?; + let attach_params = ¶ms::ExternalIpCreate::Floating { + floating_ip: fip_selector.floating_ip, + }; + self.instance_attach_external_ip( + opctx, + &instance, + attach_params, + ) + .await + .and_then(FloatingIp::try_from) + } + } + } + + pub(crate) async fn floating_ip_detach( + self: &Arc, + opctx: &OpContext, + ip_lookup: lookup::FloatingIp<'_>, + ) -> UpdateResult { + // XXX: Today, this only happens for instances. + // In future, we will need to separate out by the *type* of + // parent attached to a floating IP. We don't yet store this + // in db for user-facing FIPs (is_service => internal-only + // at this point). let (.., authz_fip, db_fip) = - ip_lookup.fetch_for(authz::Action::Delete).await?; + ip_lookup.fetch_for(authz::Action::Modify).await?; + + let Some(parent_id) = db_fip.parent_id else { + return Ok(db_fip.into()); + }; + + let instance_selector = params::InstanceSelector { + project: None, + instance: parent_id.into(), + }; + let instance = self.instance_lookup(opctx, instance_selector)?; + let attach_params = ¶ms::ExternalIpDetach::Floating { + floating_ip: authz_fip.id().into(), + }; - self.db_datastore.floating_ip_delete(opctx, &authz_fip, &db_fip).await + self.instance_detach_external_ip(opctx, &instance, attach_params) + .await + .and_then(FloatingIp::try_from) } } diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 778c5e2fe1..f924653525 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -17,6 +17,7 @@ use crate::external_api::params; use cancel_safe_futures::prelude::*; use futures::future::Fuse; use futures::{FutureExt, SinkExt, StreamExt}; +use nexus_db_model::IpAttachState; use nexus_db_model::IpKind; use nexus_db_queries::authn; use nexus_db_queries::authz; @@ -26,6 +27,7 @@ use nexus_db_queries::db::datastore::InstanceAndActiveVmm; use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup; use nexus_db_queries::db::lookup::LookupPath; +use nexus_types::external_api::views; use omicron_common::address::PROPOLIS_PORT; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::ByteCount; @@ -1052,6 +1054,15 @@ impl super::Nexus { )); } + // If there are any external IPs not yet fully attached/detached,then + // there are attach/detach sagas in progress. That should complete in + // its own time, so return a 503 to indicate a possible retry. + if external_ips.iter().any(|v| v.state != IpAttachState::Attached) { + return Err(Error::unavail( + "External IP attach/detach is in progress during instance_ensure_registered" + )); + } + // Partition remaining external IPs by class: we can have at most // one ephemeral ip. 
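// (MAX_EPHEMERAL_IPS_PER_INSTANCE is 1, so more than one entry here
// would indicate corrupted external IP records rather than user error.)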
let (ephemeral_ips, floating_ips): (Vec<_>, Vec<_>) = external_ips @@ -1904,6 +1915,73 @@ impl super::Nexus { Ok(()) } + + /// Attach an external IP to an instance. + pub(crate) async fn instance_attach_external_ip( + self: &Arc, + opctx: &OpContext, + instance_lookup: &lookup::Instance<'_>, + ext_ip: ¶ms::ExternalIpCreate, + ) -> UpdateResult { + let (.., authz_project, authz_instance) = + instance_lookup.lookup_for(authz::Action::Modify).await?; + + let saga_params = sagas::instance_ip_attach::Params { + create_params: ext_ip.clone(), + authz_instance, + project_id: authz_project.id(), + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + }; + + let saga_outputs = self + .execute_saga::( + saga_params, + ) + .await?; + + saga_outputs + .lookup_node_output::("output") + .map_err(|e| Error::internal_error(&format!("{:#}", &e))) + .internal_context("looking up output from ip attach saga") + } + + /// Detach an external IP from an instance. + pub(crate) async fn instance_detach_external_ip( + self: &Arc, + opctx: &OpContext, + instance_lookup: &lookup::Instance<'_>, + ext_ip: ¶ms::ExternalIpDetach, + ) -> UpdateResult { + let (.., authz_project, authz_instance) = + instance_lookup.lookup_for(authz::Action::Modify).await?; + + let saga_params = sagas::instance_ip_detach::Params { + delete_params: ext_ip.clone(), + authz_instance, + project_id: authz_project.id(), + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + }; + + let saga_outputs = self + .execute_saga::( + saga_params, + ) + .await?; + + saga_outputs + .lookup_node_output::>("output") + .map_err(|e| Error::internal_error(&format!("{:#}", &e))) + .internal_context("looking up output from ip detach saga") + .and_then(|eip| { + // Saga idempotency means we'll get Ok(None) on double detach + // of an ephemeral IP. Convert this case to an error here. + eip.ok_or_else(|| { + Error::invalid_request( + "instance does not have an ephemeral IP attached", + ) + }) + }) + } } #[cfg(test)] diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs index 8f97642c88..c0bc5d237b 100644 --- a/nexus/src/app/instance_network.rs +++ b/nexus/src/app/instance_network.rs @@ -7,6 +7,9 @@ use crate::app::sagas::retry_until_known_result; use ipnetwork::IpNetwork; use ipnetwork::Ipv6Network; +use nexus_db_model::ExternalIp; +use nexus_db_model::IpAttachState; +use nexus_db_model::Ipv4NatEntry; use nexus_db_model::Ipv4NatValues; use nexus_db_model::Vni as DbVni; use nexus_db_queries::authz; @@ -24,7 +27,6 @@ use sled_agent_client::types::DeleteVirtualNetworkInterfaceHost; use sled_agent_client::types::SetVirtualNetworkInterfaceHost; use std::collections::HashSet; use std::str::FromStr; -use std::sync::Arc; use uuid::Uuid; impl super::Nexus { @@ -276,6 +278,10 @@ impl super::Nexus { /// Ensures that the Dendrite configuration for the supplied instance is /// up-to-date. /// + /// Returns a list of live NAT RPW table entries from this call. Generally + /// these should only be needed for specific unwind operations, like in + /// the IP attach saga. + /// /// # Parameters /// /// - `opctx`: An operation context that grants read and list-children @@ -283,22 +289,21 @@ impl super::Nexus { /// - `instance_id`: The ID of the instance to act on. /// - `sled_ip_address`: The internal IP address assigned to the sled's /// sled agent. - /// - `ip_index_filter`: An optional filter on the index into the instance's + /// - `ip_filter`: An optional filter on the index into the instance's /// external IP array. 
- /// - If this is `Some(n)`, this routine configures DPD state for only the - /// Nth external IP in the collection returned from CRDB. The caller is - /// responsible for ensuring that the IP collection has stable indices - /// when making this call. + /// - If this is `Some(id)`, this routine configures DPD state for only the + /// external IP with `id` in the collection returned from CRDB. This will + /// proceed even when the target IP is 'attaching'. /// - If this is `None`, this routine configures DPD for all external - /// IPs. + /// IPs and *will back out* if any IPs are not yet fully attached to + /// the instance. pub(crate) async fn instance_ensure_dpd_config( &self, opctx: &OpContext, instance_id: Uuid, sled_ip_address: &std::net::SocketAddrV6, - ip_index_filter: Option, - dpd_client: &Arc, - ) -> Result<(), Error> { + ip_filter: Option, + ) -> Result, Error> { let log = &self.log; info!(log, "looking up instance's primary network interface"; @@ -309,6 +314,9 @@ impl super::Nexus { .lookup_for(authz::Action::ListChildren) .await?; + // XXX: Need to abstract over v6 and v4 entries here. + let mut nat_entries = vec![]; + // All external IPs map to the primary network interface, so find that // interface. If there is no such interface, there's no way to route // traffic destined to those IPs, so there's nothing to configure and @@ -324,7 +332,7 @@ impl super::Nexus { None => { info!(log, "Instance has no primary network interface"; "instance_id" => %instance_id); - return Ok(()); + return Ok(nat_entries); } }; @@ -344,49 +352,104 @@ impl super::Nexus { .instance_lookup_external_ips(&opctx, instance_id) .await?; - if let Some(wanted_index) = ip_index_filter { - if let None = ips.get(wanted_index) { + let (ips_of_interest, must_all_be_attached) = if let Some(wanted_id) = + ip_filter + { + if let Some(ip) = ips.iter().find(|v| v.id == wanted_id) { + (std::slice::from_ref(ip), false) + } else { return Err(Error::internal_error(&format!( - "failed to find external ip address at index: {}", - wanted_index + "failed to find external ip address with id: {wanted_id}, saw {ips:?}", ))); } + } else { + (&ips[..], true) + }; + + // This is performed so that an IP attach/detach will block the + // instance_start saga. Return service unavailable to indicate + // the request is retryable. + if must_all_be_attached + && ips_of_interest + .iter() + .any(|ip| ip.state != IpAttachState::Attached) + { + return Err(Error::unavail( + "cannot push all DPD state: IP attach/detach in progress", + )); } let sled_address = Ipv6Net(Ipv6Network::new(*sled_ip_address.ip(), 128).unwrap()); - for target_ip in ips - .iter() - .enumerate() - .filter(|(index, _)| { - if let Some(wanted_index) = ip_index_filter { - *index == wanted_index - } else { - true - } - }) - .map(|(_, ip)| ip) - { + // If all of our IPs are attached or are guaranteed to be owned + // by the saga calling this fn, then we need to disregard and + // remove conflicting rows. No other instance/service should be + // using these as its own, and we are dealing with detritus, e.g., + // the case where we have a concurrent stop -> detach followed + // by an attach to another instance, or other ongoing attach saga + // cleanup. 
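+ // Track the first failure and how far we got, so the partial progress
+ // made below can be rolled back before returning.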
+ let mut err_and_limit = None; + for (i, external_ip) in ips_of_interest.iter().enumerate() { // For each external ip, add a nat entry to the database - self.ensure_nat_entry( - target_ip, - sled_address, - &network_interface, - mac_address, - opctx, - ) - .await?; + if let Ok(id) = self + .ensure_nat_entry( + external_ip, + sled_address, + &network_interface, + mac_address, + opctx, + ) + .await + { + nat_entries.push(id); + continue; + } + + // We seem to be blocked by a bad row -- take it out and retry. + // This will return Ok() for a non-existent row. + if let Err(e) = self + .external_ip_delete_dpd_config_inner(opctx, external_ip) + .await + { + err_and_limit = Some((e, i)); + break; + }; + + match self + .ensure_nat_entry( + external_ip, + sled_address, + &network_interface, + mac_address, + opctx, + ) + .await + { + Ok(id) => nat_entries.push(id), + Err(e) => { + err_and_limit = Some((e, i)); + break; + } + } } - // Notify dendrite that there are changes for it to reconcile. - // In the event of a failure to notify dendrite, we'll log an error - // and rely on dendrite's RPW timer to catch it up. - if let Err(e) = dpd_client.ipv4_nat_trigger_update().await { - error!(self.log, "failed to notify dendrite of nat updates"; "error" => ?e); - }; + // In the event of an unresolvable failure, we need to remove + // the entries we just added because the undo won't call into + // `instance_delete_dpd_config`. These entries won't stop a + // future caller, but it's better not to pollute switch state. + if let Some((e, max)) = err_and_limit { + for external_ip in &ips_of_interest[..max] { + let _ = self + .external_ip_delete_dpd_config_inner(opctx, external_ip) + .await; + } + return Err(e); + } - Ok(()) + self.notify_dendrite_nat_state(Some(instance_id), true).await?; + + Ok(nat_entries) } async fn ensure_nat_entry( @@ -396,7 +459,7 @@ impl super::Nexus { network_interface: &sled_agent_client::types::NetworkInterface, mac_address: macaddr::MacAddr6, opctx: &OpContext, - ) -> Result<(), Error> { + ) -> Result { match target_ip.ip { IpNetwork::V4(v4net) => { let nat_entry = Ipv4NatValues { @@ -409,9 +472,10 @@ impl super::Nexus { omicron_common::api::external::MacAddr(mac_address), ), }; - self.db_datastore + Ok(self + .db_datastore .ensure_ipv4_nat_entry(opctx, nat_entry) - .await?; + .await?) } IpNetwork::V6(_v6net) => { // TODO: implement handling of v6 nat. @@ -419,13 +483,16 @@ impl super::Nexus { internal_message: "ipv6 nat is not yet implemented".into(), }); } - }; - Ok(()) + } } /// Attempts to delete all of the Dendrite NAT configuration for the /// instance identified by `authz_instance`. /// + /// Unlike `instance_ensure_dpd_config`, this function will disregard the + /// attachment states of any external IPs because likely callers (instance + /// delete) cannot be piecewise undone. + /// /// # Return value /// /// - `Ok(())` if all NAT entries were successfully deleted. @@ -435,6 +502,12 @@ impl super::Nexus { /// - If an operation fails while this routine is walking NAT entries, it /// will continue trying to delete subsequent entries but will return the /// first error it encountered. + /// - `ip_filter`: An optional filter on the index into the instance's + /// external IP array. + /// - If this is `Some(id)`, this routine configures DPD state for only the + /// external IP with `id` in the collection returned from CRDB. + /// - If this is `None`, this routine configures DPD for all external + /// IPs. 
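+ ///
+ /// On success, all boundary switches are asked to reconcile the updated
+ /// NAT table (see `notify_dendrite_nat_state`).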
pub(crate) async fn instance_delete_dpd_config( &self, opctx: &OpContext, @@ -451,37 +524,122 @@ impl super::Nexus { .instance_lookup_external_ips(opctx, instance_id) .await?; - let mut errors = vec![]; for entry in external_ips { - // Soft delete the NAT entry - match self - .db_datastore - .ipv4_nat_delete_by_external_ip(&opctx, &entry) - .await - { - Ok(_) => Ok(()), - Err(err) => match err { - Error::ObjectNotFound { .. } => { - warn!(log, "no matching nat entries to soft delete"); - Ok(()) - } - _ => { - let message = format!( - "failed to delete nat entry due to error: {err:?}" - ); - error!(log, "{}", message); - Err(Error::internal_error(&message)) - } - }, - }?; + self.external_ip_delete_dpd_config_inner(opctx, &entry).await?; } + self.notify_dendrite_nat_state(Some(instance_id), false).await + } + + /// Attempts to delete Dendrite NAT configuration for a single external IP. + /// + /// This function is primarily used to detach an IP which currently belongs + /// to a known instance. + pub(crate) async fn external_ip_delete_dpd_config( + &self, + opctx: &OpContext, + external_ip: &ExternalIp, + ) -> Result<(), Error> { + let log = &self.log; + let instance_id = external_ip.parent_id; + + info!(log, "deleting individual NAT entry from dpd configuration"; + "instance_id" => ?instance_id, + "external_ip" => %external_ip.ip); + + self.external_ip_delete_dpd_config_inner(opctx, external_ip).await?; + + self.notify_dendrite_nat_state(instance_id, false).await + } + + /// Attempts to soft-delete Dendrite NAT configuration for a specific entry + /// via ID. + /// + /// This function is needed to safely cleanup in at least one unwind scenario + /// where a potential second user could need to use the same (IP, portset) pair, + /// e.g. a rapid reattach or a reallocated ephemeral IP. + pub(crate) async fn delete_dpd_config_by_entry( + &self, + opctx: &OpContext, + nat_entry: &Ipv4NatEntry, + ) -> Result<(), Error> { + let log = &self.log; + + info!(log, "deleting individual NAT entry from dpd configuration"; + "id" => ?nat_entry.id, + "version_added" => %nat_entry.external_address.0); + + match self.db_datastore.ipv4_nat_delete(&opctx, nat_entry).await { + Ok(_) => {} + Err(err) => match err { + Error::ObjectNotFound { .. } => { + warn!(log, "no matching nat entries to soft delete"); + } + _ => { + let message = format!( + "failed to delete nat entry due to error: {err:?}" + ); + error!(log, "{}", message); + return Err(Error::internal_error(&message)); + } + }, + } + + self.notify_dendrite_nat_state(None, false).await + } + + /// Soft-delete an individual external IP from the NAT RPW, without + /// triggering a Dendrite notification. + async fn external_ip_delete_dpd_config_inner( + &self, + opctx: &OpContext, + external_ip: &ExternalIp, + ) -> Result<(), Error> { + let log = &self.log; + + // Soft delete the NAT entry + match self + .db_datastore + .ipv4_nat_delete_by_external_ip(&opctx, external_ip) + .await + { + Ok(_) => Ok(()), + Err(err) => match err { + Error::ObjectNotFound { .. } => { + warn!(log, "no matching nat entries to soft delete"); + Ok(()) + } + _ => { + let message = format!( + "failed to delete nat entry due to error: {err:?}" + ); + error!(log, "{}", message); + Err(Error::internal_error(&message)) + } + }, + } + } + + /// Informs all available boundary switches that the set of NAT entries + /// has changed. + /// + /// When `fail_fast` is set, this function will return on any error when + /// acquiring a handle to a DPD client. 
Otherwise, it will attempt to notify + /// all clients and then finally return the first error. + async fn notify_dendrite_nat_state( + &self, + instance_id: Option, + fail_fast: bool, + ) -> Result<(), Error> { + // Querying boundary switches also requires fleet access and the use of the + // instance allocator context. let boundary_switches = self.boundary_switches(&self.opctx_alloc).await?; + let mut errors = vec![]; for switch in &boundary_switches { debug!(&self.log, "notifying dendrite of updates"; - "instance_id" => %authz_instance.id(), + "instance_id" => ?instance_id, "switch" => switch.to_string()); let client_result = self.dpd_clients.get(switch).ok_or_else(|| { @@ -494,7 +652,11 @@ impl super::Nexus { Ok(client) => client, Err(new_error) => { errors.push(new_error); - continue; + if fail_fast { + break; + } else { + continue; + } } }; @@ -506,7 +668,7 @@ impl super::Nexus { }; } - if let Some(e) = errors.into_iter().nth(0) { + if let Some(e) = errors.into_iter().next() { return Err(e); } @@ -525,58 +687,9 @@ impl super::Nexus { ) -> Result<(), Error> { self.delete_instance_v2p_mappings(opctx, authz_instance.id()).await?; - let external_ips = self - .datastore() - .instance_lookup_external_ips(opctx, authz_instance.id()) - .await?; - - let boundary_switches = self.boundary_switches(opctx).await?; - for external_ip in external_ips { - match self - .db_datastore - .ipv4_nat_delete_by_external_ip(&opctx, &external_ip) - .await - { - Ok(_) => Ok(()), - Err(err) => match err { - Error::ObjectNotFound { .. } => { - warn!( - self.log, - "no matching nat entries to soft delete" - ); - Ok(()) - } - _ => { - let message = format!( - "failed to delete nat entry due to error: {err:?}" - ); - error!(self.log, "{}", message); - Err(Error::internal_error(&message)) - } - }, - }?; - } - - for switch in &boundary_switches { - debug!(&self.log, "notifying dendrite of updates"; - "instance_id" => %authz_instance.id(), - "switch" => switch.to_string()); - - let dpd_client = self.dpd_clients.get(switch).ok_or_else(|| { - Error::internal_error(&format!( - "unable to find dendrite client for {switch}" - )) - })?; + self.instance_delete_dpd_config(opctx, authz_instance).await?; - // Notify dendrite that there are changes for it to reconcile. - // In the event of a failure to notify dendrite, we'll log an error - // and rely on dendrite's RPW timer to catch it up. 
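Note that `fail_fast` in `notify_dendrite_nat_state` above only applies to failures acquiring a DPD client handle; failures of the trigger call itself are logged and left to Dendrite's RPW timer to reconcile. A condensed sketch of the notification pattern, collapsing both failure modes into one hypothetical `notify` callback:

    // Illustrative only: `notify` stands in for looking up a DPD client
    // and calling `ipv4_nat_trigger_update`.
    fn notify_all<S, E>(
        switches: &[S],
        fail_fast: bool,
        mut notify: impl FnMut(&S) -> Result<(), E>,
    ) -> Result<(), E> {
        let mut errors = Vec::new();
        for switch in switches {
            if let Err(e) = notify(switch) {
                errors.push(e);
                if fail_fast {
                    break;
                }
            }
        }
        // Surface only the first error, as the function above does.
        match errors.into_iter().next() {
            Some(e) => Err(e),
            None => Ok(()),
        }
    }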
- if let Err(e) = dpd_client.ipv4_nat_trigger_update().await { - error!(self.log, "failed to notify dendrite of nat updates"; "error" => ?e); - }; - } - - Ok(()) + self.notify_dendrite_nat_state(Some(authz_instance.id()), true).await } /// Given old and new instance runtime states, determines the desired @@ -715,24 +828,13 @@ impl super::Nexus { .fetch() .await?; - let boundary_switches = - self.boundary_switches(&self.opctx_alloc).await?; - - for switch in &boundary_switches { - let dpd_client = self.dpd_clients.get(switch).ok_or_else(|| { - Error::internal_error(&format!( - "could not find dpd client for {switch}" - )) - })?; - self.instance_ensure_dpd_config( - opctx, - instance_id, - &sled.address(), - None, - dpd_client, - ) - .await?; - } + self.instance_ensure_dpd_config( + opctx, + instance_id, + &sled.address(), + None, + ) + .await?; Ok(()) } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 80bfd5ef22..d643969924 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -87,7 +87,9 @@ pub(crate) const MAX_NICS_PER_INSTANCE: usize = 8; // The value here is arbitrary, but we need *a* limit for the instance // create saga to have a bounded DAG. We might want to only enforce // this during instance create (rather than live attach) in future. -pub(crate) const MAX_EXTERNAL_IPS_PER_INSTANCE: usize = 32; +pub(crate) const MAX_EXTERNAL_IPS_PER_INSTANCE: usize = + nexus_db_queries::db::queries::external_ip::MAX_EXTERNAL_IPS_PER_INSTANCE + as usize; pub(crate) const MAX_EPHEMERAL_IPS_PER_INSTANCE: usize = 1; pub const MAX_VCPU_PER_INSTANCE: u16 = 64; diff --git a/nexus/src/app/sagas/instance_common.rs b/nexus/src/app/sagas/instance_common.rs index 8f9197b03b..445abd5daf 100644 --- a/nexus/src/app/sagas/instance_common.rs +++ b/nexus/src/app/sagas/instance_common.rs @@ -8,12 +8,22 @@ use std::net::{IpAddr, Ipv6Addr}; use crate::Nexus; use chrono::Utc; -use nexus_db_model::{ByteCount, SledReservationConstraints, SledResource}; -use nexus_db_queries::{context::OpContext, db, db::DataStore}; +use nexus_db_model::{ + ByteCount, ExternalIp, IpAttachState, Ipv4NatEntry, + SledReservationConstraints, SledResource, +}; +use nexus_db_queries::authz; +use nexus_db_queries::db::lookup::LookupPath; +use nexus_db_queries::db::queries::external_ip::SAFE_TRANSIENT_INSTANCE_STATES; +use nexus_db_queries::{authn, context::OpContext, db, db::DataStore}; +use omicron_common::api::external::Error; use omicron_common::api::external::InstanceState; +use serde::{Deserialize, Serialize}; use steno::ActionError; use uuid::Uuid; +use super::NexusActionContext; + /// Reserves resources for a new VMM whose instance has `ncpus` guest logical /// processors and `guest_memory` bytes of guest RAM. The selected sled is /// random within the set of sleds allowed by the supplied `constraints`. @@ -133,3 +143,325 @@ pub(super) async fn allocate_vmm_ipv6( .await .map_err(ActionError::action_failed) } + +/// External IP state needed for IP attach/detachment. +/// +/// This holds a record of the mid-processing external IP, where possible. +/// there are cases where this might not be known (e.g., double detach of an +/// ephemeral IP). +/// In particular we need to explicitly no-op if not `do_saga`, to prevent +/// failures borne from instance state changes from knocking out a valid IP binding. 
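Every DPD/OPTE helper below opens with the same gate over this pair: skip the node entirely when `do_saga` is unset, and treat a set flag without a record as a programmer error. A minimal sketch of that gate, using the field names of the struct defined just below (the free-function form is hypothetical; in the code the checks are inlined per helper):

    fn unwrap_target_ip(
        target: &ModifyStateForExternalIp,
    ) -> Result<Option<&ExternalIp>, Error> {
        if !target.do_saga {
            // A repeat attach/detach resolved to a no-op: skip this node
            // rather than failing and unwinding a valid binding.
            return Ok(None);
        }
        target.external_ip.as_ref().map(Some).ok_or_else(|| {
            Error::internal_error("tried to `do_saga` without valid external IP")
        })
    }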
+#[derive(Debug, Deserialize, Serialize)]
+pub struct ModifyStateForExternalIp {
+ pub external_ip: Option<ExternalIp>,
+ pub do_saga: bool,
+}
+
+/// Move an external IP from one state to another as a saga operation,
+/// returning `Ok(true)` if the record was successfully moved and `Ok(false)`
+/// if the record was lost.
+///
+/// Returns `Err` if given an illegal state transition or several rows
+/// were updated, which are programmer errors.
+pub async fn instance_ip_move_state(
+ sagactx: &NexusActionContext,
+ serialized_authn: &authn::saga::Serialized,
+ from: IpAttachState,
+ to: IpAttachState,
+ new_ip: &ModifyStateForExternalIp,
+) -> Result<bool, ActionError> {
+ let osagactx = sagactx.user_data();
+ let datastore = osagactx.datastore();
+ let opctx =
+ crate::context::op_context_for_saga_action(&sagactx, serialized_authn);
+
+ if !new_ip.do_saga {
+ return Ok(true);
+ }
+ let Some(new_ip) = new_ip.external_ip.as_ref() else {
+ return Err(ActionError::action_failed(Error::internal_error(
+ "tried to `do_saga` without valid external IP",
+ )));
+ };
+
+ match datastore
+ .external_ip_complete_op(&opctx, new_ip.id, new_ip.kind, from, to)
+ .await
+ .map_err(ActionError::action_failed)?
+ {
+ 0 => Ok(false),
+ 1 => Ok(true),
+ _ => Err(ActionError::action_failed(Error::internal_error(
+ "ip state change affected > 1 row",
+ ))),
+ }
+}
+
+pub async fn instance_ip_get_instance_state(
+ sagactx: &NexusActionContext,
+ serialized_authn: &authn::saga::Serialized,
+ authz_instance: &authz::Instance,
+ verb: &str,
+) -> Result<Option<Uuid>, ActionError> {
+ // XXX: we can get instance state (but not sled ID) in the same
+ // transaction as attach (but not detach) with the current design.
+ // We need to re-query for sled ID anyhow, so keep this consistent
+ // between attach/detach.
+ let osagactx = sagactx.user_data();
+ let datastore = osagactx.datastore();
+ let opctx =
+ crate::context::op_context_for_saga_action(&sagactx, serialized_authn);
+
+ let inst_and_vmm = datastore
+ .instance_fetch_with_vmm(&opctx, authz_instance)
+ .await
+ .map_err(ActionError::action_failed)?;
+
+ let found_state = inst_and_vmm.instance().runtime_state.nexus_state.0;
+ let mut sled_id = inst_and_vmm.sled_id();
+
+ // Arriving here means we started in a correct state (running/stopped).
+ // We need to consider how we interact with the other sagas/ops:
+ // - starting: our claim on an IP will block it from moving past
+ // DPD_ensure and instance_start will undo. If we complete
+ // before then, it can move past and will fill in routes/opte.
+ // Act as though we have no sled_id.
+ // - stopping: this is not sagaized, and the propolis/sled-agent might
+ // go away. Act as though stopped if we catch it here,
+ // otherwise convert OPTE ensure to 'service unavailable'
+ // and undo.
+ // - deleting: can only be called from stopped -- we won't push to dpd
+ // or sled-agent, and the IP record might be deleted or forcibly
+ // detached. Catch here just in case.
+ match found_state {
+ InstanceState::Stopped
+ | InstanceState::Starting
+ | InstanceState::Stopping => {
+ sled_id = None;
+ }
+ InstanceState::Running => {}
+ state if SAFE_TRANSIENT_INSTANCE_STATES.contains(&state.into()) => {
+ return Err(ActionError::action_failed(Error::unavail(&format!(
+ "can't {verb} in transient state {state}"
+ ))))
+ }
+ InstanceState::Destroyed => {
+ return Err(ActionError::action_failed(Error::not_found_by_id(
+ omicron_common::api::external::ResourceType::Instance,
+ &authz_instance.id(),
+ )))
+ }
+ // Final cases are repairing/failed.
+ _ => { + return Err(ActionError::action_failed(Error::invalid_request( + "cannot modify instance IPs, instance is in unhealthy state", + ))) + } + } + + Ok(sled_id) +} + +/// Adds a NAT entry to DPD, routing packets bound for `target_ip` to a +/// target sled. +/// +/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly +/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). +pub async fn instance_ip_add_nat( + sagactx: &NexusActionContext, + serialized_authn: &authn::saga::Serialized, + authz_instance: &authz::Instance, + sled_uuid: Option, + target_ip: ModifyStateForExternalIp, +) -> Result, ActionError> { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = + crate::context::op_context_for_saga_action(&sagactx, serialized_authn); + + // No physical sled? Don't push NAT. + let Some(sled_uuid) = sled_uuid else { + return Ok(None); + }; + + if !target_ip.do_saga { + return Ok(None); + } + let Some(target_ip) = target_ip.external_ip else { + return Err(ActionError::action_failed(Error::internal_error( + "tried to `do_saga` without valid external IP", + ))); + }; + + // Querying sleds requires fleet access; use the instance allocator context + // for this. + let (.., sled) = LookupPath::new(&osagactx.nexus().opctx_alloc, &datastore) + .sled_id(sled_uuid) + .fetch() + .await + .map_err(ActionError::action_failed)?; + + osagactx + .nexus() + .instance_ensure_dpd_config( + &opctx, + authz_instance.id(), + &sled.address(), + Some(target_ip.id), + ) + .await + .and_then(|v| { + v.into_iter().next().map(Some).ok_or_else(|| { + Error::internal_error( + "NAT RPW failed to return concrete NAT entry", + ) + }) + }) + .map_err(ActionError::action_failed) +} + +/// Remove a single NAT entry from DPD, dropping packets bound for `target_ip`. +/// +/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly +/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). +pub async fn instance_ip_remove_nat( + sagactx: &NexusActionContext, + serialized_authn: &authn::saga::Serialized, + sled_uuid: Option, + target_ip: ModifyStateForExternalIp, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let opctx = + crate::context::op_context_for_saga_action(&sagactx, serialized_authn); + + // No physical sled? Don't push NAT. + if sled_uuid.is_none() { + return Ok(()); + }; + + if !target_ip.do_saga { + return Ok(()); + } + let Some(target_ip) = target_ip.external_ip else { + return Err(ActionError::action_failed(Error::internal_error( + "tried to `do_saga` without valid external IP", + ))); + }; + + osagactx + .nexus() + .external_ip_delete_dpd_config(&opctx, &target_ip) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +/// Inform the OPTE port for a running instance that it should start +/// sending/receiving traffic on a given IP address. +/// +/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly +/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). +pub async fn instance_ip_add_opte( + sagactx: &NexusActionContext, + authz_instance: &authz::Instance, + sled_uuid: Option, + target_ip: ModifyStateForExternalIp, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + // No physical sled? Don't inform OPTE. 
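`instance_ip_add_nat` above must surface the concrete `Ipv4NatEntry` that was ensured, because `siia_nat_undo` later needs to retract exactly that incarnation of the row. The extraction at its tail is equivalent to this small helper (a sketch; the helper name is hypothetical, `Error` as in the surrounding code):

    fn single_entry(entries: Vec<Ipv4NatEntry>) -> Result<Ipv4NatEntry, Error> {
        // `instance_ensure_dpd_config` called with `Some(ip_filter)` should
        // yield exactly one entry; an empty result means the RPW never
        // produced the row we asked for.
        entries.into_iter().next().ok_or_else(|| {
            Error::internal_error("NAT RPW failed to return concrete NAT entry")
        })
    }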
+ let Some(sled_uuid) = sled_uuid else { + return Ok(()); + }; + + if !target_ip.do_saga { + return Ok(()); + } + let Some(target_ip) = target_ip.external_ip else { + return Err(ActionError::action_failed(Error::internal_error( + "tried to `do_saga` without valid external IP", + ))); + }; + + let sled_agent_body = + target_ip.try_into().map_err(ActionError::action_failed)?; + + osagactx + .nexus() + .sled_client(&sled_uuid) + .await + .map_err(|_| { + ActionError::action_failed(Error::unavail( + "sled agent client went away mid-attach/detach", + )) + })? + .instance_put_external_ip(&authz_instance.id(), &sled_agent_body) + .await + .map_err(|e| { + ActionError::action_failed(match e { + progenitor_client::Error::CommunicationError(_) => { + Error::unavail( + "sled agent client went away mid-attach/detach", + ) + } + e => Error::internal_error(&format!("{e}")), + }) + })?; + + Ok(()) +} + +/// Inform the OPTE port for a running instance that it should cease +/// sending/receiving traffic on a given IP address. +/// +/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly +/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). +pub async fn instance_ip_remove_opte( + sagactx: &NexusActionContext, + authz_instance: &authz::Instance, + sled_uuid: Option, + target_ip: ModifyStateForExternalIp, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + // No physical sled? Don't inform OPTE. + let Some(sled_uuid) = sled_uuid else { + return Ok(()); + }; + + if !target_ip.do_saga { + return Ok(()); + } + let Some(target_ip) = target_ip.external_ip else { + return Err(ActionError::action_failed(Error::internal_error( + "tried to `do_saga` without valid external IP", + ))); + }; + + let sled_agent_body = + target_ip.try_into().map_err(ActionError::action_failed)?; + + osagactx + .nexus() + .sled_client(&sled_uuid) + .await + .map_err(|_| { + ActionError::action_failed(Error::unavail( + "sled agent client went away mid-attach/detach", + )) + })? 
+ .instance_delete_external_ip(&authz_instance.id(), &sled_agent_body) + .await + .map_err(|e| { + ActionError::action_failed(match e { + progenitor_client::Error::CommunicationError(_) => { + Error::unavail( + "sled agent client went away mid-attach/detach", + ) + } + e => Error::internal_error(&format!("{e}")), + }) + })?; + + Ok(()) +} diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index c4c9c4e083..3aa491d978 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -10,7 +10,7 @@ use crate::app::{ MAX_NICS_PER_INSTANCE, }; use crate::external_api::params; -use nexus_db_model::NetworkInterfaceKind; +use nexus_db_model::{ExternalIp, NetworkInterfaceKind}; use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::queries::network_interface::InsertError as InsertNicError; @@ -21,7 +21,9 @@ use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::InstanceState; use omicron_common::api::external::Name; +use omicron_common::api::external::NameOrId; use omicron_common::api::internal::shared::SwitchLocation; +use ref_cast::RefCast; use serde::Deserialize; use serde::Serialize; use slog::warn; @@ -223,7 +225,7 @@ impl NexusSaga for SagaInstanceCreate { SagaName::new(&format!("instance-create-external-ip{i}")); let mut subsaga_builder = DagBuilder::new(subsaga_name); subsaga_builder.append(Node::action( - "output", + format!("external-ip-{i}").as_str(), format!("CreateExternalIp{i}").as_str(), CREATE_EXTERNAL_IP.as_ref(), )); @@ -597,7 +599,7 @@ async fn sic_allocate_instance_snat_ip_undo( /// index `ip_index`, and return its ID if one is created (or None). async fn sic_allocate_instance_external_ip( sagactx: NexusActionContext, -) -> Result<(), ActionError> { +) -> Result, ActionError> { // XXX: may wish to restructure partially: we have at most one ephemeral // and then at most $n$ floating. let osagactx = sagactx.user_data(); @@ -607,7 +609,7 @@ async fn sic_allocate_instance_external_ip( let ip_index = repeat_saga_params.which; let Some(ip_params) = saga_params.create_params.external_ips.get(ip_index) else { - return Ok(()); + return Ok(None); }; let opctx = crate::context::op_context_for_saga_action( &sagactx, @@ -615,39 +617,80 @@ async fn sic_allocate_instance_external_ip( ); let instance_id = repeat_saga_params.instance_id; - match ip_params { + // We perform the 'complete_op' in this saga stage because our IPs are + // created in the attaching state, and we need to move them to attached. + // We *can* do so because the `creating` state will block the IP attach/detach + // sagas from running, so we can safely undo in event of later error in this saga + // without worrying they have been detached by another API call. + // Runtime state should never be able to make 'complete_op' fallible. + let ip = match ip_params { // Allocate a new IP address from the target, possibly default, pool - params::ExternalIpCreate::Ephemeral { ref pool_name } => { - let pool_name = - pool_name.as_ref().map(|name| db::model::Name(name.clone())); + params::ExternalIpCreate::Ephemeral { pool } => { + let pool = if let Some(name_or_id) = pool { + Some( + osagactx + .nexus() + .ip_pool_lookup(&opctx, name_or_id) + .map_err(ActionError::action_failed)? + .lookup_for(authz::Action::CreateChild) + .await + .map_err(ActionError::action_failed)? 
+ .0, + ) + } else { + None + }; + let ip_id = repeat_saga_params.new_id; datastore .allocate_instance_ephemeral_ip( &opctx, ip_id, instance_id, - pool_name, + pool, + true, ) .await - .map_err(ActionError::action_failed)?; + .map_err(ActionError::action_failed)? + .0 } // Set the parent of an existing floating IP to the new instance's ID. - params::ExternalIpCreate::Floating { ref floating_ip_name } => { - let floating_ip_name = db::model::Name(floating_ip_name.clone()); - let (.., authz_fip, db_fip) = LookupPath::new(&opctx, &datastore) - .project_id(saga_params.project_id) - .floating_ip_name(&floating_ip_name) - .fetch_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; + params::ExternalIpCreate::Floating { floating_ip } => { + let (.., authz_fip) = match floating_ip { + NameOrId::Name(name) => LookupPath::new(&opctx, datastore) + .project_id(saga_params.project_id) + .floating_ip_name(db::model::Name::ref_cast(name)), + NameOrId::Id(id) => { + LookupPath::new(&opctx, datastore).floating_ip_id(*id) + } + } + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; datastore - .floating_ip_attach(&opctx, &authz_fip, &db_fip, instance_id) + .floating_ip_begin_attach(&opctx, &authz_fip, instance_id, true) .await - .map_err(ActionError::action_failed)?; + .map_err(ActionError::action_failed)? + .0 } - } - Ok(()) + }; + + // Ignore row count here, this is infallible with correct + // (state, state', kind) but may be zero on repeat call for + // idempotency. + _ = datastore + .external_ip_complete_op( + &opctx, + ip.id, + ip.kind, + nexus_db_model::IpAttachState::Attaching, + nexus_db_model::IpAttachState::Attached, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(Some(ip)) } async fn sic_allocate_instance_external_ip_undo( @@ -662,6 +705,16 @@ async fn sic_allocate_instance_external_ip_undo( &sagactx, &saga_params.serialized_authn, ); + + // We store and lookup `ExternalIp` so that we can detach + // and/or deallocate without double name resolution. + let new_ip = sagactx + .lookup::>(&format!("external-ip-{ip_index}"))?; + + let Some(ip) = new_ip else { + return Ok(()); + }; + let Some(ip_params) = saga_params.create_params.external_ips.get(ip_index) else { return Ok(()); @@ -669,18 +722,42 @@ async fn sic_allocate_instance_external_ip_undo( match ip_params { params::ExternalIpCreate::Ephemeral { .. } => { - let ip_id = repeat_saga_params.new_id; - datastore.deallocate_external_ip(&opctx, ip_id).await?; + datastore.deallocate_external_ip(&opctx, ip.id).await?; } - params::ExternalIpCreate::Floating { floating_ip_name } => { - let floating_ip_name = db::model::Name(floating_ip_name.clone()); - let (.., authz_fip, db_fip) = LookupPath::new(&opctx, &datastore) - .project_id(saga_params.project_id) - .floating_ip_name(&floating_ip_name) - .fetch_for(authz::Action::Modify) + params::ExternalIpCreate::Floating { .. 
} => { + let (.., authz_fip) = LookupPath::new(&opctx, &datastore) + .floating_ip_id(ip.id) + .lookup_for(authz::Action::Modify) + .await?; + + datastore + .floating_ip_begin_detach( + &opctx, + &authz_fip, + repeat_saga_params.instance_id, + true, + ) .await?; - datastore.floating_ip_detach(&opctx, &authz_fip, &db_fip).await?; + let n_rows = datastore + .external_ip_complete_op( + &opctx, + ip.id, + ip.kind, + nexus_db_model::IpAttachState::Detaching, + nexus_db_model::IpAttachState::Detached, + ) + .await + .map_err(ActionError::action_failed)?; + + if n_rows != 1 { + error!( + osagactx.log(), + "sic_allocate_instance_external_ip_undo: failed to \ + completely detach ip {}", + ip.id + ); + } } } Ok(()) @@ -953,7 +1030,7 @@ pub mod test { network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: None, + pool: None, }], disks: vec![params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index 013bececee..aaf5dcb033 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -240,7 +240,7 @@ mod test { network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: None, + pool: None, }], disks: vec![params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { name: DISK_NAME.parse().unwrap() }, diff --git a/nexus/src/app/sagas/instance_ip_attach.rs b/nexus/src/app/sagas/instance_ip_attach.rs new file mode 100644 index 0000000000..be7f81368e --- /dev/null +++ b/nexus/src/app/sagas/instance_ip_attach.rs @@ -0,0 +1,583 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::instance_common::{ + instance_ip_add_nat, instance_ip_add_opte, instance_ip_get_instance_state, + instance_ip_move_state, instance_ip_remove_opte, ModifyStateForExternalIp, +}; +use super::{ActionRegistry, NexusActionContext, NexusSaga}; +use crate::app::sagas::declare_saga_actions; +use crate::app::{authn, authz, db}; +use crate::external_api::params; +use nexus_db_model::{IpAttachState, Ipv4NatEntry}; +use nexus_db_queries::db::lookup::LookupPath; +use nexus_types::external_api::views; +use omicron_common::api::external::{Error, NameOrId}; +use ref_cast::RefCast; +use serde::Deserialize; +use serde::Serialize; +use steno::ActionError; +use uuid::Uuid; + +// The IP attach/detach sagas do some resource locking -- because we +// allow them to be called in [Running, Stopped], they must contend +// with each other/themselves, instance start, instance delete, and +// the instance stop action (noting the latter is not a saga). +// +// The main means of access control here is an external IP's `state`. +// Entering either saga begins with an atomic swap from Attached/Detached +// to Attaching/Detaching. This prevents concurrent attach/detach on the +// same EIP, and prevents instance start and migrate from completing with an +// Error::unavail via instance_ensure_registered and/or DPD. +// +// Overlap with stop is handled by treating comms failures with +// sled-agent as temporary errors and unwinding. 
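To make the locking discipline described above concrete, here is a toy model of the attach-state gate. This is illustrative only; the real transitions are enforced by `external_ip_complete_op` and the `begin_attach`/`begin_detach` queries:

    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    enum IpAttachState {
        Detached,
        Attaching,
        Attached,
        Detaching,
    }

    fn may_begin(current: IpAttachState, attach: bool) -> bool {
        match (current, attach) {
            // Attach may only claim a detached IP...
            (IpAttachState::Detached, true) => true,
            // ...and detach may only claim an attached one.
            (IpAttachState::Attached, false) => true,
            // Anything mid-transition belongs to another saga and is
            // refused, which is what serializes concurrent attach/detach.
            _ => false,
        }
    }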
For the delete case, we +// allow the detach completion to have a missing record -- both instance delete +// and detach will leave NAT in the correct state. For attach, if we make it +// to completion and an IP is `detached`, we unwind as a precaution. +// See `instance_common::instance_ip_get_instance_state` for more info. +// +// One more consequence of sled state being able to change beneath us +// is that the central undo actions (DPD/OPTE state) *must* be best-effort. +// This is not bad per-se: instance stop does not itself remove NAT routing +// rules. The only reason these should fail is because an instance has stopped, +// or DPD has died. + +declare_saga_actions! { + instance_ip_attach; + ATTACH_EXTERNAL_IP -> "target_ip" { + + siia_begin_attach_ip + - siia_begin_attach_ip_undo + } + + INSTANCE_STATE -> "instance_state" { + + siia_get_instance_state + } + + REGISTER_NAT -> "nat_entry" { + + siia_nat + - siia_nat_undo + } + + ENSURE_OPTE_PORT -> "no_result1" { + + siia_update_opte + - siia_update_opte_undo + } + + COMPLETE_ATTACH -> "output" { + + siia_complete_attach + } +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct Params { + pub create_params: params::ExternalIpCreate, + pub authz_instance: authz::Instance, + pub project_id: Uuid, + /// Authentication context to use to fetch the instance's current state from + /// the database. + pub serialized_authn: authn::saga::Serialized, +} + +async fn siia_begin_attach_ip( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + match ¶ms.create_params { + // Allocate a new IP address from the target, possibly default, pool + params::ExternalIpCreate::Ephemeral { pool } => { + let pool = if let Some(name_or_id) = pool { + Some( + osagactx + .nexus() + .ip_pool_lookup(&opctx, name_or_id) + .map_err(ActionError::action_failed)? + .lookup_for(authz::Action::CreateChild) + .await + .map_err(ActionError::action_failed)? + .0, + ) + } else { + None + }; + + datastore + .allocate_instance_ephemeral_ip( + &opctx, + Uuid::new_v4(), + params.authz_instance.id(), + pool, + false, + ) + .await + .map_err(ActionError::action_failed) + .map(|(external_ip, do_saga)| ModifyStateForExternalIp { + external_ip: Some(external_ip), + do_saga, + }) + } + // Set the parent of an existing floating IP to the new instance's ID. 
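The ephemeral arm above resolves its optional pool the same way the instance-create subsaga does; factored out as a sketch, assuming (as the `.0` above does) that the authz pool is the first element of the `lookup_for` tuple:

    async fn resolve_pool(
        nexus: &Nexus,
        opctx: &OpContext,
        pool: &Option<NameOrId>,
    ) -> Result<Option<authz::IpPool>, Error> {
        match pool {
            // The caller named a pool: it must exist and the caller must be
            // allowed to allocate addresses (children) from it.
            Some(name_or_id) => Ok(Some(
                nexus
                    .ip_pool_lookup(opctx, name_or_id)?
                    .lookup_for(authz::Action::CreateChild)
                    .await?
                    .0,
            )),
            // No pool named: defer to the default at allocation time.
            None => Ok(None),
        }
    }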
+ params::ExternalIpCreate::Floating { floating_ip } => { + let (.., authz_fip) = match floating_ip { + NameOrId::Name(name) => LookupPath::new(&opctx, datastore) + .project_id(params.project_id) + .floating_ip_name(db::model::Name::ref_cast(name)), + NameOrId::Id(id) => { + LookupPath::new(&opctx, datastore).floating_ip_id(*id) + } + } + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; + + datastore + .floating_ip_begin_attach( + &opctx, + &authz_fip, + params.authz_instance.id(), + false, + ) + .await + .map_err(ActionError::action_failed) + .map(|(external_ip, do_saga)| ModifyStateForExternalIp { + external_ip: Some(external_ip), + do_saga, + }) + } + } +} + +async fn siia_begin_attach_ip_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + warn!(log, "siia_begin_attach_ip_undo: Reverting detached->attaching"); + let params = sagactx.saga_params::()?; + let new_ip = sagactx.lookup::("target_ip")?; + if !instance_ip_move_state( + &sagactx, + ¶ms.serialized_authn, + IpAttachState::Attaching, + IpAttachState::Detached, + &new_ip, + ) + .await? + { + error!(log, "siia_begin_attach_ip_undo: external IP was deleted") + } + + Ok(()) +} + +async fn siia_get_instance_state( + sagactx: NexusActionContext, +) -> Result, ActionError> { + let params = sagactx.saga_params::()?; + instance_ip_get_instance_state( + &sagactx, + ¶ms.serialized_authn, + ¶ms.authz_instance, + "attach", + ) + .await +} + +// XXX: Need to abstract over v4 and v6 NAT entries when the time comes. +async fn siia_nat( + sagactx: NexusActionContext, +) -> Result, ActionError> { + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + instance_ip_add_nat( + &sagactx, + ¶ms.serialized_authn, + ¶ms.authz_instance, + sled_id, + target_ip, + ) + .await +} + +async fn siia_nat_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let nat_entry = sagactx.lookup::>("nat_entry")?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let Some(nat_entry) = nat_entry else { + // Seeing `None` here means that we never pushed DPD state in + // the first instance. Nothing to undo. + return Ok(()); + }; + + // This requires some explanation in one case, where we can fail because an + // instance may have moved running -> stopped -> deleted. + // An instance delete will cause us to unwind and return to this stage *but* + // the ExternalIp will no longer have a useful parent (or even a + // different parent!). + // + // Internally, we delete the NAT entry *without* checking its instance state because + // it may either be `None`, or another instance may have attached. The + // first case is fine, but we need to consider NAT RPW semantics for the second: + // * The NAT entry table will ensure uniqueness on (external IP, low_port, + // high_port) for non-deleted rows. + // * Instance start and IP attach on a running instance will try to insert such + // a row, fail, and then delete this row before moving forwards. + // - Until either side deletes the row, we're polluting switch NAT. + // - We can't guarantee quick reuse to remove this rule via attach. + // - This will lead to a *new* NAT entry we need to protect, so we need to be careful + // that we only remove *our* incarnation. 
This is likelier to be hit + // if an ephemeral IP is deallocated, reallocated, and reused in a short timeframe. + // * Instance create will successfully set parent, since it won't attempt to ensure + // DPD has correct NAT state unless set to `start: true`. + // So it is safe/necessary to remove using the old entry here to target the + // exact row we created.. + + if let Err(e) = osagactx + .nexus() + .delete_dpd_config_by_entry(&opctx, &nat_entry) + .await + .map_err(ActionError::action_failed) + { + error!(log, "siia_nat_undo: failed to notify DPD: {e}"); + } + + Ok(()) +} + +async fn siia_update_opte( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + instance_ip_add_opte(&sagactx, ¶ms.authz_instance, sled_id, target_ip) + .await +} + +async fn siia_update_opte_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + if let Err(e) = instance_ip_remove_opte( + &sagactx, + ¶ms.authz_instance, + sled_id, + target_ip, + ) + .await + { + error!(log, "siia_update_opte_undo: failed to notify sled-agent: {e}"); + } + Ok(()) +} + +async fn siia_complete_attach( + sagactx: NexusActionContext, +) -> Result { + let log = sagactx.user_data().log(); + let params = sagactx.saga_params::()?; + let target_ip = sagactx.lookup::("target_ip")?; + + // There is a clause in `external_ip_complete_op` which specifically + // causes an unwind here if the instance delete saga fires and an IP is either + // detached or deleted. + if !instance_ip_move_state( + &sagactx, + ¶ms.serialized_authn, + IpAttachState::Attaching, + IpAttachState::Attached, + &target_ip, + ) + .await? + { + warn!(log, "siia_complete_attach: call was idempotent") + } + + target_ip + .external_ip + .ok_or_else(|| { + Error::internal_error( + "must always have a defined external IP during instance attach", + ) + }) + .and_then(TryInto::try_into) + .map_err(ActionError::action_failed) +} + +#[derive(Debug)] +pub struct SagaInstanceIpAttach; +impl NexusSaga for SagaInstanceIpAttach { + const NAME: &'static str = "external-ip-attach"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + instance_ip_attach_register_actions(registry); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(attach_external_ip_action()); + builder.append(instance_state_action()); + builder.append(register_nat_action()); + builder.append(ensure_opte_port_action()); + builder.append(complete_attach_action()); + Ok(builder.build()?) 
+ } +} + +#[cfg(test)] +pub(crate) mod test { + use super::*; + use crate::app::{saga::create_saga_dag, sagas::test_helpers}; + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::{ + ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, + }; + use dropshot::test_util::ClientTestContext; + use nexus_db_model::{ExternalIp, IpKind}; + use nexus_db_queries::context::OpContext; + use nexus_test_utils::resource_helpers::{ + create_default_ip_pool, create_floating_ip, create_instance, + create_project, + }; + use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::{Name, SimpleIdentity}; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + const PROJECT_NAME: &str = "cafe"; + const INSTANCE_NAME: &str = "menu"; + const FIP_NAME: &str = "affogato"; + + pub async fn ip_manip_test_setup(client: &ClientTestContext) -> Uuid { + create_default_ip_pool(&client).await; + let project = create_project(client, PROJECT_NAME).await; + create_floating_ip( + client, + FIP_NAME, + &project.identity.id.to_string(), + None, + None, + ) + .await; + + project.id() + } + + pub async fn new_test_params( + opctx: &OpContext, + datastore: &db::DataStore, + use_floating: bool, + ) -> Params { + let create_params = if use_floating { + params::ExternalIpCreate::Floating { + floating_ip: FIP_NAME.parse::().unwrap().into(), + } + } else { + params::ExternalIpCreate::Ephemeral { pool: None } + }; + + let (.., authz_project, authz_instance) = + LookupPath::new(opctx, datastore) + .project_name(&db::model::Name(PROJECT_NAME.parse().unwrap())) + .instance_name(&db::model::Name(INSTANCE_NAME.parse().unwrap())) + .lookup_for(authz::Action::Modify) + .await + .unwrap(); + + Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + project_id: authz_project.id(), + create_params, + authz_instance, + } + } + + #[nexus_test(server = crate::Server)] + async fn test_saga_basic_usage_succeeds( + cptestctx: &ControlPlaneTestContext, + ) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + let sled_agent = &cptestctx.sled_agent.sled_agent; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + for use_float in [false, true] { + let params = new_test_params(&opctx, datastore, use_float).await; + + let dag = create_saga_dag::(params).unwrap(); + let saga = nexus.create_runnable_saga(dag).await.unwrap(); + nexus.run_saga(saga).await.expect("Attach saga should succeed"); + } + + let instance_id = instance.id(); + + // Sled agent has a record of the new external IPs. + let mut eips = sled_agent.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + assert!(my_eips.iter().any(|v| matches!( + v, + omicron_sled_agent::params::InstanceExternalIpBody::Floating(_) + ))); + assert!(my_eips.iter().any(|v| matches!( + v, + omicron_sled_agent::params::InstanceExternalIpBody::Ephemeral(_) + ))); + + // DB has records for SNAT plus the new IPs. 
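The assertions that follow encode one invariant: instance create allocates an SNAT IP for outbound connectivity, and the two saga runs added one ephemeral and one floating IP, so exactly three records exist. A hypothetical helper form of the same checks:

    fn assert_expected_kinds(eips: &[ExternalIp]) {
        // SNAT from instance create, plus the two attached by the sagas.
        assert_eq!(eips.len(), 3);
        for kind in [IpKind::SNat, IpKind::Ephemeral, IpKind::Floating] {
            assert!(eips.iter().any(|ip| ip.kind == kind));
        }
    }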
+ let db_eips = datastore + .instance_lookup_external_ips(&opctx, instance_id) + .await + .unwrap(); + assert_eq!(db_eips.len(), 3); + assert!(db_eips.iter().any(|v| v.kind == IpKind::Ephemeral)); + assert!(db_eips.iter().any(|v| v.kind == IpKind::Floating)); + assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat)); + } + + pub(crate) async fn verify_clean_slate( + cptestctx: &ControlPlaneTestContext, + instance_id: Uuid, + ) { + use nexus_db_queries::db::schema::external_ip::dsl; + + let sled_agent = &cptestctx.sled_agent.sled_agent; + let datastore = cptestctx.server.apictx().nexus.datastore(); + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + // No Floating IPs exist in states other than 'detached'. + assert!(dsl::external_ip + .filter(dsl::kind.eq(IpKind::Floating)) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::parent_id.eq(instance_id)) + .filter(dsl::state.ne(IpAttachState::Detached)) + .select(ExternalIp::as_select()) + .first_async::(&*conn) + .await + .optional() + .unwrap() + .is_none()); + + // All ephemeral IPs are removed. + assert!(dsl::external_ip + .filter(dsl::kind.eq(IpKind::Ephemeral)) + .filter(dsl::time_deleted.is_null()) + .select(ExternalIp::as_select()) + .first_async::(&*conn) + .await + .optional() + .unwrap() + .is_none()); + + // No IP bindings remain on sled-agent. + let mut eips = sled_agent.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + assert!(my_eips.is_empty()); + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind( + cptestctx: &ControlPlaneTestContext, + ) { + let log = &cptestctx.logctx.log; + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + for use_float in [false, true] { + test_helpers::action_failure_can_unwind::( + nexus, + || Box::pin(new_test_params(&opctx, datastore, use_float) ), + || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + log, + ) + .await; + } + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let log = &cptestctx.logctx.log; + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + for use_float in [false, true] { + test_helpers::action_failure_can_unwind_idempotently::< + SagaInstanceIpAttach, + _, + _, + >( + nexus, + || Box::pin(new_test_params(&opctx, datastore, use_float)), + || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + log, + ) + .await; + } + } + + #[nexus_test(server = crate::Server)] + async fn test_actions_succeed_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let _instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + for use_float 
in [false, true] { + let params = new_test_params(&opctx, datastore, use_float).await; + let dag = create_saga_dag::(params).unwrap(); + test_helpers::actions_succeed_idempotently(nexus, dag).await; + } + } +} diff --git a/nexus/src/app/sagas/instance_ip_detach.rs b/nexus/src/app/sagas/instance_ip_detach.rs new file mode 100644 index 0000000000..da6c92077d --- /dev/null +++ b/nexus/src/app/sagas/instance_ip_detach.rs @@ -0,0 +1,551 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::instance_common::{ + instance_ip_add_nat, instance_ip_add_opte, instance_ip_get_instance_state, + instance_ip_move_state, instance_ip_remove_nat, instance_ip_remove_opte, + ModifyStateForExternalIp, +}; +use super::{ActionRegistry, NexusActionContext, NexusSaga}; +use crate::app::sagas::declare_saga_actions; +use crate::app::{authn, authz, db}; +use crate::external_api::params; +use nexus_db_model::IpAttachState; +use nexus_db_queries::db::lookup::LookupPath; +use nexus_types::external_api::views; +use omicron_common::api::external::NameOrId; +use ref_cast::RefCast; +use serde::Deserialize; +use serde::Serialize; +use steno::ActionError; +use uuid::Uuid; + +// This runs on similar logic to instance IP attach: see its head +// comment for an explanation of the structure wrt. other sagas. + +declare_saga_actions! { + instance_ip_detach; + DETACH_EXTERNAL_IP -> "target_ip" { + + siid_begin_detach_ip + - siid_begin_detach_ip_undo + } + + INSTANCE_STATE -> "instance_state" { + + siid_get_instance_state + } + + REMOVE_NAT -> "no_result0" { + + siid_nat + - siid_nat_undo + } + + REMOVE_OPTE_PORT -> "no_result1" { + + siid_update_opte + - siid_update_opte_undo + } + + COMPLETE_DETACH -> "output" { + + siid_complete_detach + } +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct Params { + pub delete_params: params::ExternalIpDetach, + pub authz_instance: authz::Instance, + pub project_id: Uuid, + /// Authentication context to use to fetch the instance's current state from + /// the database. 
+ pub serialized_authn: authn::saga::Serialized, +} + +async fn siid_begin_detach_ip( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + match ¶ms.delete_params { + params::ExternalIpDetach::Ephemeral => { + let eip = datastore + .instance_lookup_ephemeral_ip( + &opctx, + params.authz_instance.id(), + ) + .await + .map_err(ActionError::action_failed)?; + + if let Some(eph_ip) = eip { + datastore + .begin_deallocate_ephemeral_ip( + &opctx, + eph_ip.id, + params.authz_instance.id(), + ) + .await + .map_err(ActionError::action_failed) + .map(|external_ip| ModifyStateForExternalIp { + do_saga: external_ip.is_some(), + external_ip, + }) + } else { + Ok(ModifyStateForExternalIp { + do_saga: false, + external_ip: None, + }) + } + } + params::ExternalIpDetach::Floating { floating_ip } => { + let (.., authz_fip) = match floating_ip { + NameOrId::Name(name) => LookupPath::new(&opctx, datastore) + .project_id(params.project_id) + .floating_ip_name(db::model::Name::ref_cast(name)), + NameOrId::Id(id) => { + LookupPath::new(&opctx, datastore).floating_ip_id(*id) + } + } + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; + + datastore + .floating_ip_begin_detach( + &opctx, + &authz_fip, + params.authz_instance.id(), + false, + ) + .await + .map_err(ActionError::action_failed) + .map(|(external_ip, do_saga)| ModifyStateForExternalIp { + external_ip: Some(external_ip), + do_saga, + }) + } + } +} + +async fn siid_begin_detach_ip_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + warn!(log, "siid_begin_detach_ip_undo: Reverting attached->detaching"); + let params = sagactx.saga_params::()?; + let new_ip = sagactx.lookup::("target_ip")?; + if !instance_ip_move_state( + &sagactx, + ¶ms.serialized_authn, + IpAttachState::Detaching, + IpAttachState::Attached, + &new_ip, + ) + .await? 
+ { + error!(log, "siid_begin_detach_ip_undo: external IP was deleted") + } + + Ok(()) +} + +async fn siid_get_instance_state( + sagactx: NexusActionContext, +) -> Result, ActionError> { + let params = sagactx.saga_params::()?; + instance_ip_get_instance_state( + &sagactx, + ¶ms.serialized_authn, + ¶ms.authz_instance, + "detach", + ) + .await +} + +async fn siid_nat(sagactx: NexusActionContext) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + instance_ip_remove_nat( + &sagactx, + ¶ms.serialized_authn, + sled_id, + target_ip, + ) + .await +} + +async fn siid_nat_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + if let Err(e) = instance_ip_add_nat( + &sagactx, + ¶ms.serialized_authn, + ¶ms.authz_instance, + sled_id, + target_ip, + ) + .await + { + error!(log, "siid_nat_undo: failed to notify DPD: {e}"); + } + + Ok(()) +} + +async fn siid_update_opte( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + instance_ip_remove_opte( + &sagactx, + ¶ms.authz_instance, + sled_id, + target_ip, + ) + .await +} + +async fn siid_update_opte_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + if let Err(e) = instance_ip_add_opte( + &sagactx, + ¶ms.authz_instance, + sled_id, + target_ip, + ) + .await + { + error!(log, "siid_update_opte_undo: failed to notify sled-agent: {e}"); + } + Ok(()) +} + +async fn siid_complete_detach( + sagactx: NexusActionContext, +) -> Result, ActionError> { + let log = sagactx.user_data().log(); + let params = sagactx.saga_params::()?; + let target_ip = sagactx.lookup::("target_ip")?; + + if !instance_ip_move_state( + &sagactx, + ¶ms.serialized_authn, + IpAttachState::Detaching, + IpAttachState::Detached, + &target_ip, + ) + .await? + { + warn!( + log, + "siid_complete_detach: external IP was deleted or call was idempotent" + ) + } + + target_ip + .external_ip + .map(TryInto::try_into) + .transpose() + .map_err(ActionError::action_failed) +} + +#[derive(Debug)] +pub struct SagaInstanceIpDetach; +impl NexusSaga for SagaInstanceIpDetach { + const NAME: &'static str = "external-ip-detach"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + instance_ip_detach_register_actions(registry); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(detach_external_ip_action()); + builder.append(instance_state_action()); + builder.append(remove_nat_action()); + builder.append(remove_opte_port_action()); + builder.append(complete_detach_action()); + Ok(builder.build()?) 
+ } +} + +#[cfg(test)] +pub(crate) mod test { + use super::*; + use crate::{ + app::{ + saga::create_saga_dag, + sagas::{ + instance_ip_attach::{self, test::ip_manip_test_setup}, + test_helpers, + }, + }, + Nexus, + }; + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::{ + ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, + }; + use nexus_db_model::{ExternalIp, IpKind}; + use nexus_db_queries::context::OpContext; + use nexus_test_utils::resource_helpers::create_instance; + use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::{Name, SimpleIdentity}; + use std::sync::Arc; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + const PROJECT_NAME: &str = "cafe"; + const INSTANCE_NAME: &str = "menu"; + const FIP_NAME: &str = "affogato"; + + async fn new_test_params( + opctx: &OpContext, + datastore: &db::DataStore, + use_floating: bool, + ) -> Params { + let delete_params = if use_floating { + params::ExternalIpDetach::Floating { + floating_ip: FIP_NAME.parse::().unwrap().into(), + } + } else { + params::ExternalIpDetach::Ephemeral + }; + + let (.., authz_project, authz_instance) = + LookupPath::new(opctx, datastore) + .project_name(&db::model::Name(PROJECT_NAME.parse().unwrap())) + .instance_name(&db::model::Name(INSTANCE_NAME.parse().unwrap())) + .lookup_for(authz::Action::Modify) + .await + .unwrap(); + + Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + project_id: authz_project.id(), + delete_params, + authz_instance, + } + } + + async fn attach_instance_ips(nexus: &Arc, opctx: &OpContext) { + let datastore = &nexus.db_datastore; + + let proj_name = db::model::Name(PROJECT_NAME.parse().unwrap()); + let inst_name = db::model::Name(INSTANCE_NAME.parse().unwrap()); + let lookup = LookupPath::new(opctx, datastore) + .project_name(&proj_name) + .instance_name(&inst_name); + + for use_float in [false, true] { + let params = instance_ip_attach::test::new_test_params( + opctx, datastore, use_float, + ) + .await; + nexus + .instance_attach_external_ip( + opctx, + &lookup, + ¶ms.create_params, + ) + .await + .unwrap(); + } + } + + #[nexus_test(server = crate::Server)] + async fn test_saga_basic_usage_succeeds( + cptestctx: &ControlPlaneTestContext, + ) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + let sled_agent = &cptestctx.sled_agent.sled_agent; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _ = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + attach_instance_ips(nexus, &opctx).await; + + for use_float in [false, true] { + let params = new_test_params(&opctx, datastore, use_float).await; + + let dag = create_saga_dag::(params).unwrap(); + let saga = nexus.create_runnable_saga(dag).await.unwrap(); + nexus.run_saga(saga).await.expect("Detach saga should succeed"); + } + + let instance_id = instance.id(); + + // Sled agent has removed its records of the external IPs. + let mut eips = sled_agent.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + assert!(my_eips.is_empty()); + + // DB only has record for SNAT. 
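Only SNAT remains because the two detach paths diverge: detaching an ephemeral IP begins deallocation outright (`begin_deallocate_ephemeral_ip`), while a floating IP merely returns to `detached` with no parent, so an instance-scoped lookup no longer sees it. A hypothetical helper form of the assertion below:

    fn assert_only_snat(eips: &[ExternalIp]) {
        // The ephemeral IP is gone and the floating IP is orphaned from
        // this instance; only the create-time SNAT row is left.
        assert_eq!(eips.len(), 1);
        assert!(eips.iter().all(|ip| ip.kind == IpKind::SNat));
    }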
+ let db_eips = datastore + .instance_lookup_external_ips(&opctx, instance_id) + .await + .unwrap(); + assert_eq!(db_eips.len(), 1); + assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat)); + } + + pub(crate) async fn verify_clean_slate( + cptestctx: &ControlPlaneTestContext, + instance_id: Uuid, + ) { + use nexus_db_queries::db::schema::external_ip::dsl; + + let opctx = test_helpers::test_opctx(cptestctx); + let sled_agent = &cptestctx.sled_agent.sled_agent; + let datastore = cptestctx.server.apictx().nexus.datastore(); + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + // No IPs in transitional states w/ current instance. + assert!(dsl::external_ip + .filter(dsl::time_deleted.is_null()) + .filter(dsl::parent_id.eq(instance_id)) + .filter(dsl::state.ne(IpAttachState::Attached)) + .select(ExternalIp::as_select()) + .first_async::(&*conn) + .await + .optional() + .unwrap() + .is_none()); + + // No external IPs in detached state. + assert!(dsl::external_ip + .filter(dsl::time_deleted.is_null()) + .filter(dsl::state.eq(IpAttachState::Detached)) + .select(ExternalIp::as_select()) + .first_async::(&*conn) + .await + .optional() + .unwrap() + .is_none()); + + // Instance still has one Ephemeral IP, and one Floating IP. + let db_eips = datastore + .instance_lookup_external_ips(&opctx, instance_id) + .await + .unwrap(); + assert_eq!(db_eips.len(), 3); + assert!(db_eips.iter().any(|v| v.kind == IpKind::Ephemeral)); + assert!(db_eips.iter().any(|v| v.kind == IpKind::Floating)); + assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat)); + + // No IP bindings remain on sled-agent. + let eips = &*sled_agent.external_ips.lock().await; + for (_nic_id, eip_set) in eips { + assert_eq!(eip_set.len(), 2); + } + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind( + cptestctx: &ControlPlaneTestContext, + ) { + let log = &cptestctx.logctx.log; + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + attach_instance_ips(nexus, &opctx).await; + + for use_float in [false, true] { + test_helpers::action_failure_can_unwind::( + nexus, + || Box::pin(new_test_params(&opctx, datastore, use_float) ), + || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + log, + ) + .await; + } + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let log = &cptestctx.logctx.log; + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + attach_instance_ips(nexus, &opctx).await; + + for use_float in [false, true] { + test_helpers::action_failure_can_unwind_idempotently::< + SagaInstanceIpDetach, + _, + _, + >( + nexus, + || Box::pin(new_test_params(&opctx, datastore, use_float)), + || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + log, + ) + .await; + } + } + + #[nexus_test(server = crate::Server)] + async fn test_actions_succeed_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let client 
= &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let _instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + attach_instance_ips(nexus, &opctx).await; + + for use_float in [false, true] { + let params = new_test_params(&opctx, datastore, use_float).await; + let dag = create_saga_dag::(params).unwrap(); + test_helpers::actions_succeed_idempotently(nexus, dag).await; + } + } +} diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 9d12bd8031..92c927e1ce 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -405,35 +405,12 @@ async fn sis_dpd_ensure( .await .map_err(ActionError::action_failed)?; - // Querying boundary switches also requires fleet access and the use of the - // instance allocator context. - let boundary_switches = osagactx + osagactx .nexus() - .boundary_switches(&osagactx.nexus().opctx_alloc) + .instance_ensure_dpd_config(&opctx, instance_id, &sled.address(), None) .await .map_err(ActionError::action_failed)?; - for switch in boundary_switches { - let dpd_client = - osagactx.nexus().dpd_clients.get(&switch).ok_or_else(|| { - ActionError::action_failed(Error::internal_error(&format!( - "unable to find client for switch {switch}" - ))) - })?; - - osagactx - .nexus() - .instance_ensure_dpd_config( - &opctx, - instance_id, - &sled.address(), - None, - dpd_client, - ) - .await - .map_err(ActionError::action_failed)?; - } - Ok(()) } diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index c5918d32ef..1bd85ecf32 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -26,6 +26,8 @@ pub mod image_delete; mod instance_common; pub mod instance_create; pub mod instance_delete; +pub mod instance_ip_attach; +pub mod instance_ip_detach; pub mod instance_migrate; pub mod instance_start; pub mod loopback_address_create; @@ -130,6 +132,12 @@ fn make_action_registry() -> ActionRegistry { ::register_actions( &mut registry, ); + ::register_actions( + &mut registry, + ); + ::register_actions( + &mut registry, + ); ::register_actions( &mut registry, ); diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 65b03a9fdf..a6cb9e80fe 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -142,6 +142,8 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(floating_ip_create)?; api.register(floating_ip_view)?; api.register(floating_ip_delete)?; + api.register(floating_ip_attach)?; + api.register(floating_ip_detach)?; api.register(disk_list)?; api.register(disk_create)?; @@ -200,6 +202,8 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(instance_network_interface_delete)?; api.register(instance_external_ip_list)?; + api.register(instance_ephemeral_ip_attach)?; + api.register(instance_ephemeral_ip_detach)?; api.register(vpc_router_list)?; api.register(vpc_router_view)?; @@ -1976,6 +1980,69 @@ async fn floating_ip_view( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// Attach a floating IP to an instance or other resource +#[endpoint { + method = POST, + path = "/v1/floating-ips/{floating_ip}/attach", + tags = ["floating-ips"], +}] +async fn floating_ip_attach( + rqctx: RequestContext>, + 
path_params: Path, + query_params: Query, + target: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let floating_ip_selector = params::FloatingIpSelector { + floating_ip: path.floating_ip, + project: query.project, + }; + let ip = nexus + .floating_ip_attach( + &opctx, + floating_ip_selector, + target.into_inner(), + ) + .await?; + Ok(HttpResponseAccepted(ip)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Detach a floating IP from an instance or other resource +#[endpoint { + method = POST, + path = "/v1/floating-ips/{floating_ip}/detach", + tags = ["floating-ips"], +}] +async fn floating_ip_detach( + rqctx: RequestContext>, + path_params: Path, + query_params: Query, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let floating_ip_selector = params::FloatingIpSelector { + floating_ip: path.floating_ip, + project: query.project, + }; + let fip_lookup = + nexus.floating_ip_lookup(&opctx, floating_ip_selector)?; + let ip = nexus.floating_ip_detach(&opctx, fip_lookup).await?; + Ok(HttpResponseAccepted(ip)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + // Disks /// List disks @@ -3884,6 +3951,79 @@ async fn instance_external_ip_list( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// Allocate and attach an ephemeral IP to an instance +#[endpoint { + method = POST, + path = "/v1/instances/{instance}/external-ips/ephemeral", + tags = ["instances"], +}] +async fn instance_ephemeral_ip_attach( + rqctx: RequestContext>, + path_params: Path, + query_params: Query, + ip_to_create: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let instance_selector = params::InstanceSelector { + project: query.project, + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, instance_selector)?; + let ip = nexus + .instance_attach_external_ip( + &opctx, + &instance_lookup, + ¶ms::ExternalIpCreate::Ephemeral { + pool: ip_to_create.into_inner().pool, + }, + ) + .await?; + Ok(HttpResponseAccepted(ip)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Detach and deallocate an ephemeral IP from an instance +#[endpoint { + method = DELETE, + path = "/v1/instances/{instance}/external-ips/ephemeral", + tags = ["instances"], +}] +async fn instance_ephemeral_ip_detach( + rqctx: RequestContext>, + path_params: Path, + query_params: Query, +) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let instance_selector = params::InstanceSelector { + project: query.project, + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, 
instance_selector)?; + nexus + .instance_detach_external_ip( + &opctx, + &instance_lookup, + ¶ms::ExternalIpDetach::Ephemeral, + ) + .await?; + Ok(HttpResponseDeleted()) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + // Snapshots /// List snapshots diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 4fe03f204c..d82a934686 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -492,6 +492,7 @@ pub async fn create_instance( Vec::::new(), // External IPs= Vec::::new(), + true, ) .await } @@ -504,6 +505,7 @@ pub async fn create_instance_with( nics: ¶ms::InstanceNetworkInterfaceAttachment, disks: Vec, external_ips: Vec, + start: bool, ) -> Instance { let url = format!("/v1/instances?project={}", project_name); object_create( @@ -523,7 +525,7 @@ pub async fn create_instance_with( network_interfaces: nics.clone(), external_ips, disks, - start: true, + start, }, ) .await diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index b9023a8212..379042c849 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -1747,6 +1747,7 @@ async fn create_instance_with_disk(client: &ClientTestContext) { params::InstanceDiskAttach { name: DISK_NAME.parse().unwrap() }, )], Vec::::new(), + true, ) .await; } diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 8beffe43a5..4f606f2bff 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -390,6 +390,12 @@ pub static DEMO_INSTANCE_DISKS_DETACH_URL: Lazy = Lazy::new(|| { *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR ) }); +pub static DEMO_INSTANCE_EPHEMERAL_IP_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/external-ips/ephemeral?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); pub static DEMO_INSTANCE_NICS_URL: Lazy = Lazy::new(|| { format!( "/v1/network-interfaces?project={}&instance={}", @@ -414,7 +420,7 @@ pub static DEMO_INSTANCE_CREATE: Lazy = user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some(DEMO_IP_POOL_NAME.clone()), + pool: Some(DEMO_IP_POOL_NAME.clone().into()), }], disks: vec![], start: true, @@ -720,6 +726,19 @@ pub static DEMO_FLOAT_IP_URL: Lazy = Lazy::new(|| { ) }); +pub static DEMO_FLOATING_IP_ATTACH_URL: Lazy = Lazy::new(|| { + format!( + "/v1/floating-ips/{}/attach?{}", + *DEMO_FLOAT_IP_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static DEMO_FLOATING_IP_DETACH_URL: Lazy = Lazy::new(|| { + format!( + "/v1/floating-ips/{}/detach?{}", + *DEMO_FLOAT_IP_NAME, *DEMO_PROJECT_SELECTOR + ) +}); + pub static DEMO_FLOAT_IP_CREATE: Lazy = Lazy::new(|| params::FloatingIpCreate { identity: IdentityMetadataCreateParams { @@ -730,6 +749,13 @@ pub static DEMO_FLOAT_IP_CREATE: Lazy = pool: None, }); +pub static DEMO_FLOAT_IP_ATTACH: Lazy = + Lazy::new(|| params::FloatingIpAttach { + kind: params::FloatingIpParentKind::Instance, + parent: DEMO_FLOAT_IP_NAME.clone().into(), + }); +pub static DEMO_EPHEMERAL_IP_ATTACH: Lazy = + Lazy::new(|| params::EphemeralIpCreate { pool: None }); // Identity providers pub const IDENTITY_PROVIDERS_URL: &'static str = "/v1/system/identity-providers?silo=demo-silo"; @@ -1767,6 +1793,18 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { allowed_methods: 
vec![AllowedMethod::Get], }, + VerifyEndpoint { + url: &DEMO_INSTANCE_EPHEMERAL_IP_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&*DEMO_EPHEMERAL_IP_ATTACH).unwrap() + ), + AllowedMethod::Delete, + ], + }, + /* IAM */ VerifyEndpoint { @@ -2240,5 +2278,27 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::Delete, ], }, + + VerifyEndpoint { + url: &DEMO_FLOATING_IP_ATTACH_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&*DEMO_FLOAT_IP_ATTACH).unwrap(), + ), + ], + }, + + VerifyEndpoint { + url: &DEMO_FLOATING_IP_DETACH_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&()).unwrap(), + ), + ], + }, ] }); diff --git a/nexus/tests/integration_tests/external_ips.rs b/nexus/tests/integration_tests/external_ips.rs index 3b6127ceb1..57f813d505 100644 --- a/nexus/tests/integration_tests/external_ips.rs +++ b/nexus/tests/integration_tests/external_ips.rs @@ -7,6 +7,7 @@ use std::net::IpAddr; use std::net::Ipv4Addr; +use crate::integration_tests::instances::fetch_instance_external_ips; use crate::integration_tests::instances::instance_simulate; use dropshot::test_util::ClientTestContext; use dropshot::HttpErrorResponseBody; @@ -30,12 +31,14 @@ use nexus_test_utils::resource_helpers::object_delete_error; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; use nexus_types::external_api::shared; +use nexus_types::external_api::views; use nexus_types::external_api::views::FloatingIp; use nexus_types::identity::Resource; use omicron_common::address::IpRange; use omicron_common::address::Ipv4Range; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::Instance; +use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use uuid::Uuid; @@ -47,10 +50,33 @@ const PROJECT_NAME: &str = "rootbeer-float"; const FIP_NAMES: &[&str] = &["vanilla", "chocolate", "strawberry", "pistachio", "caramel"]; +const INSTANCE_NAMES: &[&str] = &["anonymous-diner", "anonymous-restaurant"]; + pub fn get_floating_ips_url(project_name: &str) -> String { format!("/v1/floating-ips?project={project_name}") } +pub fn instance_ephemeral_ip_url( + instance_name: &str, + project_name: &str, +) -> String { + format!("/v1/instances/{instance_name}/external-ips/ephemeral?project={project_name}") +} + +pub fn attach_floating_ip_url( + floating_ip_name: &str, + project_name: &str, +) -> String { + format!("/v1/floating-ips/{floating_ip_name}/attach?project={project_name}") +} + +pub fn detach_floating_ip_url( + floating_ip_name: &str, + project_name: &str, +) -> String { + format!("/v1/floating-ips/{floating_ip_name}/detach?project={project_name}") +} + pub fn get_floating_ip_by_name_url( fip_name: &str, project_name: &str, @@ -392,7 +418,9 @@ async fn test_floating_ip_delete(cptestctx: &ControlPlaneTestContext) { } #[nexus_test] -async fn test_floating_ip_attachment(cptestctx: &ControlPlaneTestContext) { +async fn test_floating_ip_create_attachment( + cptestctx: &ControlPlaneTestContext, +) { let client = &cptestctx.external_client; let apictx = &cptestctx.server.apictx(); let nexus = &apictx.nexus; @@ -410,16 +438,13 @@ async fn test_floating_ip_attachment(cptestctx: 
&ControlPlaneTestContext) { .await; // Bind the floating IP to an instance at create time. - let instance_name = "anonymous-diner"; - let instance = create_instance_with( - &client, - PROJECT_NAME, + let instance_name = INSTANCE_NAMES[0]; + let instance = instance_for_external_ips( + client, instance_name, - ¶ms::InstanceNetworkInterfaceAttachment::Default, - vec![], - vec![params::ExternalIpCreate::Floating { - floating_ip_name: FIP_NAMES[0].parse().unwrap(), - }], + true, + false, + &FIP_NAMES[..1], ) .await; @@ -430,20 +455,12 @@ async fn test_floating_ip_attachment(cptestctx: &ControlPlaneTestContext) { assert_eq!(fetched_fip.instance_id, Some(instance.identity.id)); // Try to delete the floating IP, which should fail. - let error: HttpErrorResponseBody = NexusRequest::new( - RequestBuilder::new( - client, - Method::DELETE, - &get_floating_ip_by_id_url(&fip.identity.id), - ) - .expect_status(Some(StatusCode::BAD_REQUEST)), + let error = object_delete_error( + client, + &get_floating_ip_by_id_url(&fip.identity.id), + StatusCode::BAD_REQUEST, ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + .await; assert_eq!( error.message, format!("Floating IP cannot be deleted while attached to an instance"), @@ -497,6 +514,340 @@ async fn test_floating_ip_attachment(cptestctx: &ControlPlaneTestContext) { .unwrap(); } +#[nexus_test] +async fn test_external_ip_live_attach_detach( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + create_default_ip_pool(&client).await; + let project = create_project(client, PROJECT_NAME).await; + + // Create 2 instances, and a floating IP for each instance. + // One instance will be started, and one will be stopped. + let mut fips = vec![]; + for i in 0..2 { + fips.push( + create_floating_ip( + client, + FIP_NAMES[i], + project.identity.name.as_str(), + None, + None, + ) + .await, + ); + } + + let mut instances = vec![]; + for (i, start) in [false, true].iter().enumerate() { + let instance = instance_for_external_ips( + client, + INSTANCE_NAMES[i], + *start, + false, + &[], + ) + .await; + + if *start { + instance_simulate(nexus, &instance.identity.id).await; + instance_simulate(nexus, &instance.identity.id).await; + } + + // Verify that each instance has no external IPs. + assert_eq!( + fetch_instance_external_ips( + client, + INSTANCE_NAMES[i], + PROJECT_NAME + ) + .await + .len(), + 0 + ); + + instances.push(instance); + } + + // Attach a floating IP and ephemeral IP to each instance. + let mut recorded_ephs = vec![]; + for (instance, fip) in instances.iter().zip(&fips) { + let instance_name = instance.identity.name.as_str(); + let eph_resp = ephemeral_ip_attach(client, instance_name, None).await; + let fip_resp = floating_ip_attach( + client, + instance_name, + fip.identity.name.as_str(), + ) + .await; + + // Verify both appear correctly. + // This implicitly checks FIP parent_id matches the instance, + // and state has fully moved into 'Attached'. + let eip_list = + fetch_instance_external_ips(client, instance_name, PROJECT_NAME) + .await; + + assert_eq!(eip_list.len(), 2); + assert!(eip_list.contains(&eph_resp)); + assert!(eip_list + .iter() + .any(|v| matches!(v, views::ExternalIp::Floating(..)) + && v.ip() == fip_resp.ip)); + assert_eq!(fip.ip, fip_resp.ip); + + // Check for idempotency: repeat requests should return same values. 
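+        // (For an already-attached IP, attach is expected to behave as a
+        // read of the current state rather than an error; schematically,
+        // attach(x) followed by attach(x) hands back the same view both
+        // times. The equality assertions below lean on `PartialEq`, which
+        // this change derives for the external IP view types.)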
+ let eph_resp_2 = ephemeral_ip_attach(client, instance_name, None).await; + let fip_resp_2 = floating_ip_attach( + client, + instance_name, + fip.identity.name.as_str(), + ) + .await; + + assert_eq!(eph_resp, eph_resp_2); + assert_eq!(fip_resp.ip, fip_resp_2.ip); + + recorded_ephs.push(eph_resp); + } + + // Detach a floating IP and ephemeral IP from each instance. + for (instance, fip) in instances.iter().zip(&fips) { + let instance_name = instance.identity.name.as_str(); + ephemeral_ip_detach(client, instance_name).await; + let fip_resp = + floating_ip_detach(client, fip.identity.name.as_str()).await; + + // Verify both are removed, and that their bodies match the known FIP/EIP combo. + let eip_list = + fetch_instance_external_ips(client, instance_name, PROJECT_NAME) + .await; + + assert_eq!(eip_list.len(), 0); + assert_eq!(fip.ip, fip_resp.ip); + + // Check for idempotency: repeat requests should return same values for FIP, + // but in ephemeral case there is no currently known IP so we return an error. + let fip_resp_2 = + floating_ip_detach(client, fip.identity.name.as_str()).await; + assert_eq!(fip_resp.ip, fip_resp_2.ip); + + let url = instance_ephemeral_ip_url(instance_name, PROJECT_NAME); + let error = + object_delete_error(client, &url, StatusCode::BAD_REQUEST).await; + assert_eq!( + error.message, + "instance does not have an ephemeral IP attached".to_string() + ); + } +} + +#[nexus_test] +async fn test_external_ip_attach_detach_fail_if_in_use_by_other( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + create_default_ip_pool(&client).await; + let project = create_project(client, PROJECT_NAME).await; + + // Create 2 instances, bind a FIP to each. + let mut instances = vec![]; + let mut fips = vec![]; + for i in 0..2 { + let fip = create_floating_ip( + client, + FIP_NAMES[i], + project.identity.name.as_str(), + None, + None, + ) + .await; + let instance = instance_for_external_ips( + client, + INSTANCE_NAMES[i], + true, + false, + &[FIP_NAMES[i]], + ) + .await; + + instance_simulate(nexus, &instance.identity.id).await; + instance_simulate(nexus, &instance.identity.id).await; + + instances.push(instance); + fips.push(fip); + } + + // Attach in-use FIP to *other* instance should fail. + let url = + attach_floating_ip_url(fips[1].identity.name.as_str(), PROJECT_NAME); + let error: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::FloatingIpAttach { + kind: params::FloatingIpParentKind::Instance, + parent: INSTANCE_NAMES[0].parse::().unwrap().into(), + })) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!(error.message, "floating IP cannot be attached to one instance while still attached to another".to_string()); +} + +#[nexus_test] +async fn test_external_ip_attach_fails_after_maximum( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; + let project = create_project(client, PROJECT_NAME).await; + + // Create 33 floating IPs, and bind the first 32 to an instance. 
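+    // (32 is the per-instance external IP cap enforced by Nexus; the exact
+    // constant name is an implementation detail, but the error text asserted
+    // below pins the limit. The 33rd address exists only to be rejected.)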
+ let mut fip_names = vec![]; + for i in 0..33 { + let fip_name = format!("fip-{i}"); + create_floating_ip( + client, + &fip_name, + project.identity.name.as_str(), + None, + None, + ) + .await; + fip_names.push(fip_name); + } + + let fip_name_slice = + fip_names.iter().map(String::as_str).collect::>(); + let instance_name = INSTANCE_NAMES[0]; + instance_for_external_ips( + client, + instance_name, + true, + false, + &fip_name_slice[..32], + ) + .await; + + // Attempt to attach the final FIP should fail. + let url = attach_floating_ip_url(fip_name_slice[32], PROJECT_NAME); + let error: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::FloatingIpAttach { + kind: params::FloatingIpParentKind::Instance, + parent: instance_name.parse::().unwrap().into(), + })) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!( + error.message, + "an instance may not have more than 32 external IP addresses" + .to_string() + ); + + // Attempt to attach an ephemeral IP should fail. + let url = instance_ephemeral_ip_url(instance_name, PROJECT_NAME); + let error: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::EphemeralIpCreate { pool: None })) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!( + error.message, + "an instance may not have more than 32 external IP addresses" + .to_string() + ); +} + +#[nexus_test] +async fn test_external_ip_attach_ephemeral_at_pool_exhaustion( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; + let other_pool_range = IpRange::V4( + Ipv4Range::new(Ipv4Addr::new(10, 1, 0, 1), Ipv4Addr::new(10, 1, 0, 1)) + .unwrap(), + ); + create_ip_pool(&client, "other-pool", Some(other_pool_range)).await; + let silo_id = DEFAULT_SILO.id(); + link_ip_pool(&client, "other-pool", &silo_id, false).await; + + create_project(client, PROJECT_NAME).await; + + // Create two instances, to which we will later add eph IPs from 'other-pool'. + for name in &INSTANCE_NAMES[..2] { + instance_for_external_ips(client, name, false, false, &[]).await; + } + + let pool_name: Name = "other-pool".parse().unwrap(); + + // Attach a new EIP from other-pool to both instances. + // This should succeed for the first, and fail for the second + // due to pool exhaustion. + let eph_resp = ephemeral_ip_attach( + client, + INSTANCE_NAMES[0], + Some(pool_name.as_str()), + ) + .await; + assert_eq!(eph_resp.ip(), other_pool_range.first_address()); + assert_eq!(eph_resp.ip(), other_pool_range.last_address()); + + let url = instance_ephemeral_ip_url(INSTANCE_NAMES[1], PROJECT_NAME); + let error: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::ExternalIpCreate::Ephemeral { + pool: Some(pool_name.clone().into()), + })) + .expect_status(Some(StatusCode::INSUFFICIENT_STORAGE)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!( + error.message, + "Insufficient capacity: No external IP addresses available".to_string() + ); + + // Idempotent re-add to the first instance should succeed even if + // an internal attempt to alloc a new EIP would fail. 
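+    // (In other words, the attach path is expected to see that this exact
+    // ephemeral IP is already bound to the instance and return it as-is,
+    // without consulting the allocator; a rough sketch of that check:
+    //     if current_eph == Some(requested) { return current_eph; }
+    // where the names are illustrative, not the actual Nexus internals.)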
+ let eph_resp_2 = ephemeral_ip_attach( + client, + INSTANCE_NAMES[0], + Some(pool_name.as_str()), + ) + .await; + assert_eq!(eph_resp_2, eph_resp); +} + pub async fn floating_ip_get( client: &ClientTestContext, fip_url: &str, @@ -521,3 +872,96 @@ async fn floating_ip_get_as( panic!("failed to make \"get\" request to {fip_url}: {e}") }) } + +async fn instance_for_external_ips( + client: &ClientTestContext, + instance_name: &str, + start: bool, + use_ephemeral_ip: bool, + floating_ip_names: &[&str], +) -> Instance { + let mut fips: Vec<_> = floating_ip_names + .iter() + .map(|s| params::ExternalIpCreate::Floating { + floating_ip: s.parse::().unwrap().into(), + }) + .collect(); + if use_ephemeral_ip { + fips.push(params::ExternalIpCreate::Ephemeral { pool: None }) + } + create_instance_with( + &client, + PROJECT_NAME, + instance_name, + ¶ms::InstanceNetworkInterfaceAttachment::Default, + vec![], + fips, + start, + ) + .await +} + +async fn ephemeral_ip_attach( + client: &ClientTestContext, + instance_name: &str, + pool_name: Option<&str>, +) -> views::ExternalIp { + let url = instance_ephemeral_ip_url(instance_name, PROJECT_NAME); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::EphemeralIpCreate { + pool: pool_name.map(|v| v.parse::().unwrap().into()), + })) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap() +} + +async fn ephemeral_ip_detach(client: &ClientTestContext, instance_name: &str) { + let url = instance_ephemeral_ip_url(instance_name, PROJECT_NAME); + object_delete(client, &url).await; +} + +async fn floating_ip_attach( + client: &ClientTestContext, + instance_name: &str, + floating_ip_name: &str, +) -> views::FloatingIp { + let url = attach_floating_ip_url(floating_ip_name, PROJECT_NAME); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::FloatingIpAttach { + kind: params::FloatingIpParentKind::Instance, + parent: instance_name.parse::().unwrap().into(), + })) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap() +} + +async fn floating_ip_detach( + client: &ClientTestContext, + floating_ip_name: &str, +) -> views::FloatingIp { + let url = detach_floating_ip_url(floating_ip_name, PROJECT_NAME); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap() +} diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 2f4e913185..8d97df6cda 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -649,6 +649,7 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { ¶ms::InstanceNetworkInterfaceAttachment::Default, Vec::::new(), Vec::::new(), + true, ) .await; let instance_id = instance.identity.id; @@ -752,6 +753,7 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { // located with their instances. 
Vec::::new(), Vec::::new(), + true, ) .await; let instance_id = instance.identity.id; @@ -1104,6 +1106,7 @@ async fn test_instance_metrics_with_migration( ¶ms::InstanceNetworkInterfaceAttachment::Default, Vec::::new(), Vec::::new(), + true, ) .await; let instance_id = instance.identity.id; @@ -3644,7 +3647,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( let ip = fetch_instance_ephemeral_ip(client, "pool1-inst").await; assert!( - ip.ip >= range1.first_address() && ip.ip <= range1.last_address(), + ip.ip() >= range1.first_address() && ip.ip() <= range1.last_address(), "Expected ephemeral IP to come from pool1" ); @@ -3652,7 +3655,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( create_instance_with_pool(client, "pool2-inst", Some("pool2")).await; let ip = fetch_instance_ephemeral_ip(client, "pool2-inst").await; assert!( - ip.ip >= range2.first_address() && ip.ip <= range2.last_address(), + ip.ip() >= range2.first_address() && ip.ip() <= range2.last_address(), "Expected ephemeral IP to come from pool2" ); @@ -3667,7 +3670,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( create_instance_with_pool(client, "pool2-inst2", None).await; let ip = fetch_instance_ephemeral_ip(client, "pool2-inst2").await; assert!( - ip.ip >= range2.first_address() && ip.ip <= range2.last_address(), + ip.ip() >= range2.first_address() && ip.ip() <= range2.last_address(), "Expected ephemeral IP to come from pool2" ); @@ -3705,7 +3708,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some("pool1".parse().unwrap()), + pool: Some("pool1".parse::().unwrap().into()), }], disks: vec![], start: true, @@ -3769,7 +3772,7 @@ async fn test_instance_ephemeral_ip_from_orphan_pool( user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some("orphan-pool".parse().unwrap()), + pool: Some("orphan-pool".parse::().unwrap().into()), }], disks: vec![], start: true, @@ -3829,7 +3832,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error( user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: None, // <--- the only important thing here + pool: None, // <--- the only important thing here }], disks: vec![], start: true, @@ -3845,7 +3848,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error( // same deal if you specify a pool that doesn't exist let body = params::InstanceCreate { external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some("nonexistent-pool".parse().unwrap()), + pool: Some("nonexistent-pool".parse::().unwrap().into()), }], ..body }; @@ -3879,7 +3882,7 @@ async fn test_instance_attach_several_external_ips( // Create several floating IPs for the instance, totalling 8 IPs. 
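+    // (Allocation order drives the assertions below: the seven floating IPs
+    // are created first and take 10.0.0.1 through 10.0.0.7, the instance's
+    // SNAT IP takes 10.0.0.8, and the ephemeral IP lands on 10.0.0.9,
+    // assuming the default test pool hands out addresses from 10.0.0.1 up.)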
     let mut external_ip_create =
-        vec![params::ExternalIpCreate::Ephemeral { pool_name: None }];
+        vec![params::ExternalIpCreate::Ephemeral { pool: None }];
     let mut fips = vec![];
     for i in 1..8 {
         let name = format!("fip-{i}");
         fips.push(
             create_floating_ip(&client, &name, PROJECT_NAME, None, None).await,
         );
         external_ip_create.push(params::ExternalIpCreate::Floating {
-            floating_ip_name: name.parse().unwrap(),
+            floating_ip: name.parse::<Name>().unwrap().into(),
         });
     }
@@ -3900,30 +3903,31 @@ async fn test_instance_attach_several_external_ips(
         &params::InstanceNetworkInterfaceAttachment::Default,
         vec![],
         external_ip_create,
+        true,
     )
     .await;

     // Verify that all external IPs are visible on the instance and have
     // been allocated in order.
     let external_ips =
-        fetch_instance_external_ips(&client, instance_name).await;
+        fetch_instance_external_ips(&client, instance_name, PROJECT_NAME).await;
     assert_eq!(external_ips.len(), 8);
     eprintln!("{external_ips:?}");
     for (i, eip) in external_ips
         .iter()
-        .sorted_unstable_by(|a, b| a.ip.cmp(&b.ip))
+        .sorted_unstable_by(|a, b| a.ip().cmp(&b.ip()))
         .enumerate()
     {
         let last_octet = i + if i != external_ips.len() - 1 {
-            assert_eq!(eip.kind, IpKind::Floating);
+            assert_eq!(eip.kind(), IpKind::Floating);
             1
         } else {
             // SNAT will occupy 10.0.0.8 here, since it is alloc'd before
             // the ephemeral.
-            assert_eq!(eip.kind, IpKind::Ephemeral);
+            assert_eq!(eip.kind(), IpKind::Ephemeral);
             2
         };
-        assert_eq!(eip.ip, Ipv4Addr::new(10, 0, 0, last_octet as u8));
+        assert_eq!(eip.ip(), Ipv4Addr::new(10, 0, 0, last_octet as u8));
     }

     // Verify that all floating IPs are bound to their parent instance.
@@ -3948,7 +3952,7 @@ async fn test_instance_allow_only_one_ephemeral_ip(

     // don't need any IP pools because request fails at parse time
     let ephemeral_create = params::ExternalIpCreate::Ephemeral {
-        pool_name: Some("default".parse().unwrap()),
+        pool: Some("default".parse::<Name>().unwrap().into()),
     };
     let create_params = params::InstanceCreate {
         identity: IdentityMetadataCreateParams {
@@ -3992,19 +3996,20 @@ async fn create_instance_with_pool(
         &params::InstanceNetworkInterfaceAttachment::Default,
         vec![],
         vec![params::ExternalIpCreate::Ephemeral {
-            pool_name: pool_name.map(|name| name.parse().unwrap()),
+            pool: pool_name.map(|name| name.parse::<Name>().unwrap().into()),
         }],
+        true,
     )
     .await
 }

-async fn fetch_instance_external_ips(
+pub async fn fetch_instance_external_ips(
     client: &ClientTestContext,
     instance_name: &str,
+    project_name: &str,
 ) -> Vec<views::ExternalIp> {
     let ips_url = format!(
-        "/v1/instances/{}/external-ips?project={}",
-        instance_name, PROJECT_NAME
+        "/v1/instances/{instance_name}/external-ips?project={project_name}",
     );
     let ips = NexusRequest::object_get(client, &ips_url)
         .authn_as(AuthnMode::PrivilegedUser)
@@ -4020,10 +4025,10 @@ async fn fetch_instance_ephemeral_ip(
     client: &ClientTestContext,
     instance_name: &str,
 ) -> views::ExternalIp {
-    fetch_instance_external_ips(client, instance_name)
+    fetch_instance_external_ips(client, instance_name, PROJECT_NAME)
         .await
         .into_iter()
-        .find(|v| v.kind == IpKind::Ephemeral)
+        .find(|v| v.kind() == IpKind::Ephemeral)
         .unwrap()
 }
@@ -4087,7 +4092,7 @@ async fn test_instance_create_in_silo(cptestctx: &ControlPlaneTestContext) {
         user_data: vec![],
         network_interfaces: params::InstanceNetworkInterfaceAttachment::Default,
         external_ips: vec![params::ExternalIpCreate::Ephemeral {
-            pool_name: Some(Name::try_from(String::from("default")).unwrap()),
+            pool: Some("default".parse::<Name>().unwrap().into()),
         }],
disks: vec![], start: true, diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index 91a933754c..e36b213f7e 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ -143,6 +143,7 @@ async fn test_subnet_allocation(cptestctx: &ControlPlaneTestContext) { Vec::::new(), // External IPs= Vec::::new(), + true, ) .await; } diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index bd79a9c3e9..8bd2f34de5 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -12,8 +12,10 @@ disk_view GET /v1/disks/{disk} API operations found with tag "floating-ips" OPERATION ID METHOD URL PATH +floating_ip_attach POST /v1/floating-ips/{floating_ip}/attach floating_ip_create POST /v1/floating-ips floating_ip_delete DELETE /v1/floating-ips/{floating_ip} +floating_ip_detach POST /v1/floating-ips/{floating_ip}/detach floating_ip_list GET /v1/floating-ips floating_ip_view GET /v1/floating-ips/{floating_ip} @@ -40,6 +42,8 @@ instance_delete DELETE /v1/instances/{instance} instance_disk_attach POST /v1/instances/{instance}/disks/attach instance_disk_detach POST /v1/instances/{instance}/disks/detach instance_disk_list GET /v1/instances/{instance}/disks +instance_ephemeral_ip_attach POST /v1/instances/{instance}/external-ips/ephemeral +instance_ephemeral_ip_detach DELETE /v1/instances/{instance}/external-ips/ephemeral instance_external_ip_list GET /v1/instances/{instance}/external-ips instance_list GET /v1/instances instance_migrate POST /v1/instances/{instance}/migrate diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 750e83c2a2..62c8224461 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -71,7 +71,7 @@ path_param!(VpcPath, vpc, "VPC"); path_param!(SubnetPath, subnet, "subnet"); path_param!(RouterPath, router, "router"); path_param!(RoutePath, route, "route"); -path_param!(FloatingIpPath, floating_ip, "Floating IP"); +path_param!(FloatingIpPath, floating_ip, "floating IP"); path_param!(DiskPath, disk, "disk"); path_param!(SnapshotPath, snapshot, "snapshot"); path_param!(ImagePath, image, "image"); @@ -890,6 +890,23 @@ pub struct FloatingIpCreate { pub pool: Option, } +/// The type of resource that a floating IP is attached to +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum FloatingIpParentKind { + Instance, +} + +/// Parameters for attaching a floating IP address to another resource +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct FloatingIpAttach { + /// Name or ID of the resource that this IP address should be attached to + pub parent: NameOrId, + + /// The type of `parent`'s resource + pub kind: FloatingIpParentKind, +} + // INSTANCES /// Describes an attachment of an `InstanceNetworkInterface` to an `Instance`, @@ -954,14 +971,30 @@ pub struct InstanceDiskAttach { #[serde(tag = "type", rename_all = "snake_case")] pub enum ExternalIpCreate { /// An IP address providing both inbound and outbound access. The address is - /// automatically-assigned from the provided IP Pool, or all available pools - /// if not specified. - Ephemeral { pool_name: Option }, + /// automatically-assigned from the provided IP Pool, or the current silo's + /// default pool if not specified. 
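+    ///
+    /// As a sketch of the wire format (via the `type` tag on this enum,
+    /// with a hypothetical pool name):
+    /// `{ "type": "ephemeral", "pool": "default" }`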
+ Ephemeral { pool: Option }, /// An IP address providing both inbound and outbound access. The address is - /// an existing Floating IP object assigned to the current project. + /// an existing floating IP object assigned to the current project. /// /// The floating IP must not be in use by another instance or service. - Floating { floating_ip_name: Name }, + Floating { floating_ip: NameOrId }, +} + +/// Parameters for creating an ephemeral IP address for an instance. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[serde(tag = "type", rename_all = "snake_case")] +pub struct EphemeralIpCreate { + /// Name or ID of the IP pool used to allocate an address + pub pool: Option, +} + +/// Parameters for detaching an external IP from an instance. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ExternalIpDetach { + Ephemeral, + Floating { floating_ip: NameOrId }, } /// Create-time parameters for an `Instance` diff --git a/nexus/types/src/external_api/shared.rs b/nexus/types/src/external_api/shared.rs index a4c5ae1e62..f6b4db18a3 100644 --- a/nexus/types/src/external_api/shared.rs +++ b/nexus/types/src/external_api/shared.rs @@ -221,7 +221,9 @@ pub enum ServiceUsingCertificate { } /// The kind of an external IP address for an instance -#[derive(Debug, Clone, Copy, Deserialize, Serialize, JsonSchema, PartialEq)] +#[derive( + Debug, Clone, Copy, Deserialize, Eq, Serialize, JsonSchema, PartialEq, +)] #[serde(rename_all = "snake_case")] pub enum IpKind { Ephemeral, diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 314dd4ed00..5e31be7af8 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -12,8 +12,8 @@ use api_identity::ObjectIdentity; use chrono::DateTime; use chrono::Utc; use omicron_common::api::external::{ - ByteCount, Digest, IdentityMetadata, InstanceState, Ipv4Net, Ipv6Net, Name, - ObjectIdentity, RoleName, SemverVersion, SimpleIdentity, + ByteCount, Digest, Error, IdentityMetadata, InstanceState, Ipv4Net, + Ipv6Net, Name, ObjectIdentity, RoleName, SemverVersion, SimpleIdentity, }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -337,16 +337,34 @@ pub struct IpPoolRange { // INSTANCE EXTERNAL IP ADDRESSES -#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub struct ExternalIp { - pub ip: IpAddr, - pub kind: IpKind, +#[derive(Debug, Clone, Deserialize, PartialEq, Serialize, JsonSchema)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum ExternalIp { + Ephemeral { ip: IpAddr }, + Floating(FloatingIp), +} + +impl ExternalIp { + pub fn ip(&self) -> IpAddr { + match self { + Self::Ephemeral { ip } => *ip, + Self::Floating(float) => float.ip, + } + } + + pub fn kind(&self) -> IpKind { + match self { + Self::Ephemeral { .. } => IpKind::Ephemeral, + Self::Floating(_) => IpKind::Floating, + } + } } /// A Floating IP is a well-known IP address which can be attached /// and detached from instances. 
-#[derive(ObjectIdentity, Debug, Clone, Deserialize, Serialize, JsonSchema)] +#[derive( + ObjectIdentity, Debug, PartialEq, Clone, Deserialize, Serialize, JsonSchema, +)] #[serde(rename_all = "snake_case")] pub struct FloatingIp { #[serde(flatten)] @@ -360,6 +378,25 @@ pub struct FloatingIp { pub instance_id: Option, } +impl From for ExternalIp { + fn from(value: FloatingIp) -> Self { + ExternalIp::Floating(value) + } +} + +impl TryFrom for FloatingIp { + type Error = Error; + + fn try_from(value: ExternalIp) -> Result { + match value { + ExternalIp::Ephemeral { .. } => Err(Error::internal_error( + "tried to convert an ephemeral IP into a floating IP", + )), + ExternalIp::Floating(v) => Ok(v), + } + } +} + // RACKS /// View of an Rack diff --git a/openapi/nexus.json b/openapi/nexus.json index 2dd4037430..59206ed010 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -930,7 +930,7 @@ { "in": "path", "name": "floating_ip", - "description": "Name or ID of the Floating IP", + "description": "Name or ID of the floating IP", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -974,7 +974,7 @@ { "in": "path", "name": "floating_ip", - "description": "Name or ID of the Floating IP", + "description": "Name or ID of the floating IP", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -1002,6 +1002,108 @@ } } }, + "/v1/floating-ips/{floating_ip}/attach": { + "post": { + "tags": [ + "floating-ips" + ], + "summary": "Attach a floating IP to an instance or other resource", + "operationId": "floating_ip_attach", + "parameters": [ + { + "in": "path", + "name": "floating_ip", + "description": "Name or ID of the floating IP", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FloatingIpAttach" + } + } + }, + "required": true + }, + "responses": { + "202": { + "description": "successfully enqueued operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FloatingIp" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/floating-ips/{floating_ip}/detach": { + "post": { + "tags": [ + "floating-ips" + ], + "summary": "Detach a floating IP from an instance or other resource", + "operationId": "floating_ip_detach", + "parameters": [ + { + "in": "path", + "name": "floating_ip", + "description": "Name or ID of the floating IP", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "202": { + "description": "successfully enqueued operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FloatingIp" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/groups": { "get": { "tags": [ @@ -1826,6 +1928,99 @@ } } }, + "/v1/instances/{instance}/external-ips/ephemeral": { + "post": { + "tags": [ + "instances" + ], + "summary": "Allocate and attach an ephemeral IP to an instance", + "operationId": 
"instance_ephemeral_ip_attach", + "parameters": [ + { + "in": "path", + "name": "instance", + "description": "Name or ID of the instance", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EphemeralIpCreate" + } + } + }, + "required": true + }, + "responses": { + "202": { + "description": "successfully enqueued operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ExternalIp" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "tags": [ + "instances" + ], + "summary": "Detach and deallocate an ephemeral IP from an instance", + "operationId": "instance_ephemeral_ip_detach", + "parameters": [ + { + "in": "path", + "name": "instance", + "description": "Name or ID of the instance", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/instances/{instance}/migrate": { "post": { "tags": [ @@ -11005,6 +11200,21 @@ } ] }, + "EphemeralIpCreate": { + "description": "Parameters for creating an ephemeral IP address for an instance.", + "type": "object", + "properties": { + "pool": { + "nullable": true, + "description": "Name or ID of the IP pool used to allocate an address", + "allOf": [ + { + "$ref": "#/components/schemas/NameOrId" + } + ] + } + } + }, "Error": { "description": "Error information from a response.", "type": "object", @@ -11025,33 +11235,105 @@ ] }, "ExternalIp": { - "type": "object", - "properties": { - "ip": { - "type": "string", - "format": "ip" + "oneOf": [ + { + "type": "object", + "properties": { + "ip": { + "type": "string", + "format": "ip" + }, + "kind": { + "type": "string", + "enum": [ + "ephemeral" + ] + } + }, + "required": [ + "ip", + "kind" + ] }, - "kind": { - "$ref": "#/components/schemas/IpKind" + { + "description": "A Floating IP is a well-known IP address which can be attached and detached from instances.", + "type": "object", + "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "instance_id": { + "nullable": true, + "description": "The ID of the instance that this Floating IP is attached to, if it is presently in use.", + "type": "string", + "format": "uuid" + }, + "ip": { + "description": "The IP address held by this resource.", + "type": "string", + "format": "ip" + }, + "kind": { + "type": "string", + "enum": [ + "floating" + ] + }, + "name": { + "description": "unique, mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "project_id": { + "description": "The project this resource exists within.", + "type": "string", + "format": "uuid" + }, + "time_created": { 
+ "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "description", + "id", + "ip", + "kind", + "name", + "project_id", + "time_created", + "time_modified" + ] } - }, - "required": [ - "ip", - "kind" ] }, "ExternalIpCreate": { "description": "Parameters for creating an external IP address for instances.", "oneOf": [ { - "description": "An IP address providing both inbound and outbound access. The address is automatically-assigned from the provided IP Pool, or all available pools if not specified.", + "description": "An IP address providing both inbound and outbound access. The address is automatically-assigned from the provided IP Pool, or the current silo's default pool if not specified.", "type": "object", "properties": { - "pool_name": { + "pool": { "nullable": true, "allOf": [ { - "$ref": "#/components/schemas/Name" + "$ref": "#/components/schemas/NameOrId" } ] }, @@ -11067,11 +11349,11 @@ ] }, { - "description": "An IP address providing both inbound and outbound access. The address is an existing Floating IP object assigned to the current project.\n\nThe floating IP must not be in use by another instance or service.", + "description": "An IP address providing both inbound and outbound access. The address is an existing floating IP object assigned to the current project.\n\nThe floating IP must not be in use by another instance or service.", "type": "object", "properties": { - "floating_ip_name": { - "$ref": "#/components/schemas/Name" + "floating_ip": { + "$ref": "#/components/schemas/NameOrId" }, "type": { "type": "string", @@ -11081,7 +11363,7 @@ } }, "required": [ - "floating_ip_name", + "floating_ip", "type" ] } @@ -11226,6 +11508,32 @@ "time_modified" ] }, + "FloatingIpAttach": { + "description": "Parameters for attaching a floating IP address to another resource", + "type": "object", + "properties": { + "kind": { + "description": "The type of `parent`'s resource", + "allOf": [ + { + "$ref": "#/components/schemas/FloatingIpParentKind" + } + ] + }, + "parent": { + "description": "Name or ID of the resource that this IP address should be attached to", + "allOf": [ + { + "$ref": "#/components/schemas/NameOrId" + } + ] + } + }, + "required": [ + "kind", + "parent" + ] + }, "FloatingIpCreate": { "description": "Parameters for creating a new floating IP address for instances.", "type": "object", @@ -11257,6 +11565,13 @@ "name" ] }, + "FloatingIpParentKind": { + "description": "The type of resource that a floating IP is attached to", + "type": "string", + "enum": [ + "instance" + ] + }, "FloatingIpResultsPage": { "description": "A single page of results", "type": "object", @@ -12481,14 +12796,6 @@ } ] }, - "IpKind": { - "description": "The kind of an external IP address for an instance", - "type": "string", - "enum": [ - "ephemeral", - "floating" - ] - }, "IpNet": { "oneOf": [ { diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index b5b9d3fd5b..3e3f6abec6 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -327,6 +327,78 @@ } } }, + "/instances/{instance_id}/external-ip": { + "put": { + "operationId": "instance_put_external_ip", + "parameters": [ + { + "in": "path", + "name": "instance_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": 
{ + "$ref": "#/components/schemas/InstanceExternalIpBody" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "operationId": "instance_delete_external_ip", + "parameters": [ + { + "in": "path", + "name": "instance_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceExternalIpBody" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/instances/{instance_id}/migration-ids": { "put": { "operationId": "instance_put_migration_ids", @@ -4541,6 +4613,49 @@ "vmm_runtime" ] }, + "InstanceExternalIpBody": { + "description": "Used to dynamically update external IPs attached to an instance.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ephemeral" + ] + }, + "value": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "floating" + ] + }, + "value": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, "InstanceHardware": { "description": "Describes the instance hardware.", "type": "object", diff --git a/schema/crdb/25.0.0/up01.sql b/schema/crdb/25.0.0/up01.sql new file mode 100644 index 0000000000..0cb511fb91 --- /dev/null +++ b/schema/crdb/25.0.0/up01.sql @@ -0,0 +1,6 @@ +CREATE TYPE IF NOT EXISTS omicron.public.ip_attach_state AS ENUM ( + 'detached', + 'attached', + 'detaching', + 'attaching' +); diff --git a/schema/crdb/25.0.0/up02.sql b/schema/crdb/25.0.0/up02.sql new file mode 100644 index 0000000000..324a907dd4 --- /dev/null +++ b/schema/crdb/25.0.0/up02.sql @@ -0,0 +1,4 @@ +-- Intentionally nullable for now as we need to backfill using the current +-- value of parent_id. +ALTER TABLE omicron.public.external_ip +ADD COLUMN IF NOT EXISTS state omicron.public.ip_attach_state; diff --git a/schema/crdb/25.0.0/up03.sql b/schema/crdb/25.0.0/up03.sql new file mode 100644 index 0000000000..ea1d461250 --- /dev/null +++ b/schema/crdb/25.0.0/up03.sql @@ -0,0 +1,7 @@ +-- initialise external ip state for detached IPs. +set + local disallow_full_table_scans = off; + +UPDATE omicron.public.external_ip +SET state = 'detached' +WHERE parent_id IS NULL; diff --git a/schema/crdb/25.0.0/up04.sql b/schema/crdb/25.0.0/up04.sql new file mode 100644 index 0000000000..7bf89d6626 --- /dev/null +++ b/schema/crdb/25.0.0/up04.sql @@ -0,0 +1,7 @@ +-- initialise external ip state for attached IPs. +set + local disallow_full_table_scans = off; + +UPDATE omicron.public.external_ip +SET state = 'attached' +WHERE parent_id IS NOT NULL; diff --git a/schema/crdb/25.0.0/up05.sql b/schema/crdb/25.0.0/up05.sql new file mode 100644 index 0000000000..894806a3dc --- /dev/null +++ b/schema/crdb/25.0.0/up05.sql @@ -0,0 +1,2 @@ +-- Now move the new column to its intended state of non-nullable. 
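+-- (up03/up04 backfilled every live row above, so a pre-flight sanity check
+-- for this step is simply:
+--   SELECT count(*) FROM omicron.public.external_ip WHERE state IS NULL;
+-- which must return 0; otherwise the ALTER below fails.)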
+ALTER TABLE omicron.public.external_ip ALTER COLUMN state SET NOT NULL; diff --git a/schema/crdb/25.0.0/up06.sql b/schema/crdb/25.0.0/up06.sql new file mode 100644 index 0000000000..ca19081e37 --- /dev/null +++ b/schema/crdb/25.0.0/up06.sql @@ -0,0 +1,4 @@ +ALTER TABLE omicron.public.external_ip +ADD CONSTRAINT IF NOT EXISTS detached_null_parent_id CHECK ( + (state = 'detached') OR (parent_id IS NOT NULL) +); diff --git a/schema/crdb/25.0.0/up07.sql b/schema/crdb/25.0.0/up07.sql new file mode 100644 index 0000000000..00f9310c2e --- /dev/null +++ b/schema/crdb/25.0.0/up07.sql @@ -0,0 +1,4 @@ +CREATE UNIQUE INDEX IF NOT EXISTS one_ephemeral_ip_per_instance ON omicron.public.external_ip ( + parent_id +) + WHERE kind = 'ephemeral' AND parent_id IS NOT NULL AND time_deleted IS NULL; diff --git a/schema/crdb/25.0.0/up08.sql b/schema/crdb/25.0.0/up08.sql new file mode 100644 index 0000000000..3d85aaad05 --- /dev/null +++ b/schema/crdb/25.0.0/up08.sql @@ -0,0 +1,2 @@ +ALTER TABLE IF EXISTS omicron.public.external_ip +DROP CONSTRAINT IF EXISTS null_non_fip_parent_id; diff --git a/schema/crdb/25.0.0/up09.sql b/schema/crdb/25.0.0/up09.sql new file mode 100644 index 0000000000..bac963cce5 --- /dev/null +++ b/schema/crdb/25.0.0/up09.sql @@ -0,0 +1,4 @@ +ALTER TABLE IF EXISTS omicron.public.external_ip +ADD CONSTRAINT IF NOT EXISTS null_snat_parent_id CHECK ( + (kind != 'snat') OR (parent_id IS NOT NULL) +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index f3ca5c4b85..86d88f5fe9 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1669,6 +1669,13 @@ CREATE TYPE IF NOT EXISTS omicron.public.ip_kind AS ENUM ( 'floating' ); +CREATE TYPE IF NOT EXISTS omicron.public.ip_attach_state AS ENUM ( + 'detached', + 'attached', + 'detaching', + 'attaching' +); + /* * External IP addresses used for guest instances and externally-facing * services. @@ -1714,6 +1721,12 @@ CREATE TABLE IF NOT EXISTS omicron.public.external_ip ( /* FK to the `project` table. */ project_id UUID, + /* State of this IP with regard to instance attach/detach + * operations. This is mainly used to prevent concurrent use + * across sagas and allow rollback to correct state. + */ + state omicron.public.ip_attach_state NOT NULL, + /* The name must be non-NULL iff this is a floating IP. */ CONSTRAINT null_fip_name CHECK ( (kind != 'floating' AND name IS NULL) OR @@ -1735,16 +1748,27 @@ CREATE TABLE IF NOT EXISTS omicron.public.external_ip ( ), /* - * Only nullable if this is a floating IP, which may exist not - * attached to any instance or service yet. + * Only nullable if this is a floating/ephemeral IP, which may exist not + * attached to any instance or service yet. Ephemeral IPs should not generally + * exist without parent instances/services, but need to temporarily exist in + * this state for live attachment. */ - CONSTRAINT null_non_fip_parent_id CHECK ( - (kind != 'floating' AND parent_id is NOT NULL) OR (kind = 'floating') + CONSTRAINT null_snat_parent_id CHECK ( + (kind != 'snat') OR (parent_id IS NOT NULL) ), /* Ephemeral IPs are not supported for services. */ CONSTRAINT ephemeral_kind_service CHECK ( (kind = 'ephemeral' AND is_service = FALSE) OR (kind != 'ephemeral') + ), + + /* + * (Not detached) => non-null parent_id. + * This is not a two-way implication because SNAT IPs + * cannot have a null parent_id. 
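+     *
+     * Spelled out, the combinations this CHECK admits are:
+     *   detached  + parent_id NULL  -> allowed
+     *   detached  + parent_id set   -> allowed (e.g. mid-saga)
+     *   any other + parent_id set   -> allowed
+     *   any other + parent_id NULL  -> rejected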
+ */ + CONSTRAINT detached_null_parent_id CHECK ( + (state = 'detached') OR (parent_id IS NOT NULL) ) ); @@ -1777,6 +1801,12 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_external_ip_by_parent ON omicron.public ) WHERE parent_id IS NOT NULL AND time_deleted IS NULL; +/* Enforce a limit of one Ephemeral IP per instance */ +CREATE UNIQUE INDEX IF NOT EXISTS one_ephemeral_ip_per_instance ON omicron.public.external_ip ( + parent_id +) + WHERE kind = 'ephemeral' AND parent_id IS NOT NULL AND time_deleted IS NULL; + /* Enforce name-uniqueness of floating (service) IPs at fleet level. */ CREATE UNIQUE INDEX IF NOT EXISTS lookup_floating_ip_by_name on omicron.public.external_ip ( name diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 39d1ae26a0..0798aed664 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -9,7 +9,7 @@ use crate::bootstrap::early_networking::EarlyNetworkConfig; use crate::bootstrap::params::AddSledRequest; use crate::params::{ CleanupContextUpdate, DiskEnsureBody, InstanceEnsureBody, - InstancePutMigrationIdsBody, InstancePutStateBody, + InstanceExternalIpBody, InstancePutMigrationIdsBody, InstancePutStateBody, InstancePutStateResponse, InstanceUnregisterResponse, Inventory, OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRulesEnsureBody, ZoneBundleId, ZoneBundleMetadata, Zpool, @@ -53,6 +53,8 @@ pub fn api() -> SledApiDescription { api.register(instance_issue_disk_snapshot_request)?; api.register(instance_put_migration_ids)?; api.register(instance_put_state)?; + api.register(instance_put_external_ip)?; + api.register(instance_delete_external_ip)?; api.register(instance_register)?; api.register(instance_unregister)?; api.register(omicron_zones_get)?; @@ -467,6 +469,38 @@ async fn instance_put_migration_ids( )) } +#[endpoint { + method = PUT, + path = "/instances/{instance_id}/external-ip", +}] +async fn instance_put_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, +) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_put_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) +} + +#[endpoint { + method = DELETE, + path = "/instances/{instance_id}/external-ip", +}] +async fn instance_delete_external_ip( + rqctx: RequestContext, + path_params: Path, + body: TypedBody, +) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_delete_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) +} + /// Path parameters for Disk requests (sled agent API) #[derive(Deserialize, JsonSchema)] struct DiskPathParam { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 057402c57a..3bbe0762f8 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -10,8 +10,8 @@ use crate::common::instance::{ }; use crate::instance_manager::{InstanceManagerServices, InstanceTicket}; use crate::nexus::NexusClientWithResolver; -use crate::params::ZoneBundleCause; use crate::params::ZoneBundleMetadata; +use crate::params::{InstanceExternalIpBody, ZoneBundleCause}; use crate::params::{ InstanceHardware, InstanceMigrationSourceParams, InstanceMigrationTargetParams, InstanceStateRequested, VpcFirewallRule, @@ -558,6 +558,110 @@ impl InstanceInner { Ok(()) } + + pub async fn add_external_ip( + &mut self, + ip: 
&InstanceExternalIpBody,
+    ) -> Result<(), Error> {
+        // v4 + v6 handling is delegated to `external_ips_ensure`.
+        // If OPTE is unhappy, we undo at `Instance` level.
+
+        match ip {
+            // For idempotency of add/delete, we want to return
+            // success on 'already done'.
+            InstanceExternalIpBody::Ephemeral(ip)
+                if Some(ip) == self.ephemeral_ip.as_ref() =>
+            {
+                return Ok(());
+            }
+            InstanceExternalIpBody::Floating(ip)
+                if self.floating_ips.contains(ip) =>
+            {
+                return Ok(());
+            }
+            // New ephemeral IP while current exists -- error without
+            // explicit delete.
+            InstanceExternalIpBody::Ephemeral(ip)
+                if self.ephemeral_ip.is_some() =>
+            {
+                return Err(Error::Opte(
+                    illumos_utils::opte::Error::ImplicitEphemeralIpDetach(
+                        *ip,
+                        self.ephemeral_ip.unwrap(),
+                    ),
+                ));
+            }
+            // Not found, proceed with OPTE update.
+            InstanceExternalIpBody::Ephemeral(ip) => {
+                self.ephemeral_ip = Some(*ip);
+            }
+            InstanceExternalIpBody::Floating(ip) => {
+                self.floating_ips.push(*ip);
+            }
+        }
+
+        let Some(primary_nic) = self.requested_nics.get(0) else {
+            return Err(Error::Opte(illumos_utils::opte::Error::NoPrimaryNic));
+        };
+
+        self.port_manager.external_ips_ensure(
+            primary_nic.id,
+            primary_nic.kind,
+            Some(self.source_nat),
+            self.ephemeral_ip,
+            &self.floating_ips,
+        )?;
+
+        Ok(())
+    }
+
+    pub async fn delete_external_ip(
+        &mut self,
+        ip: &InstanceExternalIpBody,
+    ) -> Result<(), Error> {
+        // v4 + v6 handling is delegated to `external_ips_ensure`.
+        // If OPTE is unhappy, we undo at `Instance` level.
+
+        match ip {
+            // For idempotency of add/delete, we want to return
+            // success on 'already done'.
+            // IP mismatch and 'deleted in past' can't really be
+            // disambiguated here.
+            InstanceExternalIpBody::Ephemeral(ip)
+                if self.ephemeral_ip != Some(*ip) =>
+            {
+                return Ok(());
+            }
+            InstanceExternalIpBody::Ephemeral(_) => {
+                self.ephemeral_ip = None;
+            }
+            InstanceExternalIpBody::Floating(ip) => {
+                let floating_index =
+                    self.floating_ips.iter().position(|v| v == ip);
+                if let Some(pos) = floating_index {
+                    // Swap remove is valid here, OPTE is not sensitive
+                    // to floating IP ordering.
+                    self.floating_ips.swap_remove(pos);
+                } else {
+                    return Ok(());
+                }
+            }
+        }
+
+        let Some(primary_nic) = self.requested_nics.get(0) else {
+            return Err(Error::Opte(illumos_utils::opte::Error::NoPrimaryNic));
+        };
+
+        self.port_manager.external_ips_ensure(
+            primary_nic.id,
+            primary_nic.kind,
+            Some(self.source_nat),
+            self.ephemeral_ip,
+            &self.floating_ips,
+        )?;
+
+        Ok(())
+    }
 }
 
 /// A reference to a single instance running a Propolis server.
@@ -1094,4 +1198,52 @@ impl Instance {
             Err(Error::InstanceNotRunning(inner.properties.id))
         }
     }
+
+    pub async fn add_external_ip(
+        &self,
+        ip: &InstanceExternalIpBody,
+    ) -> Result<(), Error> {
+        let mut inner = self.inner.lock().await;
+
+        // The internal call can either fail on adding the IP
+        // to the list, or on the OPTE step.
+        // Be cautious and reset state if either fails.
+        // Note we don't need to re-ensure port manager/OPTE state
+        // since that's the last call we make internally.
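+        // (`InstanceInner::add_external_ip` mutates `ephemeral_ip` /
+        // `floating_ips` before its OPTE call, so on failure those fields may
+        // already hold the new value; the snapshot below undoes that.)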
+        let old_eph = inner.ephemeral_ip;
+        let out = inner.add_external_ip(ip).await;
+
+        if out.is_err() {
+            inner.ephemeral_ip = old_eph;
+            if let InstanceExternalIpBody::Floating(ip) = ip {
+                inner.floating_ips.retain(|v| v != ip);
+            }
+        }
+
+        out
+    }
+
+    pub async fn delete_external_ip(
+        &self,
+        ip: &InstanceExternalIpBody,
+    ) -> Result<(), Error> {
+        let mut inner = self.inner.lock().await;
+
+        // Similar logic to `add_external_ip`, except here we
+        // need to re-add the floating IP if it was removed.
+        // OPTE doesn't care about the order of floating IPs.
+        let old_eph = inner.ephemeral_ip;
+        let out = inner.delete_external_ip(ip).await;
+
+        if out.is_err() {
+            inner.ephemeral_ip = old_eph;
+            if let InstanceExternalIpBody::Floating(ip) = ip {
+                if !inner.floating_ips.contains(ip) {
+                    inner.floating_ips.push(*ip);
+                }
+            }
+        }
+
+        out
+    }
 }
diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs
index c1b7e402a4..b66b0400e1 100644
--- a/sled-agent/src/instance_manager.rs
+++ b/sled-agent/src/instance_manager.rs
@@ -7,6 +7,7 @@
 use crate::instance::propolis_zone_name;
 use crate::instance::Instance;
 use crate::nexus::NexusClientWithResolver;
+use crate::params::InstanceExternalIpBody;
 use crate::params::ZoneBundleMetadata;
 use crate::params::{
     InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse,
@@ -434,6 +435,42 @@ impl InstanceManager {
         };
         instance.request_zone_bundle().await
     }
+
+    pub async fn add_external_ip(
+        &self,
+        instance_id: Uuid,
+        ip: &InstanceExternalIpBody,
+    ) -> Result<(), Error> {
+        let instance = {
+            let instances = self.inner.instances.lock().unwrap();
+            instances.get(&instance_id).map(|(_id, v)| v.clone())
+        };
+
+        let Some(instance) = instance else {
+            return Err(Error::NoSuchInstance(instance_id));
+        };
+
+        instance.add_external_ip(ip).await?;
+        Ok(())
+    }
+
+    pub async fn delete_external_ip(
+        &self,
+        instance_id: Uuid,
+        ip: &InstanceExternalIpBody,
+    ) -> Result<(), Error> {
+        let instance = {
+            let instances = self.inner.instances.lock().unwrap();
+            instances.get(&instance_id).map(|(_id, v)| v.clone())
+        };
+
+        let Some(instance) = instance else {
+            return Err(Error::NoSuchInstance(instance_id));
+        };
+
+        instance.delete_external_ip(ip).await?;
+        Ok(())
+    }
 }
 
 /// Represents membership of an instance in the [`InstanceManager`].
diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs
index 9120bafa9a..f14a13aa41 100644
--- a/sled-agent/src/params.rs
+++ b/sled-agent/src/params.rs
@@ -818,6 +818,16 @@ pub struct CleanupContextUpdate {
     pub storage_limit: Option<StorageLimit>,
 }
 
+/// Used to dynamically update external IPs attached to an instance.
+#[derive(
+    Copy, Clone, Debug, Eq, PartialEq, Hash, Deserialize, JsonSchema, Serialize,
+)]
+#[serde(rename_all = "snake_case", tag = "type", content = "value")]
+pub enum InstanceExternalIpBody {
+    Ephemeral(IpAddr),
+    Floating(IpAddr),
+}
+
 // Our SledRole and Baseboard types do not have to be identical to the Nexus
 // ones, but they generally should be, and this avoids duplication. If it
 // becomes easier to maintain a separate copy, we should do that.
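As a sketch of the wire format those serde attributes produce (adjacent
tagging with snake_case variant names), the enum below mirrors the one added
above; serde_json is used purely for illustration and is not a dependency
introduced by this change:

    use serde::{Deserialize, Serialize};
    use std::net::IpAddr;

    #[derive(Copy, Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
    #[serde(rename_all = "snake_case", tag = "type", content = "value")]
    enum InstanceExternalIpBody {
        Ephemeral(IpAddr),
        Floating(IpAddr),
    }

    fn main() {
        let body =
            InstanceExternalIpBody::Ephemeral("203.0.113.5".parse().unwrap());
        let json = serde_json::to_string(&body).unwrap();
        // Adjacently tagged: variant name under "type", payload under "value".
        assert_eq!(json, r#"{"type":"ephemeral","value":"203.0.113.5"}"#);
        // And it round-trips back into the enum.
        let round: InstanceExternalIpBody = serde_json::from_str(&json).unwrap();
        assert_eq!(round, body);
    }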
diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs
index e5d7752511..09ffdf5dc4 100644
--- a/sled-agent/src/sim/http_entrypoints.rs
+++ b/sled-agent/src/sim/http_entrypoints.rs
@@ -8,9 +8,10 @@
 use crate::bootstrap::early_networking::{
     EarlyNetworkConfig, EarlyNetworkConfigBody,
 };
 use crate::params::{
-    DiskEnsureBody, InstanceEnsureBody, InstancePutMigrationIdsBody,
-    InstancePutStateBody, InstancePutStateResponse, InstanceUnregisterResponse,
-    Inventory, OmicronZonesConfig, VpcFirewallRulesEnsureBody,
+    DiskEnsureBody, InstanceEnsureBody, InstanceExternalIpBody,
+    InstancePutMigrationIdsBody, InstancePutStateBody,
+    InstancePutStateResponse, InstanceUnregisterResponse, Inventory,
+    OmicronZonesConfig, VpcFirewallRulesEnsureBody,
 };
 use dropshot::endpoint;
 use dropshot::ApiDescription;
@@ -45,6 +46,8 @@ pub fn api() -> SledApiDescription {
     api.register(instance_put_state)?;
     api.register(instance_register)?;
     api.register(instance_unregister)?;
+    api.register(instance_put_external_ip)?;
+    api.register(instance_delete_external_ip)?;
     api.register(instance_poke_post)?;
     api.register(disk_put)?;
     api.register(disk_poke_post)?;
@@ -152,6 +155,38 @@ async fn instance_put_migration_ids(
     ))
 }
 
+#[endpoint {
+    method = PUT,
+    path = "/instances/{instance_id}/external-ip",
+}]
+async fn instance_put_external_ip(
+    rqctx: RequestContext<Arc<SledAgent>>,
+    path_params: Path<InstancePathParam>,
+    body: TypedBody<InstanceExternalIpBody>,
+) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+    let sa = rqctx.context();
+    let instance_id = path_params.into_inner().instance_id;
+    let body_args = body.into_inner();
+    sa.instance_put_external_ip(instance_id, &body_args).await?;
+    Ok(HttpResponseUpdatedNoContent())
+}
+
+#[endpoint {
+    method = DELETE,
+    path = "/instances/{instance_id}/external-ip",
+}]
+async fn instance_delete_external_ip(
+    rqctx: RequestContext<Arc<SledAgent>>,
+    path_params: Path<InstancePathParam>,
+    body: TypedBody<InstanceExternalIpBody>,
+) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+    let sa = rqctx.context();
+    let instance_id = path_params.into_inner().instance_id;
+    let body_args = body.into_inner();
+    sa.instance_delete_external_ip(instance_id, &body_args).await?;
+    Ok(HttpResponseUpdatedNoContent())
+}
+
 #[endpoint {
     method = POST,
     path = "/instances/{instance_id}/poke",
diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs
index 8a76bf6abc..56cfaf57c8 100644
--- a/sled-agent/src/sim/sled_agent.rs
+++ b/sled-agent/src/sim/sled_agent.rs
@@ -12,9 +12,10 @@
 use super::storage::CrucibleData;
 use super::storage::Storage;
 use crate::nexus::NexusClient;
 use crate::params::{
-    DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams,
-    InstancePutStateResponse, InstanceStateRequested,
-    InstanceUnregisterResponse, Inventory, OmicronZonesConfig, SledRole,
+    DiskStateRequested, InstanceExternalIpBody, InstanceHardware,
+    InstanceMigrationSourceParams, InstancePutStateResponse,
+    InstanceStateRequested, InstanceUnregisterResponse, Inventory,
+    OmicronZonesConfig, SledRole,
 };
 use crate::sim::simulatable::Simulatable;
 use crate::updates::UpdateManager;
@@ -41,7 +42,7 @@
 use propolis_client::{
     ...
 };
 use propolis_mock_server::Context as PropolisContext;
 use slog::Logger;
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use std::net::{IpAddr, Ipv6Addr, SocketAddr};
 use std::str::FromStr;
 use std::sync::Arc;
@@ -69,6 +70,8 @@ pub struct SledAgent {
     pub v2p_mappings: Mutex<HashMap<Uuid, Vec<SetVirtualNetworkInterfaceHost>>>,
     mock_propolis: Mutex>, PropolisClient)>>,
+    /// lists of external IPs assigned to instances
+    pub external_ips: Mutex<HashMap<Uuid, HashSet<InstanceExternalIpBody>>>,
     config: Config,
     fake_zones: Mutex<OmicronZonesConfig>,
     instance_ensure_state_error: Mutex<Option<Error>>,
@@ -162,6
+165,7 @@ impl SledAgent { nexus_client, disk_id_to_region_ids: Mutex::new(HashMap::new()), v2p_mappings: Mutex::new(HashMap::new()), + external_ips: Mutex::new(HashMap::new()), mock_propolis: Mutex::new(None), config: config.clone(), fake_zones: Mutex::new(OmicronZonesConfig { @@ -627,6 +631,58 @@ impl SledAgent { Ok(()) } + pub async fn instance_put_external_ip( + &self, + instance_id: Uuid, + body_args: &InstanceExternalIpBody, + ) -> Result<(), Error> { + if !self.instances.contains_key(&instance_id).await { + return Err(Error::internal_error( + "can't alter IP state for nonexistent instance", + )); + } + + let mut eips = self.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + + // High-level behaviour: this should always succeed UNLESS + // trying to add a double ephemeral. + if let InstanceExternalIpBody::Ephemeral(curr_ip) = &body_args { + if my_eips.iter().any(|v| { + if let InstanceExternalIpBody::Ephemeral(other_ip) = v { + curr_ip != other_ip + } else { + false + } + }) { + return Err(Error::invalid_request("cannot replace existing ephemeral IP without explicit removal")); + } + } + + my_eips.insert(*body_args); + + Ok(()) + } + + pub async fn instance_delete_external_ip( + &self, + instance_id: Uuid, + body_args: &InstanceExternalIpBody, + ) -> Result<(), Error> { + if !self.instances.contains_key(&instance_id).await { + return Err(Error::internal_error( + "can't alter IP state for nonexistent instance", + )); + } + + let mut eips = self.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + + my_eips.remove(&body_args); + + Ok(()) + } + /// Used for integration tests that require a component to talk to a /// mocked propolis-server API. // TODO: fix schemas so propolis-server's port isn't hardcoded in nexus diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 71fe3584f0..eaf354db26 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -16,10 +16,11 @@ use crate::long_running_tasks::LongRunningTaskHandles; use crate::metrics::MetricsManager; use crate::nexus::{ConvertInto, NexusClientWithResolver, NexusRequestQueue}; use crate::params::{ - DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, - InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, Inventory, OmicronZonesConfig, SledRole, - TimeSync, VpcFirewallRule, ZoneBundleMetadata, Zpool, + DiskStateRequested, InstanceExternalIpBody, InstanceHardware, + InstanceMigrationSourceParams, InstancePutStateResponse, + InstanceStateRequested, InstanceUnregisterResponse, Inventory, + OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRule, + ZoneBundleMetadata, Zpool, }; use crate::services::{self, ServiceManager}; use crate::storage_monitor::UnderlayAccess; @@ -979,6 +980,37 @@ impl SledAgent { .map_err(|e| Error::Instance(e)) } + /// Idempotently ensures that an instance's OPTE/port state includes the + /// specified external IP address. + /// + /// This method will return an error when trying to register an ephemeral IP which + /// does not match the current ephemeral IP. 
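+    /// Registering the ephemeral or floating IP that is already attached is
+    /// treated as a no-op and returns success.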
+ pub async fn instance_put_external_ip( + &self, + instance_id: Uuid, + external_ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + self.inner + .instances + .add_external_ip(instance_id, external_ip) + .await + .map_err(|e| Error::Instance(e)) + } + + /// Idempotently ensures that an instance's OPTE/port state does not include the + /// specified external IP address in either its ephemeral or floating IP set. + pub async fn instance_delete_external_ip( + &self, + instance_id: Uuid, + external_ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + self.inner + .instances + .delete_external_ip(instance_id, external_ip) + .await + .map_err(|e| Error::Instance(e)) + } + /// Idempotently ensures that the given virtual disk is attached (or not) as /// specified. /// From e261a960cb365ad92f103a35b262713118ea6441 Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 24 Jan 2024 15:38:25 -0800 Subject: [PATCH 37/91] [meta] update samael to 0.0.14 (#4878) Required to unblock a bunch of other updates. The behavior of a test changed, but in a way that to my understanding based on [the Duo article](https://duo.com/blog/duo-finds-saml-vulnerabilities-affecting-multiple-implementations) is still safe. See the comment included in the PR for more. --- Cargo.lock | 18 +++++++-------- Cargo.toml | 2 +- nexus/tests/integration_tests/saml.rs | 33 +++++++++++++++++++++++---- workspace-hack/Cargo.toml | 2 -- 4 files changed, 38 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0c3eb15179..6ee028bbc5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -479,11 +479,11 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.65.1" +version = "0.69.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" +checksum = "a4c69fae65a523209d34240b60abe0c42d33d1045d445c0839d8a4894a736e2d" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.0", "cexpr", "clang-sys", "lazy_static", @@ -5267,7 +5267,6 @@ dependencies = [ "sha2", "similar", "slog", - "snafu", "socket2 0.5.5", "spin 0.9.8", "string_cache", @@ -6496,9 +6495,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quick-xml" -version = "0.23.1" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11bafc859c6815fbaffbbbf4229ecb767ac913fecb27f9ad4343662e9ef099ea" +checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" dependencies = [ "memchr", "serde", @@ -7372,8 +7371,9 @@ dependencies = [ [[package]] name = "samael" -version = "0.0.10" -source = "git+https://github.com/njaremko/samael?branch=master#52028e45d11ceb7114bf0c730a9971207e965602" +version = "0.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b75583aad4a51c50fc0af69c230d18078c9d5a69a98d0f6013d01053acf744f4" dependencies = [ "base64", "bindgen", @@ -7391,7 +7391,7 @@ dependencies = [ "quick-xml", "rand 0.8.5", "serde", - "snafu", + "thiserror", "url", "uuid", ] diff --git a/Cargo.toml b/Cargo.toml index 093e972b42..ed54ae8c6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -325,7 +325,7 @@ rustfmt-wrapper = "0.2" rustls = "0.22.2" rustls-pemfile = "2.0.0" rustyline = "12.0.0" -samael = { git = "https://github.com/njaremko/samael", features = ["xmlsec"], branch = "master" } +samael = { version = "0.0.14", features = ["xmlsec"] } schemars = "0.8.16" secrecy = "0.8.0" semver = { version = "1.0.21", features = ["std", "serde"] } diff --git 
a/nexus/tests/integration_tests/saml.rs b/nexus/tests/integration_tests/saml.rs
index fc04bbf908..b1b0429c2e 100644
--- a/nexus/tests/integration_tests/saml.rs
+++ b/nexus/tests/integration_tests/saml.rs
@@ -964,12 +964,33 @@ fn test_reject_unsigned_saml_response() {
     assert!(result.is_err());
 }
 
-// Test rejecting a correct SAML response that contains a XML comment in
-// saml:NameID.
+// Test accepting a correct SAML response that contains an XML comment in
+// saml:NameID, and ensuring that the full text node is extracted (and not a
+// substring).
 //
-// See: https://duo.com/blog/duo-finds-saml-vulnerabilities-affecting-multiple-implementations
+// This used to be a test that _rejected_ such responses, but a change to an
+// upstream dependency (quick-xml) caused the behavior around text nodes with
+// embedded comments to change. Specifically, consider:
+//
+//     <saml:NameID>user@example.com<!--comment-->.evil.com</saml:NameID>
+//
+// What should the text node for this element be?
+//
+// * Some XML parsing libraries just return "user@example.com". That leads to a
+//   vulnerability, where an attacker can get a response signed with a
+//   different email address than intended.
+// * Some XML libraries return "user@example.com.evil.com". This is safe,
+//   because the text after the comment hasn't been dropped. This is the
+//   behavior with quick-xml 0.30, and the one that we're testing here.
+// * Some XML libraries are unable to deserialize the document. This is also
+//   safe (and not particularly problematic because typically SAML responses
+//   aren't going to contain comments), and was the behavior with quick-xml
+//   0.23.
+//
+// See:
+// https://duo.com/blog/duo-finds-saml-vulnerabilities-affecting-multiple-implementations
 #[test]
-fn test_reject_saml_response_with_xml_comment() {
+fn test_handle_saml_response_with_xml_comment() {
     let silo_saml_identity_provider = SamlIdentityProvider {
         idp_metadata_document_string: SAML_RESPONSE_IDP_DESCRIPTOR.to_string(),
@@ -1004,7 +1025,9 @@ fn test_handle_saml_response_with_xml_comment() {
         ),
     );
 
-    assert!(result.is_err());
+    let (authenticated_subject, _) =
+        result.expect("expected validation to succeed");
+    assert_eq!(authenticated_subject.external_id, "some@customer.com");
 }
 
 // Test receiving a correct SAML response that has group attributes
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index cda4426c9b..25a72838a0 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -96,7 +96,6 @@
 serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] }
 sha2 = { version = "0.10.8", features = ["oid"] }
 similar = { version = "2.3.0", features = ["inline", "unicode"] }
 slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] }
-snafu = { version = "0.7.5", features = ["futures"] }
 socket2 = { version = "0.5.5", default-features = false, features = ["all"] }
 spin = { version = "0.9.8" }
 string_cache = { version = "0.8.7" }
@@ -204,7 +203,6 @@
 serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] }
 sha2 = { version = "0.10.8", features = ["oid"] }
 similar = { version = "2.3.0", features = ["inline", "unicode"] }
 slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] }
-snafu = { version = "0.7.5", features = ["futures"] }
 socket2 = { version = "0.5.5", default-features = false, features = ["all"] }
 spin = { version = "0.9.8" }
 string_cache = { version = "0.8.7" }

From
97318e91017124629c4231a7cdfc473a90f99270 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 25 Jan 2024 00:14:37 +0000 Subject: [PATCH 38/91] Correctly bump schema version for floating IP changes. (#4890) Two closely-occurring merges hit at high speed! This PR bumps the schema to 26.0.0, as was missed at the time. --- nexus/db-model/src/schema.rs | 2 +- schema/crdb/{25.0.0 => 26.0.0}/up01.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up02.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up03.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up04.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up05.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up06.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up07.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up08.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up09.sql | 0 schema/crdb/dbinit.sql | 2 +- 11 files changed, 2 insertions(+), 2 deletions(-) rename schema/crdb/{25.0.0 => 26.0.0}/up01.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up02.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up03.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up04.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up05.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up06.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up07.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up08.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up09.sql (100%) diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 11cdf87f6c..954647f70d 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(25, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(26, 0, 0); table! 
{ disk (id) { diff --git a/schema/crdb/25.0.0/up01.sql b/schema/crdb/26.0.0/up01.sql similarity index 100% rename from schema/crdb/25.0.0/up01.sql rename to schema/crdb/26.0.0/up01.sql diff --git a/schema/crdb/25.0.0/up02.sql b/schema/crdb/26.0.0/up02.sql similarity index 100% rename from schema/crdb/25.0.0/up02.sql rename to schema/crdb/26.0.0/up02.sql diff --git a/schema/crdb/25.0.0/up03.sql b/schema/crdb/26.0.0/up03.sql similarity index 100% rename from schema/crdb/25.0.0/up03.sql rename to schema/crdb/26.0.0/up03.sql diff --git a/schema/crdb/25.0.0/up04.sql b/schema/crdb/26.0.0/up04.sql similarity index 100% rename from schema/crdb/25.0.0/up04.sql rename to schema/crdb/26.0.0/up04.sql diff --git a/schema/crdb/25.0.0/up05.sql b/schema/crdb/26.0.0/up05.sql similarity index 100% rename from schema/crdb/25.0.0/up05.sql rename to schema/crdb/26.0.0/up05.sql diff --git a/schema/crdb/25.0.0/up06.sql b/schema/crdb/26.0.0/up06.sql similarity index 100% rename from schema/crdb/25.0.0/up06.sql rename to schema/crdb/26.0.0/up06.sql diff --git a/schema/crdb/25.0.0/up07.sql b/schema/crdb/26.0.0/up07.sql similarity index 100% rename from schema/crdb/25.0.0/up07.sql rename to schema/crdb/26.0.0/up07.sql diff --git a/schema/crdb/25.0.0/up08.sql b/schema/crdb/26.0.0/up08.sql similarity index 100% rename from schema/crdb/25.0.0/up08.sql rename to schema/crdb/26.0.0/up08.sql diff --git a/schema/crdb/25.0.0/up09.sql b/schema/crdb/26.0.0/up09.sql similarity index 100% rename from schema/crdb/25.0.0/up09.sql rename to schema/crdb/26.0.0/up09.sql diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 86d88f5fe9..79a43d3c89 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3296,7 +3296,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '25.0.0', NULL) + ( TRUE, NOW(), NOW(), '26.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 9e08978c5d932a8fe89c248abae06ff54161daf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karen=20C=C3=A1rcamo?= Date: Thu, 25 Jan 2024 16:43:45 +1300 Subject: [PATCH 39/91] [sled-agent] Oximeter self-assembling zone (#4534) Related https://github.com/oxidecomputer/omicron/issues/1898 Closes: https://github.com/oxidecomputer/omicron/issues/2883 --- .github/buildomat/jobs/package.sh | 2 +- oximeter/db/schema/README.md | 2 +- package-manifest.toml | 10 +++- sled-agent/src/services.rs | 98 ++++++++++++++++++++++++++----- smf/oximeter/manifest.xml | 13 +++- 5 files changed, 103 insertions(+), 22 deletions(-) diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index b4d10891b9..79590a44df 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -117,7 +117,7 @@ zones=( out/internal-dns.tar.gz out/omicron-nexus.tar.gz out/omicron-nexus-single-sled.tar.gz - out/oximeter-collector.tar.gz + out/oximeter.tar.gz out/propolis-server.tar.gz out/switch-*.tar.gz out/ntp.tar.gz diff --git a/oximeter/db/schema/README.md b/oximeter/db/schema/README.md index 2f1633138d..929144bccf 100644 --- a/oximeter/db/schema/README.md +++ b/oximeter/db/schema/README.md @@ -32,7 +32,7 @@ To run this program: - Run this tool, pointing it at the desired schema directory, e.g.: ```bash -# /opt/oxide/oximeter/bin/clickhouse-schema-updater \ +# /opt/oxide/oximeter-collector/bin/clickhouse-schema-updater \ --host \ --schema-dir /opt/oxide/oximeter/sql up VERSION diff --git a/package-manifest.toml b/package-manifest.toml index 3525b121e4..36e43157f9 100644 --- a/package-manifest.toml +++ 
b/package-manifest.toml @@ -116,9 +116,16 @@ setup_hint = """ - Run `pkg install library/postgresql-13` to download Postgres libraries """ -[package.oximeter-collector] +[package.oximeter] service_name = "oximeter" only_for_targets.image = "standard" +source.type = "composite" +source.packages = [ "oximeter-collector.tar.gz", "zone-network-setup.tar.gz" ] +output.type = "zone" + +[package.oximeter-collector] +service_name = "oximeter-collector" +only_for_targets.image = "standard" source.type = "local" source.rust.binary_names = ["oximeter", "clickhouse-schema-updater"] source.rust.release = true @@ -127,6 +134,7 @@ source.paths = [ { from = "oximeter/db/schema", to = "/opt/oxide/oximeter/schema" }, ] output.type = "zone" +output.intermediate_only = true [package.clickhouse] service_name = "clickhouse" diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 211e602bbf..77b6bcbed4 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -61,7 +61,6 @@ use illumos_utils::zone::Zones; use illumos_utils::{execute, PFEXEC}; use internal_dns::resolver::Resolver; use itertools::Itertools; -use omicron_common::address::AZ_PREFIX; use omicron_common::address::BOOTSTRAP_ARTIFACT_PORT; use omicron_common::address::CLICKHOUSE_KEEPER_PORT; use omicron_common::address::CLICKHOUSE_PORT; @@ -75,6 +74,7 @@ use omicron_common::address::SLED_PREFIX; use omicron_common::address::WICKETD_NEXUS_PROXY_PORT; use omicron_common::address::WICKETD_PORT; use omicron_common::address::{Ipv6Subnet, NEXUS_TECHPORT_EXTERNAL_PORT}; +use omicron_common::address::{AZ_PREFIX, OXIMETER_PORT}; use omicron_common::api::external::Generation; use omicron_common::api::internal::shared::{ HostPortConfig, RackNetworkConfig, @@ -1798,7 +1798,55 @@ impl ServiceManager { let running_zone = RunningZone::boot(installed_zone).await?; return Ok(running_zone); } + ZoneArgs::Omicron(OmicronZoneConfigLocal { + zone: + OmicronZoneConfig { + id, + zone_type: OmicronZoneType::Oximeter { .. }, + underlay_address, + .. + }, + .. + }) => { + let Some(info) = self.inner.sled_info.get() else { + return Err(Error::SledAgentNotReady); + }; + + // Configure the Oximeter service. + let address = SocketAddr::new( + IpAddr::V6(*underlay_address), + OXIMETER_PORT, + ); + + let listen_addr = &address.ip().to_string(); + + let nw_setup_service = Self::zone_network_setup_install( + info, + &installed_zone, + listen_addr, + )?; + + let oximeter_config = PropertyGroupBuilder::new("config") + .add_property("id", "astring", &id.to_string()) + .add_property("address", "astring", &address.to_string()); + let oximeter_service = ServiceBuilder::new("oxide/oximeter") + .add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(oximeter_config), + ); + let profile = ProfileBuilder::new("omicron") + .add_service(nw_setup_service) + .add_service(disabled_ssh_service) + .add_service(oximeter_service); + profile + .add_to_zone(&self.inner.log, &installed_zone) + .await + .map_err(|err| { + Error::io("Failed to setup Oximeter profile", err) + })?; + return Ok(RunningZone::boot(installed_zone).await?); + } _ => {} } @@ -2154,14 +2202,6 @@ impl ServiceManager { // service is enabled. 
smfh.refresh()?; } - - OmicronZoneType::Oximeter { address } => { - info!(self.inner.log, "Setting up oximeter service"); - smfh.setprop("config/id", zone_config.zone.id)?; - smfh.setprop("config/address", address.to_string())?; - smfh.refresh()?; - } - OmicronZoneType::BoundaryNtp { ntp_servers, dns_servers, @@ -2227,7 +2267,8 @@ impl ServiceManager { | OmicronZoneType::ClickhouseKeeper { .. } | OmicronZoneType::CockroachDb { .. } | OmicronZoneType::Crucible { .. } - | OmicronZoneType::CruciblePantry { .. } => { + | OmicronZoneType::CruciblePantry { .. } + | OmicronZoneType::Oximeter { .. } => { panic!( "{} is a service which exists as part of a \ self-assembling zone", @@ -3729,7 +3770,7 @@ mod test { const GLOBAL_ZONE_BOOTSTRAP_IP: Ipv6Addr = Ipv6Addr::LOCALHOST; const SWITCH_ZONE_BOOTSTRAP_IP: Ipv6Addr = Ipv6Addr::LOCALHOST; - const EXPECTED_ZONE_NAME_PREFIX: &str = "oxz_oximeter"; + const EXPECTED_ZONE_NAME_PREFIX: &str = "oxz_ntp"; const EXPECTED_PORT: u16 = 12223; fn make_bootstrap_networking_config() -> BootstrapNetworking { @@ -3906,7 +3947,12 @@ mod test { mgr, id, generation, - OmicronZoneType::Oximeter { address }, + OmicronZoneType::InternalNtp { + address, + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + }, ) .await .expect("Could not create service"); @@ -3945,7 +3991,12 @@ mod test { zones: vec![OmicronZoneConfig { id, underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Oximeter { address }, + zone_type: OmicronZoneType::InternalNtp { + address, + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + }, }], }) .await @@ -4314,7 +4365,12 @@ mod test { let mut zones = vec![OmicronZoneConfig { id: id1, underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Oximeter { address }, + zone_type: OmicronZoneType::InternalNtp { + address, + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + }, }]; mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { generation: v2, @@ -4335,7 +4391,12 @@ mod test { zones.push(OmicronZoneConfig { id: id2, underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Oximeter { address }, + zone_type: OmicronZoneType::InternalNtp { + address, + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + }, }); // Now try to apply that list with an older generation number. This @@ -4508,7 +4569,12 @@ mod test { zones.push(OmicronZoneConfig { id, underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Oximeter { address }, + zone_type: OmicronZoneType::InternalNtp { + address, + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + }, }); mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { generation: vv, diff --git a/smf/oximeter/manifest.xml b/smf/oximeter/manifest.xml index 9c8b30f1f4..fe6c9ac23a 100644 --- a/smf/oximeter/manifest.xml +++ b/smf/oximeter/manifest.xml @@ -4,21 +4,28 @@ - + + + + + + exec='ctrun -l child -o noorphan,regent /opt/oxide/oximeter-collector/bin/oximeter run /var/svc/manifest/site/oximeter/config.toml --address %{config/address} --id %{config/id} &' + timeout_seconds='0'> + + From e69e6f68154f6efe923f5fa74ef719a26feb17b4 Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 24 Jan 2024 20:58:40 -0800 Subject: [PATCH 40/91] [nexus] add support for ingesting TUF repos (#4690) Implement basic support for uploading TUF repos via an endpoint. The PR looks pretty big but most of it is fairly mechanical addition and removal (and much of it has to be done in one go because of internal dependencies.) 
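As an illustrative sketch only (not code or tests from this PR), a client
upload could look roughly like the following, assuming the new endpoint is
`PUT /v1/system/update/repository` and takes the zipped repo as the raw
request body plus a `file_name` query parameter; treat the path, parameter,
and auth handling as assumptions here:

    // Hypothetical client-side sketch; the endpoint path, query parameter,
    // and error handling are assumptions, not taken from this PR.
    fn upload_repo(nexus_url: &str, zip: Vec<u8>) -> anyhow::Result<()> {
        let client = reqwest::blocking::Client::new();
        client
            .put(format!("{nexus_url}/v1/system/update/repository"))
            .query(&[("file_name", "repo.zip")])
            .body(zip)
            .send()?
            .error_for_status()?; // a success body describes the recorded repo
        Ok(())
    }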
Also include a few more changes: - Move more code to update-common. - Move the `by_hash` and `by_id` maps to update-common's `UpdatePlanBuilder`. - Remove old update-related code and database migrations that will be replaced by newer blueprint design. (This is the vast majority of deleted code, and ideally would be a separate PR except it's a bit inconvenient to have a PR stack with multiple schema migrations.) This PR **does not include** actually storing TUF repos and replicating them among sleds. That work has been deprioritized for now, to instead focus on sled addition and removal. --- Cargo.lock | 7 + clients/nexus-client/src/lib.rs | 26 - common/src/api/external/mod.rs | 105 +- common/src/nexus_config.rs | 4 - common/src/update.rs | 7 + nexus/Cargo.toml | 5 + nexus/db-model/src/lib.rs | 6 +- nexus/db-model/src/schema.rs | 84 +- nexus/db-model/src/semver_version.rs | 2 + nexus/db-model/src/system_update.rs | 306 ------ nexus/db-model/src/tuf_repo.rs | 312 ++++++ nexus/db-model/src/update_artifact.rs | 62 -- nexus/db-queries/Cargo.toml | 1 + nexus/db-queries/src/authz/api_resources.rs | 26 +- nexus/db-queries/src/authz/oso_generic.rs | 5 +- .../src/authz/policy_test/resources.rs | 26 +- nexus/db-queries/src/db/datastore/mod.rs | 117 +-- nexus/db-queries/src/db/datastore/update.rs | 638 ++++++------ nexus/db-queries/src/db/lookup.rs | 50 +- nexus/db-queries/src/db/pool_connection.rs | 3 - nexus/db-queries/tests/output/authz-roles.out | 4 +- nexus/examples/config.toml | 10 +- nexus/src/app/mod.rs | 1 + nexus/src/app/rack.rs | 3 - nexus/src/app/update/mod.rs | 965 +----------------- nexus/src/external_api/http_entrypoints.rs | 343 +------ nexus/src/internal_api/http_entrypoints.rs | 9 +- nexus/src/lib.rs | 1 - nexus/src/updates.rs | 74 -- nexus/test-utils/Cargo.toml | 1 + nexus/test-utils/src/http_testing.rs | 30 + nexus/tests/integration_tests/endpoints.rs | 83 +- nexus/tests/integration_tests/mod.rs | 1 - .../tests/integration_tests/system_updates.rs | 219 ---- nexus/tests/integration_tests/updates.rs | 524 ++++++---- .../output/unexpected-authz-endpoints.txt | 12 +- nexus/types/src/external_api/params.rs | 31 +- nexus/types/src/external_api/views.rs | 61 +- openapi/nexus-internal.json | 19 +- schema/crdb/27.0.0/up01.sql | 1 + schema/crdb/27.0.0/up02.sql | 1 + schema/crdb/27.0.0/up03.sql | 1 + schema/crdb/27.0.0/up04.sql | 1 + schema/crdb/27.0.0/up05.sql | 1 + schema/crdb/27.0.0/up06.sql | 1 + schema/crdb/27.0.0/up07.sql | 1 + schema/crdb/27.0.0/up08.sql | 1 + schema/crdb/27.0.0/up09.sql | 1 + schema/crdb/27.0.0/up10.sql | 33 + schema/crdb/27.0.0/up11.sql | 23 + schema/crdb/27.0.0/up12.sql | 21 + schema/crdb/dbinit.sql | 224 ++-- sled-agent/src/updates.rs | 2 +- tufaceous-lib/src/assemble/manifest.rs | 125 +++ update-common/Cargo.toml | 1 + .../src/artifacts/artifacts_with_plan.rs | 221 +++- .../src/artifacts/extracted_artifacts.rs | 4 +- update-common/src/artifacts/update_plan.rs | 228 ++--- update-common/src/errors.rs | 27 +- wicketd/src/artifacts/store.rs | 22 +- wicketd/src/http_entrypoints.rs | 42 +- wicketd/src/update_tracker.rs | 36 +- 62 files changed, 1896 insertions(+), 3305 deletions(-) delete mode 100644 nexus/db-model/src/system_update.rs create mode 100644 nexus/db-model/src/tuf_repo.rs delete mode 100644 nexus/db-model/src/update_artifact.rs delete mode 100644 nexus/src/updates.rs delete mode 100644 nexus/tests/integration_tests/system_updates.rs create mode 100644 schema/crdb/27.0.0/up01.sql create mode 100644 schema/crdb/27.0.0/up02.sql create mode 100644 
schema/crdb/27.0.0/up03.sql create mode 100644 schema/crdb/27.0.0/up04.sql create mode 100644 schema/crdb/27.0.0/up05.sql create mode 100644 schema/crdb/27.0.0/up06.sql create mode 100644 schema/crdb/27.0.0/up07.sql create mode 100644 schema/crdb/27.0.0/up08.sql create mode 100644 schema/crdb/27.0.0/up09.sql create mode 100644 schema/crdb/27.0.0/up10.sql create mode 100644 schema/crdb/27.0.0/up11.sql create mode 100644 schema/crdb/27.0.0/up12.sql diff --git a/Cargo.lock b/Cargo.lock index 6ee028bbc5..c2f3e1a949 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4273,6 +4273,7 @@ dependencies = [ "steno", "strum", "subprocess", + "swrite", "term", "thiserror", "tokio", @@ -4387,6 +4388,7 @@ dependencies = [ "serde_urlencoded", "slog", "tokio", + "tokio-util", "trust-dns-resolver", "uuid", ] @@ -4847,6 +4849,7 @@ dependencies = [ "async-trait", "base64", "buf-list", + "bytes", "camino", "camino-tempfile", "cancel-safe-futures", @@ -4947,6 +4950,9 @@ dependencies = [ "tokio-postgres", "tough", "trust-dns-resolver", + "tufaceous", + "tufaceous-lib", + "update-common", "uuid", ] @@ -9570,6 +9576,7 @@ dependencies = [ "bytes", "camino", "camino-tempfile", + "chrono", "clap 4.4.3", "debug-ignore", "display-error-chain", diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 1e1cbc31e7..17fb5aa367 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -236,32 +236,6 @@ impl From } } -impl From - for types::KnownArtifactKind -{ - fn from( - s: omicron_common::api::internal::nexus::KnownArtifactKind, - ) -> Self { - use omicron_common::api::internal::nexus::KnownArtifactKind; - - match s { - KnownArtifactKind::GimletSp => types::KnownArtifactKind::GimletSp, - KnownArtifactKind::GimletRot => types::KnownArtifactKind::GimletRot, - KnownArtifactKind::Host => types::KnownArtifactKind::Host, - KnownArtifactKind::Trampoline => { - types::KnownArtifactKind::Trampoline - } - KnownArtifactKind::ControlPlane => { - types::KnownArtifactKind::ControlPlane - } - KnownArtifactKind::PscSp => types::KnownArtifactKind::PscSp, - KnownArtifactKind::PscRot => types::KnownArtifactKind::PscRot, - KnownArtifactKind::SwitchSp => types::KnownArtifactKind::SwitchSp, - KnownArtifactKind::SwitchRot => types::KnownArtifactKind::SwitchRot, - } - } -} - impl From for types::Duration { fn from(s: std::time::Duration) -> Self { Self { secs: s.as_secs(), nanos: s.subsec_nanos() } diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index a8aff00afa..dc3537fbb2 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -13,6 +13,8 @@ use dropshot::HttpError; pub use error::*; pub use crate::api::internal::shared::SwitchLocation; +use crate::update::ArtifactHash; +use crate::update::ArtifactId; use anyhow::anyhow; use anyhow::Context; use api_identity::ObjectIdentity; @@ -760,13 +762,9 @@ pub enum ResourceType { Oximeter, MetricProducer, RoleBuiltin, - UpdateArtifact, + TufRepo, + TufArtifact, SwitchPort, - SystemUpdate, - ComponentUpdate, - SystemUpdateComponentUpdate, - UpdateDeployment, - UpdateableComponent, UserBuiltin, Zpool, Vmm, @@ -2625,6 +2623,101 @@ pub struct BgpImportedRouteIpv4 { pub switch: SwitchLocation, } +/// A description of an uploaded TUF repository. +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +pub struct TufRepoDescription { + // Information about the repository. + pub repo: TufRepoMeta, + + // Information about the artifacts present in the repository. 
+    pub artifacts: Vec<TufArtifactMeta>,
+}
+
+impl TufRepoDescription {
+    /// Sorts the artifacts so that descriptions can be compared.
+    pub fn sort_artifacts(&mut self) {
+        self.artifacts.sort_by(|a, b| a.id.cmp(&b.id));
+    }
+}
+
+/// Metadata about a TUF repository.
+///
+/// Found within a [`TufRepoDescription`].
+#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
+pub struct TufRepoMeta {
+    /// The hash of the repository.
+    ///
+    /// This is a slight abuse of `ArtifactHash`, since that's the hash of
+    /// individual artifacts within the repository. However, we use it here for
+    /// convenience.
+    pub hash: ArtifactHash,
+
+    /// The version of the targets role.
+    pub targets_role_version: u64,
+
+    /// The time until which the repo is valid.
+    pub valid_until: DateTime<Utc>,
+
+    /// The system version in artifacts.json.
+    pub system_version: SemverVersion,
+
+    /// The file name of the repository.
+    ///
+    /// This is purely used for debugging and may not always be correct (e.g.
+    /// with wicket, we read the file contents from stdin so we don't know the
+    /// correct file name).
+    pub file_name: String,
+}
+
+/// Metadata about an individual TUF artifact.
+///
+/// Found within a [`TufRepoDescription`].
+#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
+pub struct TufArtifactMeta {
+    /// The artifact ID.
+    pub id: ArtifactId,
+
+    /// The hash of the artifact.
+    pub hash: ArtifactHash,
+
+    /// The size of the artifact in bytes.
+    pub size: u64,
+}
+
+/// Data about a successful TUF repo import into Nexus.
+#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
+#[serde(rename_all = "snake_case")]
+pub struct TufRepoInsertResponse {
+    /// The repository as present in the database.
+    pub recorded: TufRepoDescription,
+
+    /// Whether this repository already existed or is new.
+    pub status: TufRepoInsertStatus,
+}
+
+/// Status of a TUF repo import.
+///
+/// Part of [`TufRepoInsertResponse`].
+#[derive(
+    Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, JsonSchema,
+)]
+#[serde(rename_all = "snake_case")]
+pub enum TufRepoInsertStatus {
+    /// The repository already existed in the database.
+    AlreadyExists,
+
+    /// The repository did not exist, and was inserted into the database.
+    Inserted,
+}
+
+/// Data about a successful TUF repo get from Nexus.
+#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
+#[serde(rename_all = "snake_case")]
+pub struct TufRepoGetResponse {
+    /// The description of the repository.
+    pub description: TufRepoDescription,
+}
+
 #[cfg(test)]
 mod test {
     use serde::Deserialize;
diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs
index 7f26bd84b0..be4b05ffdf 100644
--- a/common/src/nexus_config.rs
+++ b/common/src/nexus_config.rs
@@ -213,8 +213,6 @@ pub struct ConsoleConfig {
 pub struct UpdatesConfig {
     /// Trusted root.json role for the TUF updates repository.
     pub trusted_root: Utf8PathBuf,
-    /// Default base URL for the TUF repository.
-    pub default_base_url: String,
 }
 
 /// Options to tweak database schema changes.
@@ -631,7 +629,6 @@ mod test { address = "[::1]:8123" [updates] trusted_root = "/path/to/root.json" - default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 27 [deployment] @@ -728,7 +725,6 @@ mod test { }, updates: Some(UpdatesConfig { trusted_root: Utf8PathBuf::from("/path/to/root.json"), - default_base_url: "http://example.invalid/".into(), }), schema: None, tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, diff --git a/common/src/update.rs b/common/src/update.rs index 28d5ae50a6..9feff1f868 100644 --- a/common/src/update.rs +++ b/common/src/update.rs @@ -95,6 +95,13 @@ pub struct ArtifactId { pub kind: ArtifactKind, } +/// Used for user-friendly messages. +impl fmt::Display for ArtifactId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} v{} ({})", self.name, self.version, self.kind) + } +} + /// A hash-based identifier for an artifact. /// /// Some places, e.g. the installinator, request artifacts by hash rather than diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 52ee7034dd..87703cce77 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -13,8 +13,10 @@ assert_matches.workspace = true async-trait.workspace = true base64.workspace = true buf-list.workspace = true +bytes.workspace = true cancel-safe-futures.workspace = true camino.workspace = true +camino-tempfile.workspace = true clap.workspace = true chrono.workspace = true crucible-agent-client.workspace = true @@ -88,6 +90,7 @@ oximeter-instruments = { workspace = true, features = ["http-instruments"] } oximeter-producer.workspace = true rustls = { workspace = true } rustls-pemfile = { workspace = true } +update-common.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] @@ -120,6 +123,8 @@ rustls = { workspace = true } subprocess.workspace = true term.workspace = true trust-dns-resolver.workspace = true +tufaceous.workspace = true +tufaceous-lib.workspace = true httptest.workspace = true strum.workspace = true diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 8fdf05e876..5c0a68c253 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -49,7 +49,6 @@ mod project; mod semver_version; mod switch_interface; mod switch_port; -mod system_update; // These actually represent subqueries, not real table. // However, they must be defined in the same crate as our tables // for join-based marker trait generation. @@ -78,8 +77,8 @@ mod sled_underlay_subnet_allocation; mod snapshot; mod ssh_key; mod switch; +mod tuf_repo; mod unsigned; -mod update_artifact; mod user_builtin; mod utilization; mod virtual_provisioning_collection; @@ -165,8 +164,7 @@ pub use ssh_key::*; pub use switch::*; pub use switch_interface::*; pub use switch_port::*; -pub use system_update::*; -pub use update_artifact::*; +pub use tuf_repo::*; pub use user_builtin::*; pub use utilization::*; pub use virtual_provisioning_collection::*; diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 954647f70d..eb71a12f04 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(26, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(27, 0, 0); table! { disk (id) { @@ -1177,72 +1177,45 @@ table! { } table! 
{ - update_artifact (name, version, kind) { - name -> Text, - version -> Text, - kind -> crate::KnownArtifactKindEnum, + tuf_repo (id) { + id -> Uuid, + time_created -> Timestamptz, + sha256 -> Text, targets_role_version -> Int8, valid_until -> Timestamptz, - target_name -> Text, - target_sha256 -> Text, - target_length -> Int8, + system_version -> Text, + file_name -> Text, } } table! { - system_update (id) { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - + tuf_artifact (name, version, kind) { + name -> Text, version -> Text, - } -} - -table! { - update_deployment (id) { - id -> Uuid, + kind -> Text, time_created -> Timestamptz, - time_modified -> Timestamptz, - - version -> Text, - status -> crate::UpdateStatusEnum, - // TODO: status reason for updateable_component + sha256 -> Text, + artifact_size -> Int8, } } table! { - component_update (id) { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - - version -> Text, - component_type -> crate::UpdateableComponentTypeEnum, - } -} - -table! { - updateable_component (id) { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - - device_id -> Text, - version -> Text, - system_version -> Text, - component_type -> crate::UpdateableComponentTypeEnum, - status -> crate::UpdateStatusEnum, - // TODO: status reason for updateable_component + tuf_repo_artifact (tuf_repo_id, tuf_artifact_name, tuf_artifact_version, tuf_artifact_kind) { + tuf_repo_id -> Uuid, + tuf_artifact_name -> Text, + tuf_artifact_version -> Text, + tuf_artifact_kind -> Text, } } -table! { - system_update_component_update (system_update_id, component_update_id) { - system_update_id -> Uuid, - component_update_id -> Uuid, - } -} +allow_tables_to_appear_in_same_query!( + tuf_repo, + tuf_repo_artifact, + tuf_artifact +); +joinable!(tuf_repo_artifact -> tuf_repo (tuf_repo_id)); +// Can't specify joinable for a composite primary key (tuf_repo_artifact -> +// tuf_artifact). /* hardware inventory */ @@ -1432,13 +1405,6 @@ table! { } } -allow_tables_to_appear_in_same_query!( - system_update, - component_update, - system_update_component_update, -); -joinable!(system_update_component_update -> component_update (component_update_id)); - allow_tables_to_appear_in_same_query!(ip_pool_range, ip_pool, ip_pool_resource); joinable!(ip_pool_range -> ip_pool (ip_pool_id)); joinable!(ip_pool_resource -> ip_pool (ip_pool_id)); diff --git a/nexus/db-model/src/semver_version.rs b/nexus/db-model/src/semver_version.rs index 8e168e11a2..f314e98ab3 100644 --- a/nexus/db-model/src/semver_version.rs +++ b/nexus/db-model/src/semver_version.rs @@ -24,6 +24,8 @@ use serde::{Deserialize, Serialize}; Serialize, Deserialize, PartialEq, + Eq, + Hash, Display, )] #[diesel(sql_type = sql_types::Text)] diff --git a/nexus/db-model/src/system_update.rs b/nexus/db-model/src/system_update.rs deleted file mode 100644 index 17421936b1..0000000000 --- a/nexus/db-model/src/system_update.rs +++ /dev/null @@ -1,306 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
- -use crate::{ - impl_enum_type, - schema::{ - component_update, system_update, system_update_component_update, - update_deployment, updateable_component, - }, - SemverVersion, -}; -use db_macros::Asset; -use nexus_types::{ - external_api::{params, shared, views}, - identity::Asset, -}; -use omicron_common::api::external; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -#[derive( - Queryable, - Insertable, - Selectable, - Clone, - Debug, - Asset, - Serialize, - Deserialize, -)] -#[diesel(table_name = system_update)] -pub struct SystemUpdate { - #[diesel(embed)] - pub identity: SystemUpdateIdentity, - pub version: SemverVersion, -} - -impl SystemUpdate { - /// Can fail if version numbers are too high. - pub fn new( - version: external::SemverVersion, - ) -> Result { - Ok(Self { - identity: SystemUpdateIdentity::new(Uuid::new_v4()), - version: SemverVersion(version), - }) - } -} - -impl From for views::SystemUpdate { - fn from(system_update: SystemUpdate) -> Self { - Self { - identity: system_update.identity(), - version: system_update.version.into(), - } - } -} - -impl_enum_type!( - #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "update_status", schema = "public"))] - pub struct UpdateStatusEnum; - - #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] - #[diesel(sql_type = UpdateStatusEnum)] - pub enum UpdateStatus; - - Updating => b"updating" - Steady => b"steady" -); - -impl From for views::UpdateStatus { - fn from(status: UpdateStatus) -> Self { - match status { - UpdateStatus::Updating => Self::Updating, - UpdateStatus::Steady => Self::Steady, - } - } -} - -impl_enum_type!( - #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "updateable_component_type", schema = "public"))] - pub struct UpdateableComponentTypeEnum; - - #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] - #[diesel(sql_type = UpdateableComponentTypeEnum)] - pub enum UpdateableComponentType; - - BootloaderForRot => b"bootloader_for_rot" - BootloaderForSp => b"bootloader_for_sp" - BootloaderForHostProc => b"bootloader_for_host_proc" - HubrisForPscRot => b"hubris_for_psc_rot" - HubrisForPscSp => b"hubris_for_psc_sp" - HubrisForSidecarRot => b"hubris_for_sidecar_rot" - HubrisForSidecarSp => b"hubris_for_sidecar_sp" - HubrisForGimletRot => b"hubris_for_gimlet_rot" - HubrisForGimletSp => b"hubris_for_gimlet_sp" - HeliosHostPhase1 => b"helios_host_phase_1" - HeliosHostPhase2 => b"helios_host_phase_2" - HostOmicron => b"host_omicron" -); - -impl From for UpdateableComponentType { - fn from(component_type: shared::UpdateableComponentType) -> Self { - match component_type { - shared::UpdateableComponentType::BootloaderForRot => { - UpdateableComponentType::BootloaderForRot - } - shared::UpdateableComponentType::BootloaderForSp => { - UpdateableComponentType::BootloaderForSp - } - shared::UpdateableComponentType::BootloaderForHostProc => { - UpdateableComponentType::BootloaderForHostProc - } - shared::UpdateableComponentType::HubrisForPscRot => { - UpdateableComponentType::HubrisForPscRot - } - shared::UpdateableComponentType::HubrisForPscSp => { - UpdateableComponentType::HubrisForPscSp - } - shared::UpdateableComponentType::HubrisForSidecarRot => { - UpdateableComponentType::HubrisForSidecarRot - } - shared::UpdateableComponentType::HubrisForSidecarSp => { - UpdateableComponentType::HubrisForSidecarSp - } - shared::UpdateableComponentType::HubrisForGimletRot => { - 
UpdateableComponentType::HubrisForGimletRot - } - shared::UpdateableComponentType::HubrisForGimletSp => { - UpdateableComponentType::HubrisForGimletSp - } - shared::UpdateableComponentType::HeliosHostPhase1 => { - UpdateableComponentType::HeliosHostPhase1 - } - shared::UpdateableComponentType::HeliosHostPhase2 => { - UpdateableComponentType::HeliosHostPhase2 - } - shared::UpdateableComponentType::HostOmicron => { - UpdateableComponentType::HostOmicron - } - } - } -} - -impl Into for UpdateableComponentType { - fn into(self) -> shared::UpdateableComponentType { - match self { - UpdateableComponentType::BootloaderForRot => { - shared::UpdateableComponentType::BootloaderForRot - } - UpdateableComponentType::BootloaderForSp => { - shared::UpdateableComponentType::BootloaderForSp - } - UpdateableComponentType::BootloaderForHostProc => { - shared::UpdateableComponentType::BootloaderForHostProc - } - UpdateableComponentType::HubrisForPscRot => { - shared::UpdateableComponentType::HubrisForPscRot - } - UpdateableComponentType::HubrisForPscSp => { - shared::UpdateableComponentType::HubrisForPscSp - } - UpdateableComponentType::HubrisForSidecarRot => { - shared::UpdateableComponentType::HubrisForSidecarRot - } - UpdateableComponentType::HubrisForSidecarSp => { - shared::UpdateableComponentType::HubrisForSidecarSp - } - UpdateableComponentType::HubrisForGimletRot => { - shared::UpdateableComponentType::HubrisForGimletRot - } - UpdateableComponentType::HubrisForGimletSp => { - shared::UpdateableComponentType::HubrisForGimletSp - } - UpdateableComponentType::HeliosHostPhase1 => { - shared::UpdateableComponentType::HeliosHostPhase1 - } - UpdateableComponentType::HeliosHostPhase2 => { - shared::UpdateableComponentType::HeliosHostPhase2 - } - UpdateableComponentType::HostOmicron => { - shared::UpdateableComponentType::HostOmicron - } - } - } -} - -#[derive( - Queryable, - Insertable, - Selectable, - Clone, - Debug, - Asset, - Serialize, - Deserialize, -)] -#[diesel(table_name = component_update)] -pub struct ComponentUpdate { - #[diesel(embed)] - pub identity: ComponentUpdateIdentity, - pub version: SemverVersion, - pub component_type: UpdateableComponentType, -} - -#[derive( - Queryable, Insertable, Selectable, Clone, Debug, Serialize, Deserialize, -)] -#[diesel(table_name = system_update_component_update)] -pub struct SystemUpdateComponentUpdate { - pub component_update_id: Uuid, - pub system_update_id: Uuid, -} - -impl From for views::ComponentUpdate { - fn from(component_update: ComponentUpdate) -> Self { - Self { - identity: component_update.identity(), - version: component_update.version.into(), - component_type: component_update.component_type.into(), - } - } -} - -#[derive( - Queryable, - Insertable, - Selectable, - Clone, - Debug, - Asset, - Serialize, - Deserialize, -)] -#[diesel(table_name = updateable_component)] -pub struct UpdateableComponent { - #[diesel(embed)] - pub identity: UpdateableComponentIdentity, - pub device_id: String, - pub component_type: UpdateableComponentType, - pub version: SemverVersion, - pub system_version: SemverVersion, - pub status: UpdateStatus, - // TODO: point to the actual update artifact -} - -impl TryFrom for UpdateableComponent { - type Error = external::Error; - - fn try_from( - create: params::UpdateableComponentCreate, - ) -> Result { - Ok(Self { - identity: UpdateableComponentIdentity::new(Uuid::new_v4()), - version: SemverVersion(create.version), - system_version: SemverVersion(create.system_version), - component_type: create.component_type.into(), - 
device_id: create.device_id,
-            status: UpdateStatus::Steady,
-        })
-    }
-}
-
-impl From<UpdateableComponent> for views::UpdateableComponent {
-    fn from(component: UpdateableComponent) -> Self {
-        Self {
-            identity: component.identity(),
-            device_id: component.device_id,
-            component_type: component.component_type.into(),
-            version: component.version.into(),
-            system_version: component.system_version.into(),
-            status: component.status.into(),
-        }
-    }
-}
-
-#[derive(
-    Queryable,
-    Insertable,
-    Selectable,
-    Clone,
-    Debug,
-    Asset,
-    Serialize,
-    Deserialize,
-)]
-#[diesel(table_name = update_deployment)]
-pub struct UpdateDeployment {
-    #[diesel(embed)]
-    pub identity: UpdateDeploymentIdentity,
-    pub version: SemverVersion,
-    pub status: UpdateStatus,
-}
-
-impl From<UpdateDeployment> for views::UpdateDeployment {
-    fn from(deployment: UpdateDeployment) -> Self {
-        Self {
-            identity: deployment.identity(),
-            version: deployment.version.into(),
-            status: deployment.status.into(),
-        }
-    }
-}
diff --git a/nexus/db-model/src/tuf_repo.rs b/nexus/db-model/src/tuf_repo.rs
new file mode 100644
index 0000000000..5fa2a0aac7
--- /dev/null
+++ b/nexus/db-model/src/tuf_repo.rs
@@ -0,0 +1,312 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::str::FromStr;
+
+use crate::{
+    schema::{tuf_artifact, tuf_repo, tuf_repo_artifact},
+    SemverVersion,
+};
+use chrono::{DateTime, Utc};
+use diesel::{deserialize::FromSql, serialize::ToSql, sql_types::Text};
+use omicron_common::{
+    api::external,
+    update::{
+        ArtifactHash as ExternalArtifactHash, ArtifactId as ExternalArtifactId,
+        ArtifactKind,
+    },
+};
+use serde::{Deserialize, Serialize};
+use std::fmt;
+use uuid::Uuid;
+
+/// A description of a TUF update: a repo, along with the artifacts it
+/// contains.
+///
+/// This is the internal variant of [`external::TufRepoDescription`].
+#[derive(Debug, Clone)]
+pub struct TufRepoDescription {
+    /// The repository.
+    pub repo: TufRepo,
+
+    /// The artifacts.
+    pub artifacts: Vec<TufArtifact>,
+}
+
+impl TufRepoDescription {
+    /// Creates a new `TufRepoDescription` from an
+    /// [`external::TufRepoDescription`].
+    ///
+    /// This is not implemented as a `From` impl because we insert new fields
+    /// as part of the process, which `From` doesn't necessarily communicate
+    /// and can be surprising.
+    pub fn from_external(description: external::TufRepoDescription) -> Self {
+        Self {
+            repo: TufRepo::from_external(description.repo),
+            artifacts: description
+                .artifacts
+                .into_iter()
+                .map(TufArtifact::from_external)
+                .collect(),
+        }
+    }
+
+    /// Converts self into [`external::TufRepoDescription`].
+    pub fn into_external(self) -> external::TufRepoDescription {
+        external::TufRepoDescription {
+            repo: self.repo.into_external(),
+            artifacts: self
+                .artifacts
+                .into_iter()
+                .map(TufArtifact::into_external)
+                .collect(),
+        }
+    }
+}
+
+/// A record representing an uploaded TUF repository.
+///
+/// This is the internal variant of [`external::TufRepoMeta`].
+#[derive(
+    Queryable, Identifiable, Insertable, Clone, Debug, Selectable, AsChangeset,
+)]
+#[diesel(table_name = tuf_repo)]
+pub struct TufRepo {
+    pub id: Uuid,
+    pub time_created: DateTime<Utc>,
+    // XXX: We're overloading ArtifactHash here to also mean the hash of the
+    // repository zip itself.
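+    // (Both values are SHA-256 digests, so the same type fits either use.)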
+ pub sha256: ArtifactHash, + pub targets_role_version: i64, + pub valid_until: DateTime, + pub system_version: SemverVersion, + pub file_name: String, +} + +impl TufRepo { + /// Creates a new `TufRepo` ready for insertion. + pub fn new( + sha256: ArtifactHash, + targets_role_version: u64, + valid_until: DateTime, + system_version: SemverVersion, + file_name: String, + ) -> Self { + Self { + id: Uuid::new_v4(), + time_created: Utc::now(), + sha256, + targets_role_version: targets_role_version as i64, + valid_until, + system_version, + file_name, + } + } + + /// Creates a new `TufRepo` ready for insertion from an external + /// `TufRepoMeta`. + /// + /// This is not implemented as a `From` impl because we insert new fields + /// as part of the process, which `From` doesn't necessarily communicate + /// and can be surprising. + pub fn from_external(repo: external::TufRepoMeta) -> Self { + Self::new( + repo.hash.into(), + repo.targets_role_version, + repo.valid_until, + repo.system_version.into(), + repo.file_name, + ) + } + + /// Converts self into [`external::TufRepoMeta`]. + pub fn into_external(self) -> external::TufRepoMeta { + external::TufRepoMeta { + hash: self.sha256.into(), + targets_role_version: self.targets_role_version as u64, + valid_until: self.valid_until, + system_version: self.system_version.into(), + file_name: self.file_name, + } + } + + /// Returns the repository's ID. + pub fn id(&self) -> Uuid { + self.id + } + + /// Returns the targets role version. + pub fn targets_role_version(&self) -> u64 { + self.targets_role_version as u64 + } +} + +#[derive(Queryable, Insertable, Clone, Debug, Selectable, AsChangeset)] +#[diesel(table_name = tuf_artifact)] +pub struct TufArtifact { + #[diesel(embed)] + pub id: ArtifactId, + pub time_created: DateTime, + pub sha256: ArtifactHash, + artifact_size: i64, +} + +impl TufArtifact { + /// Creates a new `TufArtifact` ready for insertion. + pub fn new( + id: ArtifactId, + sha256: ArtifactHash, + artifact_size: u64, + ) -> Self { + Self { + id, + time_created: Utc::now(), + sha256, + artifact_size: artifact_size as i64, + } + } + + /// Creates a new `TufArtifact` ready for insertion from an external + /// `TufArtifactMeta`. + /// + /// This is not implemented as a `From` impl because we insert new fields + /// as part of the process, which `From` doesn't necessarily communicate + /// and can be surprising. + pub fn from_external(artifact: external::TufArtifactMeta) -> Self { + Self::new(artifact.id.into(), artifact.hash.into(), artifact.size) + } + + /// Converts self into [`external::TufArtifactMeta`]. + pub fn into_external(self) -> external::TufArtifactMeta { + external::TufArtifactMeta { + id: self.id.into(), + hash: self.sha256.into(), + size: self.artifact_size as u64, + } + } + + /// Returns the artifact's ID. + pub fn id(&self) -> (String, SemverVersion, String) { + (self.id.name.clone(), self.id.version.clone(), self.id.kind.clone()) + } + + /// Returns the artifact length in bytes. + pub fn artifact_size(&self) -> u64 { + self.artifact_size as u64 + } +} + +/// The ID (primary key) of a [`TufArtifact`]. +/// +/// This is the internal variant of a [`ExternalArtifactId`]. 
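Note the signed-storage convention above: targets_role_version and artifact_size live in i64 columns (SQL BIGINT is signed) while the Rust accessors expose u64, converting with `as` casts in both directions. A small sketch of the round trip; the checked variant is an assumption of this example, not something the patch itself does:

    /// The patch stores `value as i64`. This checked form would instead
    /// reject values >= 2^63 rather than letting them wrap to negative.
    fn to_db_checked(value: u64) -> Result<i64, String> {
        i64::try_from(value)
            .map_err(|_| format!("{value} does not fit in a signed BIGINT"))
    }

    /// Reading back: non-negative column values round-trip exactly.
    fn from_db(value: i64) -> u64 {
        value as u64
    }

    fn main() {
        assert_eq!(from_db(to_db_checked(42).unwrap()), 42);
        assert!(to_db_checked(u64::MAX).is_err());
    }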
+#[derive( + Queryable, + Insertable, + Clone, + Debug, + Selectable, + PartialEq, + Eq, + Hash, + Deserialize, + Serialize, +)] +#[diesel(table_name = tuf_artifact)] +pub struct ArtifactId { + pub name: String, + pub version: SemverVersion, + pub kind: String, +} + +impl From for ArtifactId { + fn from(id: ExternalArtifactId) -> Self { + Self { + name: id.name, + version: id.version.into(), + kind: id.kind.as_str().to_owned(), + } + } +} + +impl From for ExternalArtifactId { + fn from(id: ArtifactId) -> Self { + Self { + name: id.name, + version: id.version.into(), + kind: ArtifactKind::new(id.kind), + } + } +} + +impl fmt::Display for ArtifactId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // This is the same as ExternalArtifactId's Display impl. + write!(f, "{} v{} ({})", self.name, self.version, self.kind) + } +} + +/// Required by the authz_resource macro. +impl From for (String, SemverVersion, String) { + fn from(id: ArtifactId) -> Self { + (id.name, id.version, id.kind) + } +} + +/// A many-to-many relationship between [`TufRepo`] and [`TufArtifact`]. +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = tuf_repo_artifact)] +pub struct TufRepoArtifact { + pub tuf_repo_id: Uuid, + pub tuf_artifact_name: String, + pub tuf_artifact_version: SemverVersion, + pub tuf_artifact_kind: String, +} + +/// A wrapper around omicron-common's [`ArtifactHash`](ExternalArtifactHash), +/// supported by Diesel. +#[derive( + Copy, + Clone, + Debug, + AsExpression, + FromSqlRow, + Serialize, + Deserialize, + PartialEq, +)] +#[diesel(sql_type = Text)] +#[serde(transparent)] +pub struct ArtifactHash(pub ExternalArtifactHash); + +NewtypeFrom! { () pub struct ArtifactHash(ExternalArtifactHash); } +NewtypeDeref! { () pub struct ArtifactHash(ExternalArtifactHash); } + +impl fmt::Display for ArtifactHash { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl ToSql for ArtifactHash { + fn to_sql<'a>( + &'a self, + out: &mut diesel::serialize::Output<'a, '_, diesel::pg::Pg>, + ) -> diesel::serialize::Result { + >::to_sql( + &self.0.to_string(), + &mut out.reborrow(), + ) + } +} + +impl FromSql for ArtifactHash { + fn from_sql( + bytes: diesel::pg::PgValue<'_>, + ) -> diesel::deserialize::Result { + let s = String::from_sql(bytes)?; + ExternalArtifactHash::from_str(&s) + .map(ArtifactHash) + .map_err(|e| e.into()) + } +} diff --git a/nexus/db-model/src/update_artifact.rs b/nexus/db-model/src/update_artifact.rs deleted file mode 100644 index 97c57b44cc..0000000000 --- a/nexus/db-model/src/update_artifact.rs +++ /dev/null @@ -1,62 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
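The ToSql/FromSql impls for ArtifactHash above serialize through Display and parse back with FromStr, so those two must round-trip exactly or reads will fail. A self-contained sketch of that invariant with a hypothetical MyHash stand-in:

    use std::str::FromStr;

    #[derive(Debug, PartialEq)]
    struct MyHash(u128);

    impl std::fmt::Display for MyHash {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            // Fixed-width lowercase hex, like a digest's canonical form.
            write!(f, "{:032x}", self.0)
        }
    }

    impl FromStr for MyHash {
        type Err = std::num::ParseIntError;
        fn from_str(s: &str) -> Result<Self, Self::Err> {
            u128::from_str_radix(s, 16).map(MyHash)
        }
    }

    fn main() {
        let h = MyHash(0xdead_beef);
        // What ToSql writes (the Display form), FromSql must parse back.
        assert_eq!(h.to_string().parse::<MyHash>().unwrap(), h);
    }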
- -use super::impl_enum_wrapper; -use crate::schema::update_artifact; -use crate::SemverVersion; -use chrono::{DateTime, Utc}; -use omicron_common::api::internal; -use parse_display::Display; -use serde::Deserialize; -use serde::Serialize; -use std::io::Write; - -impl_enum_wrapper!( - #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "update_artifact_kind", schema = "public"))] - pub struct KnownArtifactKindEnum; - - #[derive(Clone, Copy, Debug, Display, AsExpression, FromSqlRow, PartialEq, Eq, Serialize, Deserialize)] - #[display("{0}")] - #[diesel(sql_type = KnownArtifactKindEnum)] - pub struct KnownArtifactKind(pub internal::nexus::KnownArtifactKind); - - // Enum values - GimletSp => b"gimlet_sp" - GimletRot => b"gimlet_rot" - Host => b"host" - Trampoline => b"trampoline" - ControlPlane => b"control_plane" - PscSp => b"psc_sp" - PscRot => b"psc_rot" - SwitchSp => b"switch_sp" - SwitchRot => b"switch_rot" -); - -#[derive( - Queryable, Insertable, Clone, Debug, Display, Selectable, AsChangeset, -)] -#[diesel(table_name = update_artifact)] -#[display("{kind} \"{name}\" v{version}")] -pub struct UpdateArtifact { - pub name: String, - /// Version of the artifact itself - pub version: SemverVersion, - pub kind: KnownArtifactKind, - /// `version` field of targets.json from the repository - // FIXME this *should* be a NonZeroU64 - pub targets_role_version: i64, - pub valid_until: DateTime, - pub target_name: String, - // FIXME should this be [u8; 32]? - pub target_sha256: String, - // FIXME this *should* be a u64 - pub target_length: i64, -} - -impl UpdateArtifact { - pub fn id(&self) -> (String, SemverVersion, KnownArtifactKind) { - (self.name.clone(), self.version.clone(), self.kind) - } -} diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index cae42a0944..3240c54f3f 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -43,6 +43,7 @@ sled-agent-client.workspace = true slog.workspace = true static_assertions.workspace = true steno.workspace = true +swrite.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } uuid.workspace = true diff --git a/nexus/db-queries/src/authz/api_resources.rs b/nexus/db-queries/src/authz/api_resources.rs index 444a00d5ad..b4fd4e1890 100644 --- a/nexus/db-queries/src/authz/api_resources.rs +++ b/nexus/db-queries/src/authz/api_resources.rs @@ -36,8 +36,7 @@ use crate::authn; use crate::context::OpContext; use crate::db; use crate::db::fixed_data::FLEET_ID; -use crate::db::model::KnownArtifactKind; -use crate::db::model::SemverVersion; +use crate::db::model::{ArtifactId, SemverVersion}; use crate::db::DataStore; use authz_macros::authz_resource; use futures::future::BoxFuture; @@ -1067,35 +1066,28 @@ authz_resource! { } authz_resource! { - name = "UpdateArtifact", + name = "TufRepo", parent = "Fleet", - primary_key = (String, SemverVersion, KnownArtifactKind), - roles_allowed = false, - polar_snippet = FleetChild, -} - -authz_resource! { - name = "Certificate", - parent = "Silo", primary_key = Uuid, roles_allowed = false, - polar_snippet = Custom, + polar_snippet = FleetChild, } authz_resource! { - name = "SystemUpdate", + name = "TufArtifact", parent = "Fleet", - primary_key = Uuid, + primary_key = (String, SemverVersion, String), + input_key = ArtifactId, roles_allowed = false, polar_snippet = FleetChild, } authz_resource! 
{ - name = "UpdateDeployment", - parent = "Fleet", + name = "Certificate", + parent = "Silo", primary_key = Uuid, roles_allowed = false, - polar_snippet = FleetChild, + polar_snippet = Custom, } authz_resource! { diff --git a/nexus/db-queries/src/authz/oso_generic.rs b/nexus/db-queries/src/authz/oso_generic.rs index 9b842216b4..dd646a1c98 100644 --- a/nexus/db-queries/src/authz/oso_generic.rs +++ b/nexus/db-queries/src/authz/oso_generic.rs @@ -154,12 +154,11 @@ pub fn make_omicron_oso(log: &slog::Logger) -> Result { IdentityProvider::init(), SamlIdentityProvider::init(), Sled::init(), + TufRepo::init(), + TufArtifact::init(), Zpool::init(), Service::init(), - UpdateArtifact::init(), UserBuiltin::init(), - SystemUpdate::init(), - UpdateDeployment::init(), ]; for init in generated_inits { diff --git a/nexus/db-queries/src/authz/policy_test/resources.rs b/nexus/db-queries/src/authz/policy_test/resources.rs index 9cc4e28790..3e87f6db51 100644 --- a/nexus/db-queries/src/authz/policy_test/resources.rs +++ b/nexus/db-queries/src/authz/policy_test/resources.rs @@ -7,6 +7,8 @@ use super::resource_builder::ResourceBuilder; use super::resource_builder::ResourceSet; use crate::authz; +use crate::db::model::ArtifactId; +use nexus_db_model::SemverVersion; use omicron_common::api::external::LookupType; use oso::PolarClass; use std::collections::BTreeSet; @@ -126,20 +128,23 @@ pub async fn make_resources( LookupType::ById(blueprint_id), )); - let system_update_id = - "9c86d713-1bc2-4927-9892-ada3eb6f5f62".parse().unwrap(); - builder.new_resource(authz::SystemUpdate::new( + let tuf_repo_id = "3c52d72f-cbf7-4951-a62f-a4154e74da87".parse().unwrap(); + builder.new_resource(authz::TufRepo::new( authz::FLEET, - system_update_id, - LookupType::ById(system_update_id), + tuf_repo_id, + LookupType::ById(tuf_repo_id), )); - let update_deployment_id = - "c617a035-7c42-49ff-a36a-5dfeee382832".parse().unwrap(); - builder.new_resource(authz::UpdateDeployment::new( + let artifact_id = ArtifactId { + name: "a".to_owned(), + version: SemverVersion("1.0.0".parse().unwrap()), + kind: "b".to_owned(), + }; + let artifact_id_desc = artifact_id.to_string(); + builder.new_resource(authz::TufArtifact::new( authz::FLEET, - update_deployment_id, - LookupType::ById(update_deployment_id), + artifact_id, + LookupType::ByCompositeId(artifact_id_desc), )); let address_lot_id = @@ -375,7 +380,6 @@ pub fn exempted_authz_classes() -> BTreeSet { authz::RouterRoute::get_polar_class(), authz::ConsoleSession::get_polar_class(), authz::RoleBuiltin::get_polar_class(), - authz::UpdateArtifact::get_polar_class(), authz::UserBuiltin::get_polar_class(), ] .into_iter() diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 5fd16e2633..78a7aeda87 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -397,14 +397,12 @@ mod test { use crate::db::identity::Asset; use crate::db::lookup::LookupPath; use crate::db::model::{ - BlockSize, ComponentUpdate, ComponentUpdateIdentity, ConsoleSession, - Dataset, DatasetKind, ExternalIp, PhysicalDisk, PhysicalDiskKind, - Project, Rack, Region, Service, ServiceKind, SiloUser, SledBaseboard, - SledProvisionState, SledSystemHardware, SledUpdate, SshKey, - SystemUpdate, UpdateableComponentType, VpcSubnet, Zpool, + BlockSize, ConsoleSession, Dataset, DatasetKind, ExternalIp, + PhysicalDisk, PhysicalDiskKind, Project, Rack, Region, Service, + ServiceKind, SiloUser, SledBaseboard, SledProvisionState, + SledSystemHardware, 
SledUpdate, SshKey, VpcSubnet, Zpool, }; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; - use assert_matches::assert_matches; use chrono::{Duration, Utc}; use futures::stream; use futures::StreamExt; @@ -413,7 +411,7 @@ mod test { use nexus_types::external_api::params; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::{ - self, ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, + ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, }; use omicron_common::nexus_config::RegionAllocationStrategy; use omicron_test_utils::dev; @@ -1988,109 +1986,4 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } - - /// Expect DB error if we try to insert a system update with an id that - /// already exists. If version matches, update the existing row (currently - /// only time_modified) - #[tokio::test] - async fn test_system_update_conflict() { - let logctx = dev::test_setup_log("test_system_update_conflict"); - let mut db = test_setup_database(&logctx.log).await; - let (opctx, datastore) = datastore_test(&logctx, &db).await; - - let v1 = external::SemverVersion::new(1, 0, 0); - let update1 = SystemUpdate::new(v1.clone()).unwrap(); - datastore - .upsert_system_update(&opctx, update1.clone()) - .await - .expect("Failed to create system update"); - - // same version, but different ID (generated by constructor). should - // conflict and therefore update time_modified, keeping the old ID - let update2 = SystemUpdate::new(v1).unwrap(); - let updated_update = datastore - .upsert_system_update(&opctx, update2.clone()) - .await - .unwrap(); - assert!(updated_update.identity.id == update1.identity.id); - assert!( - updated_update.identity.time_modified - != update1.identity.time_modified - ); - - // now let's do same ID, but different version. should conflict on the - // ID because it's the PK, but since the version doesn't match an - // existing row, it errors out instead of updating one - let update3 = - SystemUpdate::new(external::SemverVersion::new(2, 0, 0)).unwrap(); - let update3 = SystemUpdate { identity: update1.identity, ..update3 }; - let conflict = - datastore.upsert_system_update(&opctx, update3).await.unwrap_err(); - assert_matches!(conflict, Error::ObjectAlreadyExists { .. 
}); - - db.cleanup().await.unwrap(); - logctx.cleanup_successful(); - } - - /// Expect DB error if we try to insert a component update with a (version, - /// component_type) that already exists - #[tokio::test] - async fn test_component_update_conflict() { - let logctx = dev::test_setup_log("test_component_update_conflict"); - let mut db = test_setup_database(&logctx.log).await; - let (opctx, datastore) = datastore_test(&logctx, &db).await; - - // we need a system update for the component updates to hang off of - let v1 = external::SemverVersion::new(1, 0, 0); - let system_update = SystemUpdate::new(v1.clone()).unwrap(); - datastore - .upsert_system_update(&opctx, system_update.clone()) - .await - .expect("Failed to create system update"); - - // create a component update, that's fine - let cu1 = ComponentUpdate { - identity: ComponentUpdateIdentity::new(Uuid::new_v4()), - component_type: UpdateableComponentType::HubrisForSidecarRot, - version: db::model::SemverVersion::new(1, 0, 0), - }; - datastore - .create_component_update( - &opctx, - system_update.identity.id, - cu1.clone(), - ) - .await - .expect("Failed to create component update"); - - // create a second component update with same version but different - // type, also fine - let cu2 = ComponentUpdate { - identity: ComponentUpdateIdentity::new(Uuid::new_v4()), - component_type: UpdateableComponentType::HubrisForSidecarSp, - version: db::model::SemverVersion::new(1, 0, 0), - }; - datastore - .create_component_update( - &opctx, - system_update.identity.id, - cu2.clone(), - ) - .await - .expect("Failed to create component update"); - - // but same type and version should fail - let cu3 = ComponentUpdate { - identity: ComponentUpdateIdentity::new(Uuid::new_v4()), - ..cu1 - }; - let conflict = datastore - .create_component_update(&opctx, system_update.identity.id, cu3) - .await - .unwrap_err(); - assert_matches!(conflict, Error::ObjectAlreadyExists { .. }); - - db.cleanup().await.unwrap(); - logctx.cleanup_successful(); - } } diff --git a/nexus/db-queries/src/db/datastore/update.rs b/nexus/db-queries/src/db/datastore/update.rs index 0790bd458e..3725797f83 100644 --- a/nexus/db-queries/src/db/datastore/update.rs +++ b/nexus/db-queries/src/db/datastore/update.rs @@ -4,376 +4,368 @@ //! [`DataStore`] methods related to updates and artifacts. 
+use std::collections::HashMap; + use super::DataStore; use crate::authz; use crate::context::OpContext; use crate::db; use crate::db::error::{public_error_from_diesel, ErrorHandler}; -use crate::db::model::{ - ComponentUpdate, SemverVersion, SystemUpdate, UpdateArtifact, - UpdateDeployment, UpdateStatus, UpdateableComponent, -}; -use crate::db::pagination::paginated; +use crate::db::model::SemverVersion; +use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; -use chrono::Utc; use diesel::prelude::*; -use nexus_db_model::SystemUpdateComponentUpdate; -use nexus_types::identity::Asset; +use diesel::result::Error as DieselError; +use nexus_db_model::{ArtifactHash, TufArtifact, TufRepo, TufRepoDescription}; use omicron_common::api::external::{ - CreateResult, DataPageParams, DeleteResult, InternalContext, ListResultVec, - LookupResult, LookupType, ResourceType, UpdateResult, + self, CreateResult, LookupResult, LookupType, ResourceType, + TufRepoInsertStatus, }; +use swrite::{swrite, SWrite}; use uuid::Uuid; -impl DataStore { - pub async fn update_artifact_upsert( - &self, - opctx: &OpContext, - artifact: UpdateArtifact, - ) -> CreateResult { - opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; +/// The return value of [`DataStore::update_tuf_repo_description_insert`]. +/// +/// This is similar to [`external::TufRepoInsertResponse`], but uses +/// nexus-db-model's types instead of external types. +pub struct TufRepoInsertResponse { + pub recorded: TufRepoDescription, + pub status: TufRepoInsertStatus, +} - use db::schema::update_artifact::dsl; - diesel::insert_into(dsl::update_artifact) - .values(artifact.clone()) - .on_conflict((dsl::name, dsl::version, dsl::kind)) - .do_update() - .set(artifact.clone()) - .returning(UpdateArtifact::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) +impl TufRepoInsertResponse { + pub fn into_external(self) -> external::TufRepoInsertResponse { + external::TufRepoInsertResponse { + recorded: self.recorded.into_external(), + status: self.status, + } } +} + +async fn artifacts_for_repo( + repo_id: Uuid, + conn: &async_bb8_diesel::Connection, +) -> Result, DieselError> { + use db::schema::tuf_artifact::dsl as tuf_artifact_dsl; + use db::schema::tuf_repo_artifact::dsl as tuf_repo_artifact_dsl; + + let join_on_dsl = tuf_artifact_dsl::name + .eq(tuf_repo_artifact_dsl::tuf_artifact_name) + .and( + tuf_artifact_dsl::version + .eq(tuf_repo_artifact_dsl::tuf_artifact_version), + ) + .and( + tuf_artifact_dsl::kind.eq(tuf_repo_artifact_dsl::tuf_artifact_kind), + ); + // Don't bother paginating because each repo should only have a few (under + // 20) artifacts. + tuf_repo_artifact_dsl::tuf_repo_artifact + .filter(tuf_repo_artifact_dsl::tuf_repo_id.eq(repo_id)) + .inner_join(tuf_artifact_dsl::tuf_artifact.on(join_on_dsl)) + .select(TufArtifact::as_select()) + .load_async(conn) + .await +} - pub async fn update_artifact_hard_delete_outdated( +impl DataStore { + /// Inserts a new TUF repository into the database. + /// + /// Returns the repository just inserted, or an existing + /// `TufRepoDescription` if one was already found. (This is not an upsert, + /// because if we know about an existing repo but with different contents, + /// we reject that.) 
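The doc comment above pins down the semantics: a repeated upload of the same system version is accepted only when the contents agree, and is never overwritten. A self-contained sketch of that decision, with a stand-in Sha256 type:

    #[derive(PartialEq)]
    struct Sha256([u8; 32]);

    enum Outcome {
        Inserted,
        AlreadyExists,
        HashMismatch,
    }

    /// Uploading the same system version twice is fine only if the
    /// repository hash agrees; differing contents are rejected.
    fn decide(existing: Option<&Sha256>, uploaded: &Sha256) -> Outcome {
        match existing {
            None => Outcome::Inserted,
            Some(h) if h == uploaded => Outcome::AlreadyExists,
            Some(_) => Outcome::HashMismatch,
        }
    }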
+ pub async fn update_tuf_repo_insert( &self, opctx: &OpContext, - current_targets_role_version: i64, - ) -> DeleteResult { + description: TufRepoDescription, + ) -> CreateResult { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let log = opctx.log.new( + slog::o!( + "method" => "update_tuf_repo_insert", + "uploaded_system_version" => description.repo.system_version.to_string(), + ), + ); - // We use the `targets_role_version` column in the table to delete any - // old rows, keeping the table in sync with the current copy of - // artifacts.json. - use db::schema::update_artifact::dsl; - diesel::delete(dsl::update_artifact) - .filter(dsl::targets_role_version.lt(current_targets_role_version)) - .execute_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map(|_rows_deleted| ()) - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - .internal_context("deleting outdated available artifacts") - } + let err = OptionalError::new(); + let err2 = err.clone(); - pub async fn upsert_system_update( - &self, - opctx: &OpContext, - update: SystemUpdate, - ) -> CreateResult { - opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?; - - use db::schema::system_update::dsl::*; - - diesel::insert_into(system_update) - .values(update.clone()) - .on_conflict(version) - .do_update() - // for now the only modifiable field is time_modified, but we intend - // to add more metadata to this model - .set(time_modified.eq(Utc::now())) - .returning(SystemUpdate::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::SystemUpdate, - &update.version.to_string(), - ), + let conn = self.pool_connection_authorized(opctx).await?; + self.transaction_retry_wrapper("update_tuf_repo_insert") + .transaction(&conn, move |conn| { + insert_impl( + log.clone(), + conn, + description.clone(), + err2.clone(), ) }) - } - - // version is unique but not the primary key, so we can't use LookupPath to handle this for us - pub async fn system_update_fetch_by_version( - &self, - opctx: &OpContext, - target: SemverVersion, - ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - use db::schema::system_update::dsl::*; - - let version_string = target.to_string(); - - system_update - .filter(version.eq(target)) - .select(SystemUpdate::as_select()) - .first_async(&*self.pool_connection_authorized(opctx).await?) 
.await .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByLookup( - ResourceType::SystemUpdate, - LookupType::ByCompositeId(version_string), - ), - ) - }) - } - - pub async fn create_component_update( - &self, - opctx: &OpContext, - system_update_id: Uuid, - update: ComponentUpdate, - ) -> CreateResult { - opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?; - - // TODO: make sure system update with that ID exists first - // let (.., db_system_update) = LookupPath::new(opctx, &self) - - use db::schema::component_update; - use db::schema::system_update_component_update as join_table; - - let version_string = update.version.to_string(); - - let conn = self.pool_connection_authorized(opctx).await?; - - self.transaction_retry_wrapper("create_component_update") - .transaction(&conn, |conn| { - let update = update.clone(); - async move { - let db_update = - diesel::insert_into(component_update::table) - .values(update.clone()) - .returning(ComponentUpdate::as_returning()) - .get_result_async(&conn) - .await?; - - diesel::insert_into(join_table::table) - .values(SystemUpdateComponentUpdate { - system_update_id, - component_update_id: update.id(), - }) - .returning(SystemUpdateComponentUpdate::as_returning()) - .get_result_async(&conn) - .await?; - - Ok(db_update) + if let Some(err) = err.take() { + err.into() + } else { + public_error_from_diesel(e, ErrorHandler::Server) } }) - .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::ComponentUpdate, - &version_string, - ), - ) - }) } - pub async fn system_updates_list_by_id( + /// Returns the TUF repo description corresponding to this system version. + pub async fn update_tuf_repo_get( &self, opctx: &OpContext, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + system_version: SemverVersion, + ) -> LookupResult { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - use db::schema::system_update::dsl::*; + use db::schema::tuf_repo::dsl; - paginated(system_update, id, pagparams) - .select(SystemUpdate::as_select()) - .order(version.desc()) - .load_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } - - pub async fn system_update_components_list( - &self, - opctx: &OpContext, - system_update_id: Uuid, - ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - use db::schema::component_update; - use db::schema::system_update_component_update as join_table; - - component_update::table - .inner_join(join_table::table) - .filter(join_table::columns::system_update_id.eq(system_update_id)) - .select(ComponentUpdate::as_select()) - .get_results_async(&*self.pool_connection_authorized(opctx).await?) 
- .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } - - pub async fn create_updateable_component( - &self, - opctx: &OpContext, - component: UpdateableComponent, - ) -> CreateResult { - opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?; - - // make sure system version exists - let sys_version = component.system_version.clone(); - self.system_update_fetch_by_version(opctx, sys_version).await?; - - use db::schema::updateable_component::dsl::*; + let conn = self.pool_connection_authorized(opctx).await?; - diesel::insert_into(updateable_component) - .values(component.clone()) - .returning(UpdateableComponent::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) + let repo = dsl::tuf_repo + .filter(dsl::system_version.eq(system_version.clone())) + .select(TufRepo::as_select()) + .first_async::(&*conn) .await .map_err(|e| { public_error_from_diesel( e, - ErrorHandler::Conflict( - ResourceType::UpdateableComponent, - &component.id().to_string(), // TODO: more informative identifier + ErrorHandler::NotFoundByLookup( + ResourceType::TufRepo, + LookupType::ByCompositeId(system_version.to_string()), ), ) - }) - } - - pub async fn updateable_components_list_by_id( - &self, - opctx: &OpContext, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - use db::schema::updateable_component::dsl::*; - - paginated(updateable_component, id, pagparams) - .select(UpdateableComponent::as_select()) - .load_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } - - pub async fn lowest_component_system_version( - &self, - opctx: &OpContext, - ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - use db::schema::updateable_component::dsl::*; - - updateable_component - .select(system_version) - .order(system_version.asc()) - .first_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } - - pub async fn highest_component_system_version( - &self, - opctx: &OpContext, - ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - use db::schema::updateable_component::dsl::*; - - updateable_component - .select(system_version) - .order(system_version.desc()) - .first_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } - - pub async fn create_update_deployment( - &self, - opctx: &OpContext, - deployment: UpdateDeployment, - ) -> CreateResult { - opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?; - - use db::schema::update_deployment::dsl::*; + })?; - diesel::insert_into(update_deployment) - .values(deployment.clone()) - .returning(UpdateDeployment::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) 
+ let artifacts = artifacts_for_repo(repo.id, &conn) .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::UpdateDeployment, - &deployment.id().to_string(), - ), - ) - }) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + Ok(TufRepoDescription { repo, artifacts }) } +} - pub async fn steady_update_deployment( - &self, - opctx: &OpContext, - deployment_id: Uuid, - ) -> UpdateResult { - // TODO: use authz::UpdateDeployment as the input so we can check Modify - // on that instead - opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?; - - use db::schema::update_deployment::dsl::*; - - diesel::update(update_deployment) - .filter(id.eq(deployment_id)) - .set(( - status.eq(UpdateStatus::Steady), - time_modified.eq(diesel::dsl::now), - )) - .returning(UpdateDeployment::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) +// This is a separate method mostly to make rustfmt not bail out on long lines +// of text. +async fn insert_impl( + log: slog::Logger, + conn: async_bb8_diesel::Connection, + desc: TufRepoDescription, + err: OptionalError, +) -> Result { + let repo = { + use db::schema::tuf_repo::dsl; + + // Load the existing repo by the system version, if + // any. + let existing_repo = dsl::tuf_repo + .filter(dsl::system_version.eq(desc.repo.system_version.clone())) + .select(TufRepo::as_select()) + .first_async::(&conn) .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByLookup( - ResourceType::UpdateDeployment, - LookupType::ById(deployment_id), - ), - ) - }) + .optional()?; + + if let Some(existing_repo) = existing_repo { + // It doesn't matter whether the UUID of the repo matches or not, + // since it's uniquely generated. But do check the hash. + if existing_repo.sha256 != desc.repo.sha256 { + return Err(err.bail(InsertError::RepoHashMismatch { + system_version: desc.repo.system_version, + uploaded: desc.repo.sha256, + existing: existing_repo.sha256, + })); + } + + // Just return the existing repo along with all of its artifacts. + let artifacts = artifacts_for_repo(existing_repo.id, &conn).await?; + + let recorded = + TufRepoDescription { repo: existing_repo, artifacts }; + return Ok(TufRepoInsertResponse { + recorded, + status: TufRepoInsertStatus::AlreadyExists, + }); + } + + // This will fail if this ID or system version already exists with a + // different hash, but that's a weird situation that should error out + // anyway (IDs are not user controlled, hashes are). + diesel::insert_into(dsl::tuf_repo) + .values(desc.repo.clone()) + .execute_async(&conn) + .await?; + desc.repo.clone() + }; + + // Since we've inserted a new repo, we also need to insert the + // corresponding artifacts. + let all_artifacts = { + use db::schema::tuf_artifact::dsl; + + // Multiple repos can have the same artifacts, so we shouldn't error + // out if we find an existing artifact. However, we should check that + // the SHA256 hash and length matches if an existing artifact matches. 
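The comment above sets up the reconciliation the code below performs: artifacts are keyed by (name, version, kind), an existing row must agree on hash and size, and only genuinely new rows get inserted. A self-contained sketch of that partitioning, with stand-in Key/Meta types:

    use std::collections::HashMap;

    #[derive(Clone, PartialEq, Eq, Hash)]
    struct Key {
        name: String,
        version: String,
        kind: String,
    }

    #[derive(Clone, PartialEq)]
    struct Meta {
        sha256: String,
        size: u64,
    }

    /// Ok(new rows to insert) or Err(keys whose hash/size disagree).
    fn reconcile(
        uploaded: Vec<(Key, Meta)>,
        existing: &HashMap<Key, Meta>,
    ) -> Result<Vec<(Key, Meta)>, Vec<Key>> {
        let mut new_rows = Vec::new();
        let mut mismatched = Vec::new();
        for (key, meta) in uploaded {
            match existing.get(&key) {
                None => new_rows.push((key, meta)),
                Some(m) if *m == meta => {} // identical row already present
                Some(_) => mismatched.push(key),
            }
        }
        if mismatched.is_empty() {
            Ok(new_rows)
        } else {
            Err(mismatched)
        }
    }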
+ + let mut filter_dsl = dsl::tuf_artifact.into_boxed(); + for artifact in desc.artifacts.clone() { + filter_dsl = filter_dsl.or_filter( + dsl::name + .eq(artifact.id.name) + .and(dsl::version.eq(artifact.id.version)) + .and(dsl::kind.eq(artifact.id.kind)), + ); + } + + let results = filter_dsl + .select(TufArtifact::as_select()) + .load_async(&conn) + .await?; + debug!( + log, + "found {} existing artifacts", results.len(); + "results" => ?results, + ); + + let results_by_id = results + .iter() + .map(|artifact| (&artifact.id, artifact)) + .collect::>(); + + // uploaded_and_existing contains non-matching artifacts in pairs of + // (uploaded, currently in db). + let mut uploaded_and_existing = Vec::new(); + let mut new_artifacts = Vec::new(); + let mut all_artifacts = Vec::new(); + + for uploaded_artifact in desc.artifacts.clone() { + let Some(&existing_artifact) = + results_by_id.get(&uploaded_artifact.id) + else { + // This is a new artifact. + new_artifacts.push(uploaded_artifact.clone()); + all_artifacts.push(uploaded_artifact); + continue; + }; + + if existing_artifact.sha256 != uploaded_artifact.sha256 + || existing_artifact.artifact_size() + != uploaded_artifact.artifact_size() + { + uploaded_and_existing.push(( + uploaded_artifact.clone(), + existing_artifact.clone(), + )); + } else { + all_artifacts.push(uploaded_artifact); + } + } + + if !uploaded_and_existing.is_empty() { + debug!(log, "uploaded artifacts don't match existing artifacts"; + "uploaded_and_existing" => ?uploaded_and_existing, + ); + return Err(err.bail(InsertError::ArtifactMismatch { + uploaded_and_existing, + })); + } + + debug!( + log, + "inserting {} new artifacts", new_artifacts.len(); + "new_artifacts" => ?new_artifacts, + ); + + // Insert new artifacts into the database. + diesel::insert_into(dsl::tuf_artifact) + .values(new_artifacts) + .execute_async(&conn) + .await?; + all_artifacts + }; + + // Finally, insert all the associations into the tuf_repo_artifact table. + { + use db::schema::tuf_repo_artifact::dsl; + + let mut values = Vec::new(); + for artifact in desc.artifacts.clone() { + slog::debug!( + log, + "inserting artifact into tuf_repo_artifact table"; + "artifact" => %artifact.id, + ); + values.push(( + dsl::tuf_repo_id.eq(desc.repo.id), + dsl::tuf_artifact_name.eq(artifact.id.name), + dsl::tuf_artifact_version.eq(artifact.id.version), + dsl::tuf_artifact_kind.eq(artifact.id.kind), + )); + } + + diesel::insert_into(dsl::tuf_repo_artifact) + .values(values) + .execute_async(&conn) + .await?; } - pub async fn update_deployments_list_by_id( - &self, - opctx: &OpContext, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - use db::schema::update_deployment::dsl::*; - - paginated(update_deployment, id, pagparams) - .select(UpdateDeployment::as_select()) - .load_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } + let recorded = TufRepoDescription { repo, artifacts: all_artifacts }; + Ok(TufRepoInsertResponse { + recorded, + status: TufRepoInsertStatus::Inserted, + }) +} - pub async fn latest_update_deployment( - &self, - opctx: &OpContext, - ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; +#[derive(Clone, Debug)] +enum InsertError { + /// The SHA256 of the uploaded repository doesn't match the SHA256 of the + /// existing repository with the same system version. 
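Worth noting how insert_impl surfaces this typed error: it calls err.bail(...) inside the retryable transaction, and the caller checks err.take() once the retry wrapper gives up. A rough stand-alone analogue of that shared-slot technique (a sketch under assumptions, not the real OptionalError from nexus's transaction_retry module):

    use std::sync::{Arc, Mutex};

    /// A cloneable slot that smuggles a typed error out of a closure
    /// whose own error type is fixed (DieselError in the original).
    #[derive(Clone)]
    struct ErrorSlot<E>(Arc<Mutex<Option<E>>>);

    impl<E> ErrorSlot<E> {
        fn new() -> Self {
            ErrorSlot(Arc::new(Mutex::new(None)))
        }

        /// Park the typed error; return a generic one for the closure.
        fn bail(&self, e: E) -> String {
            *self.0.lock().unwrap() = Some(e);
            "bailed with a custom error".to_string()
        }

        /// After the operation fails, see whether we bailed on purpose.
        fn take(&self) -> Option<E> {
            self.0.lock().unwrap().take()
        }
    }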
+ RepoHashMismatch { + system_version: SemverVersion, + uploaded: ArtifactHash, + existing: ArtifactHash, + }, + /// The SHA256 or length of one or more artifacts doesn't match the + /// corresponding entries in the database. + ArtifactMismatch { + // Pairs of (uploaded, existing) artifacts. + uploaded_and_existing: Vec<(TufArtifact, TufArtifact)>, + }, +} - use db::schema::update_deployment::dsl::*; +impl From for external::Error { + fn from(e: InsertError) -> Self { + match e { + InsertError::RepoHashMismatch { + system_version, + uploaded, + existing, + } => external::Error::conflict(format!( + "Uploaded repository with system version {} has SHA256 hash \ + {}, but existing repository has SHA256 hash {}.", + system_version, uploaded, existing, + )), + InsertError::ArtifactMismatch { uploaded_and_existing } => { + // Build a message out of uploaded and existing artifacts. + let mut message = "Uploaded artifacts don't match existing \ + artifacts with same IDs:\n" + .to_string(); + for (uploaded, existing) in uploaded_and_existing { + swrite!( + message, + "- Uploaded artifact {} has SHA256 hash {} and length \ + {}, but existing artifact {} has SHA256 hash {} and \ + length {}.\n", + uploaded.id, + uploaded.sha256, + uploaded.artifact_size(), + existing.id, + existing.sha256, + existing.artifact_size(), + ); + } - update_deployment - .select(UpdateDeployment::as_returning()) - .order(time_created.desc()) - .first_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + external::Error::conflict(message) + } + } } } diff --git a/nexus/db-queries/src/db/lookup.rs b/nexus/db-queries/src/db/lookup.rs index 028694dc4b..1cf14c5a8f 100644 --- a/nexus/db-queries/src/db/lookup.rs +++ b/nexus/db-queries/src/db/lookup.rs @@ -17,7 +17,6 @@ use async_bb8_diesel::AsyncRunQueryDsl; use db_macros::lookup_resource; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use ipnetwork::IpNetwork; -use nexus_db_model::KnownArtifactKind; use nexus_db_model::Name; use omicron_common::api::external::Error; use omicron_common::api::external::InternalContext; @@ -431,27 +430,27 @@ impl<'a> LookupPath<'a> { ) } + /// Select a resource of type TufRepo, identified by its UUID. + pub fn tuf_repo_id(self, id: Uuid) -> TufRepo<'a> { + TufRepo::PrimaryKey(Root { lookup_root: self }, id) + } + /// Select a resource of type UpdateArtifact, identified by its /// `(name, version, kind)` tuple - pub fn update_artifact_tuple( + pub fn tuf_artifact_tuple( self, - name: &str, + name: impl Into, version: db::model::SemverVersion, - kind: KnownArtifactKind, - ) -> UpdateArtifact<'a> { - UpdateArtifact::PrimaryKey( + kind: impl Into, + ) -> TufArtifact<'a> { + TufArtifact::PrimaryKey( Root { lookup_root: self }, - name.to_string(), + name.into(), version, - kind, + kind.into(), ) } - /// Select a resource of type UpdateDeployment, identified by its id - pub fn update_deployment_id(self, id: Uuid) -> UpdateDeployment<'a> { - UpdateDeployment::PrimaryKey(Root { lookup_root: self }, id) - } - /// Select a resource of type UserBuiltin, identified by its `name` pub fn user_builtin_id<'b>(self, id: Uuid) -> UserBuiltin<'b> where @@ -857,21 +856,10 @@ lookup_resource! { } lookup_resource! 
{ - name = "UpdateArtifact", - ancestors = [], - children = [], - lookup_by_name = false, - soft_deletes = false, - primary_key_columns = [ - { column_name = "name", rust_type = String }, - { column_name = "version", rust_type = db::model::SemverVersion }, - { column_name = "kind", rust_type = KnownArtifactKind } - ] -} - -lookup_resource! { - name = "SystemUpdate", + name = "TufRepo", ancestors = [], + // TODO: should this have TufArtifact as a child? This is a many-many + // relationship. children = [], lookup_by_name = false, soft_deletes = false, @@ -879,12 +867,16 @@ lookup_resource! { } lookup_resource! { - name = "UpdateDeployment", + name = "TufArtifact", ancestors = [], children = [], lookup_by_name = false, soft_deletes = false, - primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] + primary_key_columns = [ + { column_name = "name", rust_type = String }, + { column_name = "version", rust_type = db::model::SemverVersion }, + { column_name = "kind", rust_type = String }, + ] } lookup_resource! { diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs index e8ef721e98..2d57274909 100644 --- a/nexus/db-queries/src/db/pool_connection.rs +++ b/nexus/db-queries/src/db/pool_connection.rs @@ -67,9 +67,6 @@ static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[ "switch_link_fec", "switch_link_speed", "switch_port_geometry", - "update_artifact_kind", - "update_status", - "updateable_component_type", "user_provision_type", "vpc_firewall_rule_action", "vpc_firewall_rule_direction", diff --git a/nexus/db-queries/tests/output/authz-roles.out b/nexus/db-queries/tests/output/authz-roles.out index 26cc13fc6a..ee55d775f0 100644 --- a/nexus/db-queries/tests/output/authz-roles.out +++ b/nexus/db-queries/tests/output/authz-roles.out @@ -950,7 +950,7 @@ resource: Blueprint id "b9e923f6-caf3-4c83-96f9-8ffe8c627dd2" silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! -resource: SystemUpdate id "9c86d713-1bc2-4927-9892-ada3eb6f5f62" +resource: TufRepo id "3c52d72f-cbf7-4951-a62f-a4154e74da87" USER Q R LC RP M MP CC D fleet-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ @@ -964,7 +964,7 @@ resource: SystemUpdate id "9c86d713-1bc2-4927-9892-ada3eb6f5f62" silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! -resource: UpdateDeployment id "c617a035-7c42-49ff-a36a-5dfeee382832" +resource: TufArtifact id "a v1.0.0 (b)" USER Q R LC RP M MP CC D fleet-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 9d6bf2d22f..f13ea721b8 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -40,9 +40,13 @@ external_dns_servers = [ "1.1.1.1", "9.9.9.9" ] [deployment.dropshot_external] # IP Address and TCP port on which to listen for the external API bind_address = "127.0.0.1:12220" -# Allow larger request bodies (1MiB) to accomodate firewall endpoints (one -# rule is ~500 bytes) -request_body_max_bytes = 1048576 +# Allow large request bodies to support uploading TUF archives. The number here +# is picked based on the typical size for tuf-mupdate.zip as of 2024-01 +# (~1.5GiB) and multiplying it by 2. +# +# This should be brought back down to a more reasonable value once per-endpoint +# request body limits are implemented. +request_body_max_bytes = 3221225472 # To have Nexus's external HTTP endpoint use TLS, uncomment the line below. You # will also need to provide an initial TLS certificate during rack # initialization. 
If you're using this config file, you're probably running a diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index d643969924..d6ad7c98ea 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -140,6 +140,7 @@ pub struct Nexus { timeseries_client: LazyTimeseriesClient, /// Contents of the trusted root role for the TUF repository. + #[allow(dead_code)] updates_config: Option, /// The tunable parameters from a configuration file diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 17e7a17444..38c7861e46 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -151,9 +151,6 @@ impl super::Nexus { }) .collect(); - // internally ignores ObjectAlreadyExists, so will not error on repeat runs - let _ = self.populate_mock_system_updates(&opctx).await?; - let dns_zone = request .internal_dns_zone_config .zones diff --git a/nexus/src/app/update/mod.rs b/nexus/src/app/update/mod.rs index 36d4dbcb9e..d4a47375bc 100644 --- a/nexus/src/app/update/mod.rs +++ b/nexus/src/app/update/mod.rs @@ -4,27 +4,17 @@ //! Software Updates -use chrono::Utc; -use hex; +use bytes::Bytes; +use dropshot::HttpError; +use futures::Stream; +use nexus_db_model::TufRepoDescription; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; -use nexus_db_queries::db; -use nexus_db_queries::db::identity::Asset; -use nexus_db_queries::db::lookup::LookupPath; -use nexus_db_queries::db::model::KnownArtifactKind; -use nexus_types::external_api::{params, shared}; use omicron_common::api::external::{ - self, CreateResult, DataPageParams, Error, ListResultVec, LookupResult, - PaginationOrder, UpdateResult, + Error, SemverVersion, TufRepoInsertResponse, }; -use omicron_common::api::internal::nexus::UpdateArtifactId; -use rand::Rng; -use ring::digest; -use std::convert::TryFrom; -use std::num::NonZeroU32; -use std::path::Path; -use tokio::io::AsyncWriteExt; -use uuid::Uuid; +use omicron_common::update::ArtifactId; +use update_common::artifacts::ArtifactsWithPlan; mod common_sp_update; mod host_phase1_updater; @@ -47,927 +37,70 @@ pub enum UpdateProgress { Failed(String), } -static BASE_ARTIFACT_DIR: &str = "/var/tmp/oxide_artifacts"; - impl super::Nexus { - async fn tuf_base_url( + pub(crate) async fn updates_put_repository( &self, opctx: &OpContext, - ) -> Result, Error> { - let rack = self.rack_lookup(opctx, &self.rack_id).await?; - - Ok(self.updates_config.as_ref().map(|c| { - rack.tuf_base_url.unwrap_or_else(|| c.default_base_url.clone()) - })) - } - - pub(crate) async fn updates_refresh_metadata( - &self, - opctx: &OpContext, - ) -> Result<(), Error> { + body: impl Stream> + Send + Sync + 'static, + file_name: String, + ) -> Result { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; - let updates_config = self.updates_config.as_ref().ok_or_else(|| { - Error::invalid_request("updates system not configured") - })?; - let base_url = self.tuf_base_url(opctx).await?.ok_or_else(|| { - Error::invalid_request("updates system not configured") - })?; - let trusted_root = tokio::fs::read(&updates_config.trusted_root) - .await - .map_err(|e| Error::InternalError { - internal_message: format!( - "error trying to read trusted root: {}", - e - ), + // XXX: this needs to validate against the trusted root! 
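On the request_body_max_bytes bump in the config change above, the value is exactly 3 GiB; a one-line check of the arithmetic:

    fn main() {
        let gib: u64 = 1024 * 1024 * 1024;
        // ~1.5 GiB observed for tuf-mupdate.zip as of 2024-01, doubled.
        assert_eq!(3 * gib, 3_221_225_472);
    }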
+ let _updates_config = + self.updates_config.as_ref().ok_or_else(|| { + Error::internal_error("updates system not initialized") })?; - let artifacts = crate::updates::read_artifacts(&trusted_root, base_url) - .await - .map_err(|e| Error::InternalError { - internal_message: format!( - "error trying to refresh updates: {}", - e - ), - })?; - - // FIXME: if we hit an error in any of these database calls, the - // available artifact table will be out of sync with the current - // artifacts.json. can we do a transaction or something? + let artifacts_with_plan = + ArtifactsWithPlan::from_stream(body, Some(file_name), &self.log) + .await + .map_err(|error| error.to_http_error())?; - let mut current_version = None; - for artifact in &artifacts { - current_version = Some(artifact.targets_role_version); - self.db_datastore - .update_artifact_upsert(&opctx, artifact.clone()) - .await?; - } - - // ensure table is in sync with current copy of artifacts.json - if let Some(current_version) = current_version { - self.db_datastore - .update_artifact_hard_delete_outdated(&opctx, current_version) - .await?; - } - - // demo-grade update logic: tell all sleds to apply all artifacts - for sled in self - .db_datastore - .sled_list( - &opctx, - &DataPageParams { - marker: None, - direction: PaginationOrder::Ascending, - limit: NonZeroU32::new(100).unwrap(), - }, - ) - .await? - { - let client = self.sled_client(&sled.id()).await?; - for artifact in &artifacts { - info!( - self.log, - "telling sled {} to apply {}", - sled.id(), - artifact.target_name - ); - client - .update_artifact( - &sled_agent_client::types::UpdateArtifactId { - name: artifact.name.clone(), - version: artifact.version.0.clone().into(), - kind: artifact.kind.0.into(), - }, - ) - .await?; - } - } - - Ok(()) - } - - /// Downloads a file from within [`BASE_ARTIFACT_DIR`]. - pub(crate) async fn download_artifact( - &self, - opctx: &OpContext, - artifact: UpdateArtifactId, - ) -> Result, Error> { - let mut base_url = - self.tuf_base_url(opctx).await?.ok_or_else(|| { - Error::invalid_request("updates system not configured") - })?; - if !base_url.ends_with('/') { - base_url.push('/'); - } - - // We cache the artifact based on its checksum, so fetch that from the - // database. - let (.., artifact_entry) = LookupPath::new(opctx, &self.db_datastore) - .update_artifact_tuple( - &artifact.name, - db::model::SemverVersion(artifact.version.clone()), - KnownArtifactKind(artifact.kind), - ) - .fetch() - .await?; - let filename = format!( - "{}.{}.{}-{}", - artifact_entry.target_sha256, - artifact.kind, - artifact.name, - artifact.version + // Now store the artifacts in the database. + let tuf_repo_description = TufRepoDescription::from_external( + artifacts_with_plan.description().clone(), ); - let path = Path::new(BASE_ARTIFACT_DIR).join(&filename); - - if !path.exists() { - // If the artifact doesn't exist, we should download it. - // - // TODO: There also exists the question of "when should we *remove* - // things from BASE_ARTIFACT_DIR", which we should also resolve. - // Demo-quality solution could be "destroy it on boot" or something? - // (we aren't doing that yet). 
- info!(self.log, "Accessing {} - needs to be downloaded", filename); - tokio::fs::create_dir_all(BASE_ARTIFACT_DIR).await.map_err( - |e| { - Error::internal_error(&format!( - "Failed to create artifacts directory: {}", - e - )) - }, - )?; - - let mut response = reqwest::get(format!( - "{}targets/{}.{}", - base_url, - artifact_entry.target_sha256, - artifact_entry.target_name - )) - .await - .map_err(|e| { - Error::internal_error(&format!( - "Failed to fetch artifact: {}", - e - )) - })?; - // To ensure another request isn't trying to use this target while we're downloading it - // or before we've verified it, write to a random path in the same directory, then move - // it to the correct path after verification. - let temp_path = path.with_file_name(format!( - ".{}.{:x}", - filename, - rand::thread_rng().gen::() - )); - let mut file = - tokio::fs::File::create(&temp_path).await.map_err(|e| { - Error::internal_error(&format!( - "Failed to create file: {}", - e - )) - })?; - - let mut context = digest::Context::new(&digest::SHA256); - let mut length: i64 = 0; - while let Some(chunk) = response.chunk().await.map_err(|e| { - Error::internal_error(&format!( - "Failed to read HTTP body: {}", - e - )) - })? { - file.write_all(&chunk).await.map_err(|e| { - Error::internal_error(&format!( - "Failed to write to file: {}", - e - )) - })?; - context.update(&chunk); - length += i64::try_from(chunk.len()).unwrap(); - - if length > artifact_entry.target_length { - return Err(Error::internal_error(&format!( - "target {} is larger than expected", - artifact_entry.target_name - ))); - } - } - drop(file); - - if hex::encode(context.finish()) == artifact_entry.target_sha256 - && length == artifact_entry.target_length - { - tokio::fs::rename(temp_path, &path).await.map_err(|e| { - Error::internal_error(&format!( - "Failed to rename file after verification: {}", - e - )) - })? - } else { - return Err(Error::internal_error(&format!( - "failed to verify target {}", - artifact_entry.target_name - ))); - } - - info!( - self.log, - "wrote {} to artifact dir", artifact_entry.target_name - ); - } else { - info!(self.log, "Accessing {} - already exists", path.display()); - } - - // TODO: These artifacts could be quite large - we should figure out how to - // stream this file back instead of holding it entirely in-memory in a - // Vec. - // - // Options: - // - RFC 7233 - "Range Requests" (is this HTTP/1.1 only?) - // https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests - // - "Roll our own". 
See: - // https://stackoverflow.com/questions/20969331/standard-method-for-http-partial-upload-resume-upload - let body = tokio::fs::read(&path).await.map_err(|e| { - Error::internal_error(&format!( - "Cannot read artifact from filesystem: {}", - e - )) - })?; - Ok(body) - } - - pub async fn upsert_system_update( - &self, - opctx: &OpContext, - create_update: params::SystemUpdateCreate, - ) -> CreateResult { - let update = db::model::SystemUpdate::new(create_update.version)?; - self.db_datastore.upsert_system_update(opctx, update).await - } - - pub async fn create_component_update( - &self, - opctx: &OpContext, - create_update: params::ComponentUpdateCreate, - ) -> CreateResult { - let now = Utc::now(); - let update = db::model::ComponentUpdate { - identity: db::model::ComponentUpdateIdentity { - id: Uuid::new_v4(), - time_created: now, - time_modified: now, - }, - version: db::model::SemverVersion(create_update.version), - component_type: create_update.component_type.into(), - }; - - self.db_datastore - .create_component_update( - opctx, - create_update.system_update_id, - update, - ) - .await - } - - pub(crate) async fn system_update_fetch_by_version( - &self, - opctx: &OpContext, - version: &external::SemverVersion, - ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - self.db_datastore - .system_update_fetch_by_version(opctx, version.clone().into()) + let response = self + .db_datastore + .update_tuf_repo_insert(opctx, tuf_repo_description) .await + .map_err(HttpError::from)?; + Ok(response.into_external()) } - pub(crate) async fn system_updates_list_by_id( + pub(crate) async fn updates_get_repository( &self, opctx: &OpContext, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - self.db_datastore.system_updates_list_by_id(opctx, pagparams).await - } + system_version: SemverVersion, + ) -> Result { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - pub(crate) async fn system_update_list_components( - &self, - opctx: &OpContext, - version: &external::SemverVersion, - ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + let _updates_config = + self.updates_config.as_ref().ok_or_else(|| { + Error::internal_error("updates system not initialized") + })?; - let system_update = self + let tuf_repo_description = self .db_datastore - .system_update_fetch_by_version(opctx, version.clone().into()) - .await?; - - self.db_datastore - .system_update_components_list(opctx, system_update.id()) + .update_tuf_repo_get(opctx, system_version.into()) .await - } - - pub async fn create_updateable_component( - &self, - opctx: &OpContext, - create_component: params::UpdateableComponentCreate, - ) -> CreateResult { - let component = - db::model::UpdateableComponent::try_from(create_component)?; - self.db_datastore.create_updateable_component(opctx, component).await - } - - pub(crate) async fn updateable_components_list_by_id( - &self, - opctx: &OpContext, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - self.db_datastore - .updateable_components_list_by_id(opctx, pagparams) - .await - } - - pub(crate) async fn create_update_deployment( - &self, - opctx: &OpContext, - start: params::SystemUpdateStart, - ) -> CreateResult { - // 404 if specified version doesn't exist - // TODO: is 404 the right error for starting an update with a nonexistent version? 
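The download_artifact code removed above used a verify-then-rename scheme: hash every chunk as it streams to a temporary path, and only move the file into place once both the SHA256 and the length check out. A synchronous, self-contained sketch of that scheme, using ring and hex as the removed code did (the function name and signature here are illustrative):

    use ring::digest;
    use std::fs;
    use std::io::Write;
    use std::path::Path;

    fn write_verified(
        dest: &Path,
        chunks: impl Iterator<Item = Vec<u8>>,
        expected_sha256_hex: &str,
        expected_len: u64,
    ) -> std::io::Result<bool> {
        // Write to a sibling temp path so concurrent readers never see
        // a partially downloaded or unverified file at `dest`.
        let tmp = dest.with_extension("partial");
        let mut file = fs::File::create(&tmp)?;
        let mut ctx = digest::Context::new(&digest::SHA256);
        let mut len: u64 = 0;
        for chunk in chunks {
            file.write_all(&chunk)?;
            ctx.update(&chunk);
            len += chunk.len() as u64;
        }
        drop(file);
        // The removed code also aborted early once the length exceeded
        // the expected size; this sketch just checks at the end.
        if len == expected_len && hex::encode(ctx.finish()) == expected_sha256_hex {
            fs::rename(&tmp, dest)?;
            Ok(true)
        } else {
            fs::remove_file(&tmp)?;
            Ok(false)
        }
    }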
- self.system_update_fetch_by_version(opctx, &start.version).await?; - - // We only need to look at the latest deployment because it's the only - // one that could be running - - let latest_deployment = self.latest_update_deployment(opctx).await; - if let Ok(dep) = latest_deployment { - if dep.status == db::model::UpdateStatus::Updating { - // TODO: should "already updating" conflict be a new kind of error? - return Err(Error::ObjectAlreadyExists { - type_name: external::ResourceType::UpdateDeployment, - object_name: dep.id().to_string(), - }); - } - } - - let deployment = db::model::UpdateDeployment { - identity: db::model::UpdateDeploymentIdentity::new(Uuid::new_v4()), - version: db::model::SemverVersion(start.version), - status: db::model::UpdateStatus::Updating, - }; - self.db_datastore.create_update_deployment(opctx, deployment).await - } - - /// If there's a running update, change it to steady. Otherwise do nothing. - // TODO: codify the state machine around update deployments - pub(crate) async fn steady_update_deployment( - &self, - opctx: &OpContext, - ) -> UpdateResult { - let latest = self.latest_update_deployment(opctx).await?; - // already steady. do nothing in order to avoid updating `time_modified` - if latest.status == db::model::UpdateStatus::Steady { - return Ok(latest); - } - - self.db_datastore.steady_update_deployment(opctx, latest.id()).await - } - - pub(crate) async fn update_deployments_list_by_id( - &self, - opctx: &OpContext, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - self.db_datastore.update_deployments_list_by_id(opctx, pagparams).await - } + .map_err(HttpError::from)?; - pub(crate) async fn update_deployment_fetch_by_id( - &self, - opctx: &OpContext, - deployment_id: &Uuid, - ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - let (.., db_deployment) = LookupPath::new(opctx, &self.db_datastore) - .update_deployment_id(*deployment_id) - .fetch() - .await?; - Ok(db_deployment) + Ok(tuf_repo_description) } - pub(crate) async fn latest_update_deployment( + /// Downloads a file (currently not implemented). 
+ pub(crate) async fn updates_download_artifact( &self, - opctx: &OpContext, - ) -> LookupResult { - self.db_datastore.latest_update_deployment(opctx).await - } - - pub(crate) async fn lowest_component_system_version( - &self, - opctx: &OpContext, - ) -> LookupResult { - self.db_datastore.lowest_component_system_version(opctx).await - } - - pub(crate) async fn highest_component_system_version( - &self, - opctx: &OpContext, - ) -> LookupResult { - self.db_datastore.highest_component_system_version(opctx).await - } - - /// Inner function makes it easier to implement the logic where we ignore - /// ObjectAlreadyExists errors but let the others pass through - async fn populate_mock_system_updates_inner( - &self, - opctx: &OpContext, - ) -> CreateResult<()> { - let types = vec![ - shared::UpdateableComponentType::HubrisForPscRot, - shared::UpdateableComponentType::HubrisForPscSp, - shared::UpdateableComponentType::HubrisForSidecarRot, - shared::UpdateableComponentType::HubrisForSidecarSp, - shared::UpdateableComponentType::HubrisForGimletRot, - shared::UpdateableComponentType::HubrisForGimletSp, - shared::UpdateableComponentType::HeliosHostPhase1, - shared::UpdateableComponentType::HeliosHostPhase2, - shared::UpdateableComponentType::HostOmicron, - ]; - - // create system updates and associated component updates - for v in [1, 2, 3] { - let version = external::SemverVersion::new(v, 0, 0); - let su = self - .upsert_system_update( - opctx, - params::SystemUpdateCreate { version: version.clone() }, - ) - .await?; - - for component_type in types.clone() { - self.create_component_update( - &opctx, - params::ComponentUpdateCreate { - version: external::SemverVersion::new(1, v, 0), - system_update_id: su.identity.id, - component_type, - }, - ) - .await?; - } - } - - // create deployment for v1.0.0, stop it, then create one for v2.0.0. - // This makes plausible the state of the components: all v1 except for one v2 - self.create_update_deployment( - &opctx, - params::SystemUpdateStart { - version: external::SemverVersion::new(1, 0, 0), - }, - ) - .await?; - self.steady_update_deployment(opctx).await?; - - self.create_update_deployment( - &opctx, - params::SystemUpdateStart { - version: external::SemverVersion::new(2, 0, 0), - }, - ) - .await?; - - // now create components, with one component on a different system - // version from the others - - for (i, component_type) in types.iter().enumerate() { - let version = if i == 0 { - external::SemverVersion::new(1, 2, 0) - } else { - external::SemverVersion::new(1, 1, 0) - }; - - let system_version = if i == 0 { - external::SemverVersion::new(2, 0, 0) - } else { - external::SemverVersion::new(1, 0, 0) - }; - - self.create_updateable_component( - opctx, - params::UpdateableComponentCreate { - version, - system_version, - device_id: "a-device".to_string(), - component_type: component_type.clone(), - }, - ) - .await?; - } - - Ok(()) - } - - /// Populate the DB with update-related data. Data is hard-coded until we - /// figure out how to pull it from the TUF repo. - /// - /// We need this to be idempotent because it can be called arbitrarily many - /// times. The service functions we call to create these resources will - /// error on ID or version conflicts, so to remain idempotent we can simply - /// ignore those errors. We let other errors through. 
- pub(crate) async fn populate_mock_system_updates( - &self, - opctx: &OpContext, - ) -> CreateResult<()> { - self.populate_mock_system_updates_inner(opctx).await.or_else(|error| { - match error { - // ignore ObjectAlreadyExists but pass through other errors - external::Error::ObjectAlreadyExists { .. } => Ok(()), - _ => Err(error), - } - }) - } -} - -// TODO: convert system update tests to integration tests now that I know how to -// call nexus functions in those - -#[cfg(test)] -mod tests { - use assert_matches::assert_matches; - use std::num::NonZeroU32; - - use dropshot::PaginationOrder; - use nexus_db_queries::context::OpContext; - use nexus_db_queries::db::model::UpdateStatus; - use nexus_test_utils_macros::nexus_test; - use nexus_types::external_api::{ - params::{ - ComponentUpdateCreate, SystemUpdateCreate, SystemUpdateStart, - UpdateableComponentCreate, - }, - shared::UpdateableComponentType, - }; - use omicron_common::api::external::{self, DataPageParams}; - use uuid::Uuid; - - type ControlPlaneTestContext = - nexus_test_utils::ControlPlaneTestContext; - - pub fn test_opctx(cptestctx: &ControlPlaneTestContext) -> OpContext { - OpContext::for_tests( - cptestctx.logctx.log.new(o!()), - cptestctx.server.apictx.nexus.datastore().clone(), - ) - } - - pub fn test_pagparams() -> DataPageParams<'static, Uuid> { - DataPageParams { - marker: None, - direction: PaginationOrder::Ascending, - limit: NonZeroU32::new(100).unwrap(), - } - } - - #[nexus_test(server = crate::Server)] - async fn test_system_updates(cptestctx: &ControlPlaneTestContext) { - let nexus = &cptestctx.server.apictx.nexus; - let opctx = test_opctx(&cptestctx); - - // starts out with 3 populated - let system_updates = nexus - .system_updates_list_by_id(&opctx, &test_pagparams()) - .await - .unwrap(); - - assert_eq!(system_updates.len(), 3); - - let su1_create = SystemUpdateCreate { - version: external::SemverVersion::new(5, 0, 0), - }; - let su1 = nexus.upsert_system_update(&opctx, su1_create).await.unwrap(); - - // weird order is deliberate - let su3_create = SystemUpdateCreate { - version: external::SemverVersion::new(10, 0, 0), - }; - nexus.upsert_system_update(&opctx, su3_create).await.unwrap(); - - let su2_create = SystemUpdateCreate { - version: external::SemverVersion::new(0, 7, 0), - }; - let su2 = nexus.upsert_system_update(&opctx, su2_create).await.unwrap(); - - // now there should be a bunch of system updates, sorted by version descending - let versions: Vec = nexus - .system_updates_list_by_id(&opctx, &test_pagparams()) - .await - .unwrap() - .iter() - .map(|su| su.version.to_string()) - .collect(); - - assert_eq!(versions.len(), 6); - assert_eq!(versions[0], "10.0.0".to_string()); - assert_eq!(versions[1], "5.0.0".to_string()); - assert_eq!(versions[2], "3.0.0".to_string()); - assert_eq!(versions[3], "2.0.0".to_string()); - assert_eq!(versions[4], "1.0.0".to_string()); - assert_eq!(versions[5], "0.7.0".to_string()); - - // let's also make sure we can fetch by version - let su1_fetched = nexus - .system_update_fetch_by_version(&opctx, &su1.version) - .await - .unwrap(); - assert_eq!(su1.identity.id, su1_fetched.identity.id); - - // now create two component updates for update 1, one at root, and one - // hanging off the first - nexus - .create_component_update( - &opctx, - ComponentUpdateCreate { - version: external::SemverVersion::new(1, 0, 0), - component_type: UpdateableComponentType::BootloaderForRot, - system_update_id: su1.identity.id, - }, - ) - .await - .expect("Failed to create component update"); - 
nexus - .create_component_update( - &opctx, - ComponentUpdateCreate { - version: external::SemverVersion::new(2, 0, 0), - component_type: UpdateableComponentType::HubrisForGimletSp, - system_update_id: su1.identity.id, - }, - ) - .await - .expect("Failed to create component update"); - - // now there should be two component updates - let cus_for_su1 = nexus - .system_update_list_components(&opctx, &su1.version) - .await - .unwrap(); - - assert_eq!(cus_for_su1.len(), 2); - - // other system update should not be associated with any component updates - let cus_for_su2 = nexus - .system_update_list_components(&opctx, &su2.version) - .await - .unwrap(); - - assert_eq!(cus_for_su2.len(), 0); - } - - #[nexus_test(server = crate::Server)] - async fn test_semver_max(cptestctx: &ControlPlaneTestContext) { - let nexus = &cptestctx.server.apictx.nexus; - let opctx = test_opctx(&cptestctx); - - let expected = "Invalid Value: version, Major, minor, and patch version must be less than 99999999"; - - // major, minor, and patch are all capped - - let su_create = SystemUpdateCreate { - version: external::SemverVersion::new(100000000, 0, 0), - }; - let error = - nexus.upsert_system_update(&opctx, su_create).await.unwrap_err(); - assert!(error.to_string().contains(expected)); - - let su_create = SystemUpdateCreate { - version: external::SemverVersion::new(0, 100000000, 0), - }; - let error = - nexus.upsert_system_update(&opctx, su_create).await.unwrap_err(); - assert!(error.to_string().contains(expected)); - - let su_create = SystemUpdateCreate { - version: external::SemverVersion::new(0, 0, 100000000), - }; - let error = - nexus.upsert_system_update(&opctx, su_create).await.unwrap_err(); - assert!(error.to_string().contains(expected)); - } - - #[nexus_test(server = crate::Server)] - async fn test_updateable_components(cptestctx: &ControlPlaneTestContext) { - let nexus = &cptestctx.server.apictx.nexus; - let opctx = test_opctx(&cptestctx); - - // starts out populated - let components = nexus - .updateable_components_list_by_id(&opctx, &test_pagparams()) - .await - .unwrap(); - - assert_eq!(components.len(), 9); - - // with no components these should both 500. as discussed in the - // implementation, this is appropriate because we should never be - // running the external API without components populated - // - // let low = - // nexus.lowest_component_system_version(&opctx).await.unwrap_err(); - // assert_matches!(low, external::Error::InternalError { .. }); - // let high = - // nexus.highest_component_system_version(&opctx).await.unwrap_err(); - // assert_matches!(high, external::Error::InternalError { .. }); - - // creating a component if its system_version doesn't exist is a 404 - let uc_create = UpdateableComponentCreate { - version: external::SemverVersion::new(0, 4, 1), - system_version: external::SemverVersion::new(0, 2, 0), - component_type: UpdateableComponentType::BootloaderForSp, - device_id: "look-a-device".to_string(), - }; - let uc_404 = nexus - .create_updateable_component(&opctx, uc_create.clone()) - .await - .unwrap_err(); - assert_matches!(uc_404, external::Error::ObjectNotFound { .. 
}); - - // create system updates for the component updates to hang off of - let v020 = external::SemverVersion::new(0, 2, 0); - nexus - .upsert_system_update(&opctx, SystemUpdateCreate { version: v020 }) - .await - .expect("Failed to create system update"); - let v3 = external::SemverVersion::new(4, 0, 0); - nexus - .upsert_system_update(&opctx, SystemUpdateCreate { version: v3 }) - .await - .expect("Failed to create system update"); - let v10 = external::SemverVersion::new(10, 0, 0); - nexus - .upsert_system_update(&opctx, SystemUpdateCreate { version: v10 }) - .await - .expect("Failed to create system update"); - - // now uc_create and friends will work - nexus - .create_updateable_component(&opctx, uc_create) - .await - .expect("failed to create updateable component"); - nexus - .create_updateable_component( - &opctx, - UpdateableComponentCreate { - version: external::SemverVersion::new(0, 4, 1), - system_version: external::SemverVersion::new(3, 0, 0), - component_type: UpdateableComponentType::HeliosHostPhase2, - device_id: "another-device".to_string(), - }, - ) - .await - .expect("failed to create updateable component"); - nexus - .create_updateable_component( - &opctx, - UpdateableComponentCreate { - version: external::SemverVersion::new(0, 4, 1), - system_version: external::SemverVersion::new(10, 0, 0), - component_type: UpdateableComponentType::HeliosHostPhase1, - device_id: "a-third-device".to_string(), - }, - ) - .await - .expect("failed to create updateable component"); - - // now there should be 3 more, or 12 - let components = nexus - .updateable_components_list_by_id(&opctx, &test_pagparams()) - .await - .unwrap(); - - assert_eq!(components.len(), 12); - - let low = nexus.lowest_component_system_version(&opctx).await.unwrap(); - assert_eq!(&low.to_string(), "0.2.0"); - let high = - nexus.highest_component_system_version(&opctx).await.unwrap(); - assert_eq!(&high.to_string(), "10.0.0"); - - // TODO: update the version of a component - } - - #[nexus_test(server = crate::Server)] - async fn test_update_deployments(cptestctx: &ControlPlaneTestContext) { - let nexus = &cptestctx.server.apictx.nexus; - let opctx = test_opctx(&cptestctx); - - // starts out with one populated - let deployments = nexus - .update_deployments_list_by_id(&opctx, &test_pagparams()) - .await - .unwrap(); - - assert_eq!(deployments.len(), 2); - - // start update fails with nonexistent version - let not_found = nexus - .create_update_deployment( - &opctx, - SystemUpdateStart { - version: external::SemverVersion::new(6, 0, 0), - }, - ) - .await - .unwrap_err(); - - assert_matches!(not_found, external::Error::ObjectNotFound { .. }); - - // starting with existing version fails because there's already an - // update running - let start_v3 = SystemUpdateStart { - version: external::SemverVersion::new(3, 0, 0), - }; - let already_updating = nexus - .create_update_deployment(&opctx, start_v3.clone()) - .await - .unwrap_err(); - - assert_matches!( - already_updating, - external::Error::ObjectAlreadyExists { .. 
}
-        );
-
-        // stop the running update
-        nexus
-            .steady_update_deployment(&opctx)
-            .await
-            .expect("Failed to stop running update");
-
-        // now starting an update succeeds
-        let d = nexus
-            .create_update_deployment(&opctx, start_v3)
-            .await
-            .expect("Failed to create deployment");
-
-        let deployment_ids: Vec<Uuid> = nexus
-            .update_deployments_list_by_id(&opctx, &test_pagparams())
-            .await
-            .unwrap()
-            .into_iter()
-            .map(|d| d.identity.id)
-            .collect();
-
-        assert_eq!(deployment_ids.len(), 3);
-        assert!(deployment_ids.contains(&d.identity.id));
-
-        // latest deployment returns the one just created
-        let latest_deployment =
-            nexus.latest_update_deployment(&opctx).await.unwrap();
-
-        assert_eq!(latest_deployment.identity.id, d.identity.id);
-        assert_eq!(latest_deployment.status, UpdateStatus::Updating);
-        assert!(
-            latest_deployment.identity.time_modified
-                == d.identity.time_modified
-        );
-
-        // stopping update updates both its status and its time_modified
-        nexus
-            .steady_update_deployment(&opctx)
-            .await
-            .expect("Failed to steady running update");
-
-        let latest_deployment =
-            nexus.latest_update_deployment(&opctx).await.unwrap();
-
-        assert_eq!(latest_deployment.identity.id, d.identity.id);
-        assert_eq!(latest_deployment.status, UpdateStatus::Steady);
-        assert!(
-            latest_deployment.identity.time_modified > d.identity.time_modified
-        );
-    }
-
-    #[nexus_test(server = crate::Server)]
-    async fn test_populate_mock_system_updates(
-        cptestctx: &ControlPlaneTestContext,
-    ) {
-        let nexus = &cptestctx.server.apictx.nexus;
-        let opctx = test_opctx(&cptestctx);
-
-        // starts out with updates because they're populated at rack init
-        let su_count = nexus
-            .system_updates_list_by_id(&opctx, &test_pagparams())
-            .await
-            .unwrap()
-            .len();
-        assert!(su_count > 0);
-
-        // additional call doesn't error because the conflict gets eaten
-        let result = nexus.populate_mock_system_updates(&opctx).await;
-        assert!(result.is_ok());
-
-        // count didn't change
-        let system_updates = nexus
-            .system_updates_list_by_id(&opctx, &test_pagparams())
-            .await
-            .unwrap();
-        assert_eq!(system_updates.len(), su_count);
+        _opctx: &OpContext,
+        _artifact: ArtifactId,
+    ) -> Result<Vec<u8>, Error> {
+        // TODO: this is part of the TUF repo depot.
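+        // Until the depot exists, fail loudly with a 500 instead of
+        // silently serving artifacts from the local filesystem (the old
+        // behavior removed above).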
+ return Err(Error::internal_error( + "artifact download not implemented, \ + will be part of TUF repo depot", + )); } } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a6cb9e80fe..3c3c40d026 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -8,14 +8,13 @@ use super::{ console_api, device_auth, params, views::{ self, Certificate, Group, IdentityProvider, Image, IpPool, IpPoolRange, - PhysicalDisk, Project, Rack, Role, Silo, SiloUtilization, Sled, - Snapshot, SshKey, User, UserBuiltin, Vpc, VpcRouter, VpcSubnet, + PhysicalDisk, Project, Rack, Role, Silo, SiloQuotas, SiloUtilization, + Sled, Snapshot, SshKey, User, UserBuiltin, Utilization, Vpc, VpcRouter, + VpcSubnet, }, }; use crate::external_api::shared; use crate::ServerContext; -use chrono::Utc; -use dropshot::ApiDescription; use dropshot::EmptyScanParams; use dropshot::HttpError; use dropshot::HttpResponseAccepted; @@ -34,6 +33,7 @@ use dropshot::WhichPage; use dropshot::{ channel, endpoint, WebsocketChannelResult, WebsocketConnection, }; +use dropshot::{ApiDescription, StreamingBody}; use ipnetwork::IpNetwork; use nexus_db_queries::authz; use nexus_db_queries::db; @@ -41,9 +41,6 @@ use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup::ImageLookup; use nexus_db_queries::db::lookup::ImageParentLookup; use nexus_db_queries::db::model::Name; -use nexus_types::external_api::views::SiloQuotas; -use nexus_types::external_api::views::Utilization; -use nexus_types::identity::AssetIdentityMetadata; use omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::marker_for_name; use omicron_common::api::external::http_pagination::marker_for_name_or_id; @@ -76,6 +73,8 @@ use omicron_common::api::external::RouterRouteKind; use omicron_common::api::external::SwitchPort; use omicron_common::api::external::SwitchPortSettings; use omicron_common::api::external::SwitchPortSettingsView; +use omicron_common::api::external::TufRepoGetResponse; +use omicron_common::api::external::TufRepoInsertResponse; use omicron_common::api::external::VpcFirewallRuleUpdateParams; use omicron_common::api::external::VpcFirewallRules; use omicron_common::bail_unless; @@ -309,16 +308,8 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(system_metric)?; api.register(silo_metric)?; - api.register(system_update_refresh)?; - api.register(system_version)?; - api.register(system_component_version_list)?; - api.register(system_update_list)?; - api.register(system_update_view)?; - api.register(system_update_start)?; - api.register(system_update_stop)?; - api.register(system_update_components_list)?; - api.register(update_deployments_list)?; - api.register(update_deployment_view)?; + api.register(system_update_put_repository)?; + api.register(system_update_get_repository)?; api.register(user_list)?; api.register(silo_user_list)?; @@ -433,12 +424,6 @@ async fn system_policy_view( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Path parameters for `/by-id/` endpoints -#[derive(Deserialize, JsonSchema)] -struct ByIdPathParams { - id: Uuid, -} - /// Update the top-level IAM policy #[endpoint { method = PUT, @@ -5376,320 +5361,56 @@ async fn silo_metric( // Updates -/// Refresh update data -#[endpoint { - method = POST, - path = "/v1/system/update/refresh", - tags = ["system/update"], - unpublished = true, -}] -async fn 
system_update_refresh( - rqctx: RequestContext>, -) -> Result { - let apictx = rqctx.context(); - let nexus = &apictx.nexus; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - nexus.updates_refresh_metadata(&opctx).await?; - Ok(HttpResponseUpdatedNoContent()) - }; - apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await -} - -/// View system version and update status -#[endpoint { - method = GET, - path = "/v1/system/update/version", - tags = ["system/update"], - unpublished = true, -}] -async fn system_version( - rqctx: RequestContext>, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.nexus; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - // The only way we have no latest deployment is if the rack was just set - // up and no system updates have ever been run. In this case there is no - // update running, so we can fall back to steady. - let status = nexus - .latest_update_deployment(&opctx) - .await - .map_or(views::UpdateStatus::Steady, |d| d.status.into()); - - // Updateable components, however, are populated at rack setup before - // the external API is even started, so if we get here and there are no - // components, that's a real issue and the 500 we throw is appropriate. - let low = nexus.lowest_component_system_version(&opctx).await?.into(); - let high = nexus.highest_component_system_version(&opctx).await?.into(); - - Ok(HttpResponseOk(views::SystemVersion { - version_range: views::VersionRange { low, high }, - status, - })) - }; - apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await -} - -/// View version and update status of component tree -#[endpoint { - method = GET, - path = "/v1/system/update/components", - tags = ["system/update"], - unpublished = true, -}] -async fn system_component_version_list( - rqctx: RequestContext>, - query_params: Query, -) -> Result>, HttpError> -{ - let apictx = rqctx.context(); - let nexus = &apictx.nexus; - let query = query_params.into_inner(); - let pagparams = data_page_params_for(&rqctx, &query)?; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let components = nexus - .updateable_components_list_by_id(&opctx, &pagparams) - .await? - .into_iter() - .map(|u| u.into()) - .collect(); - Ok(HttpResponseOk(ScanById::results_page( - &query, - components, - &|_, u: &views::UpdateableComponent| u.identity.id, - )?)) - }; - apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await -} - -/// List all updates -#[endpoint { - method = GET, - path = "/v1/system/update/updates", - tags = ["system/update"], - unpublished = true, -}] -async fn system_update_list( - rqctx: RequestContext>, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.nexus; - let query = query_params.into_inner(); - let pagparams = data_page_params_for(&rqctx, &query)?; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let updates = nexus - .system_updates_list_by_id(&opctx, &pagparams) - .await? 
-            .into_iter()
-            .map(|u| u.into())
-            .collect();
-        Ok(HttpResponseOk(ScanById::results_page(
-            &query,
-            updates,
-            &|_, u: &views::SystemUpdate| u.identity.id,
-        )?))
-    };
-    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
-}
-
-/// View system update
-#[endpoint {
-    method = GET,
-    path = "/v1/system/update/updates/{version}",
-    tags = ["system/update"],
-    unpublished = true,
-}]
-async fn system_update_view(
-    rqctx: RequestContext<Arc<ServerContext>>,
-    path_params: Path<params::SystemUpdatePath>,
-) -> Result<HttpResponseOk<views::SystemUpdate>, HttpError> {
-    let apictx = rqctx.context();
-    let nexus = &apictx.nexus;
-    let path = path_params.into_inner();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let system_update =
-            nexus.system_update_fetch_by_version(&opctx, &path.version).await?;
-        Ok(HttpResponseOk(system_update.into()))
-    };
-    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
-}
-
-/// View system update component tree
+/// Upload a TUF repository
 #[endpoint {
-    method = GET,
-    path = "/v1/system/update/updates/{version}/components",
+    method = PUT,
+    path = "/v1/system/update/repository",
     tags = ["system/update"],
     unpublished = true,
 }]
-async fn system_update_components_list(
+async fn system_update_put_repository(
     rqctx: RequestContext<Arc<ServerContext>>,
-    path_params: Path<params::SystemUpdatePath>,
-) -> Result<HttpResponseOk<ResultsPage<views::ComponentUpdate>>, HttpError> {
+    query: Query<params::UpdatesPutRepositoryParams>,
+    body: StreamingBody,
+) -> Result<HttpResponseOk<TufRepoInsertResponse>, HttpError> {
     let apictx = rqctx.context();
     let nexus = &apictx.nexus;
-    let path = path_params.into_inner();
     let handler = async {
         let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let components = nexus
-            .system_update_list_components(&opctx, &path.version)
-            .await?
-            .into_iter()
-            .map(|i| i.into())
-            .collect();
-        Ok(HttpResponseOk(ResultsPage { items: components, next_page: None }))
+        let query = query.into_inner();
+        let body = body.into_stream();
+        let update =
+            nexus.updates_put_repository(&opctx, body, query.file_name).await?;
+        Ok(HttpResponseOk(update))
     };
     apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
 }

-/// Start system update
+/// Get the description of a repository by system version.
 #[endpoint {
-    method = POST,
-    path = "/v1/system/update/start",
-    tags = ["system/update"],
-    unpublished = true,
-}]
-async fn system_update_start(
-    rqctx: RequestContext<Arc<ServerContext>>,
-    // The use of the request body here instead of a path param is deliberate.
-    // Unlike instance start (which uses a path param), update start is about
-    // modifying the state of the system rather than the state of the resource
-    // (instance there, system update here) identified by the param. This
-    // approach also gives us symmetry with the /stop endpoint.
-    update: TypedBody<params::SystemUpdateStart>,
-) -> Result<HttpResponseAccepted<views::UpdateDeployment>, HttpError> {
-    let apictx = rqctx.context();
-    let _nexus = &apictx.nexus;
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        opctx.authorize(authz::Action::Modify, &authz::FLEET).await?;
-
-        // inverse situation to stop: we only want to actually start an update
-        // if there isn't one already in progress.
-
-        // 1. check that there is no update in progress
-        //   a. if there is one, this should probably 409
-        // 2. kick off the update start saga, which
-        //   a. tells the update system to get going
-        //   b. creates an update deployment
-
-        // similar question for stop: do we return the deployment directly, or a
-        // special StartUpdateResult that includes a deployment ID iff an update
-        // was actually started
-
-        Ok(HttpResponseAccepted(views::UpdateDeployment {
-            identity: AssetIdentityMetadata {
-                id: Uuid::new_v4(),
-                time_created: Utc::now(),
-                time_modified: Utc::now(),
-            },
-            version: update.into_inner().version,
-            status: views::UpdateStatus::Updating,
-        }))
-    };
-    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
-}
-
-/// Stop system update
-///
-/// If there is no update in progress, do nothing.
-#[endpoint {
-    method = POST,
-    path = "/v1/system/update/stop",
+    method = GET,
+    path = "/v1/system/update/repository/{system_version}",
     tags = ["system/update"],
     unpublished = true,
 }]
-async fn system_update_stop(
-    rqctx: RequestContext<Arc<ServerContext>>,
-) -> Result<HttpResponseUpdatedNoContent, HttpError> {
-    let apictx = rqctx.context();
-    let _nexus = &apictx.nexus;
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        opctx.authorize(authz::Action::Modify, &authz::FLEET).await?;
-
-        // TODO: Implement stopping an update. Should probably be a saga.
-
-        // Ask update subsystem if it's doing anything. If so, tell it to stop.
-        // This could be done in a single call to the updater if the latter can
-        // respond to a stop command differently depending on whether it did
-        // anything or not.
-
-        // If we did in fact stop a running update, update the status on the
-        // latest update deployment in the DB to `stopped` and respond with that
-        // deployment. If we do nothing, what should we return? Maybe instead of
-        // responding with the deployment, this endpoint gets its own
-        // `StopUpdateResult` response view that says whether it was a noop, and
-        // if it wasn't, includes the ID of the stopped deployment, which allows
-        // the client to fetch it if it actually wants it.
-
-        Ok(HttpResponseUpdatedNoContent())
-    };
-    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
-}
-
-/// List all update deployments
-#[endpoint {
-    method = GET,
-    path = "/v1/system/update/deployments",
-    tags = ["system/update"],
-    unpublished = true,
-}]
-async fn update_deployments_list(
+async fn system_update_get_repository(
     rqctx: RequestContext<Arc<ServerContext>>,
-    query_params: Query<PaginatedById>,
-) -> Result<HttpResponseOk<ResultsPage<views::UpdateDeployment>>, HttpError> {
+    path_params: Path<params::UpdatesGetRepositoryParams>,
+) -> Result<HttpResponseOk<TufRepoGetResponse>, HttpError> {
     let apictx = rqctx.context();
     let nexus = &apictx.nexus;
-    let query = query_params.into_inner();
-    let pagparams = data_page_params_for(&rqctx, &query)?;
     let handler = async {
         let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let updates = nexus
-            .update_deployments_list_by_id(&opctx, &pagparams)
-            .await?
- .into_iter() - .map(|u| u.into()) - .collect(); - Ok(HttpResponseOk(ScanById::results_page( - &query, - updates, - &|_, u: &views::UpdateDeployment| u.identity.id, - )?)) + let params = path_params.into_inner(); + let description = + nexus.updates_get_repository(&opctx, params.system_version).await?; + Ok(HttpResponseOk(TufRepoGetResponse { + description: description.into_external(), + })) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Fetch a system update deployment -#[endpoint { - method = GET, - path = "/v1/system/update/deployments/{id}", - tags = ["system/update"], - unpublished = true, -}] -async fn update_deployment_view( - rqctx: RequestContext>, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.nexus; - let path = path_params.into_inner(); - let id = &path.id; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let deployment = - nexus.update_deployment_fetch_by_id(&opctx, id).await?; - Ok(HttpResponseOk(deployment.into())) - }; - apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await -} // Silo users /// List users diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 63578e360a..58038cb37a 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -40,7 +40,7 @@ use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::api::internal::nexus::SledInstanceState; -use omicron_common::api::internal::nexus::UpdateArtifactId; +use omicron_common::update::ArtifactId; use oximeter::types::ProducerResults; use oximeter_producer::{collect, ProducerIdPathParams}; use schemars::JsonSchema; @@ -438,15 +438,16 @@ async fn cpapi_metrics_collect( }] async fn cpapi_artifact_download( request_context: RequestContext>, - path_params: Path, + path_params: Path, ) -> Result, HttpError> { let context = request_context.context(); let nexus = &context.nexus; let opctx = crate::context::op_context_for_internal_api(&request_context).await; // TODO: return 404 if the error we get here says that the record isn't found - let body = - nexus.download_artifact(&opctx, path_params.into_inner()).await?; + let body = nexus + .updates_download_artifact(&opctx, path_params.into_inner()) + .await?; Ok(HttpResponseOk(Body::from(body).into())) } diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 01aca36e1d..e1392440a1 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -20,7 +20,6 @@ pub mod external_api; // Public for testing mod internal_api; mod populate; mod saga_interface; -mod updates; // public for testing pub use app::test_interfaces::TestInterfaces; pub use app::Nexus; diff --git a/nexus/src/updates.rs b/nexus/src/updates.rs deleted file mode 100644 index 2f57868acc..0000000000 --- a/nexus/src/updates.rs +++ /dev/null @@ -1,74 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
- -use buf_list::BufList; -use futures::TryStreamExt; -use nexus_db_queries::db; -use omicron_common::update::ArtifactsDocument; -use std::convert::TryInto; - -pub(crate) async fn read_artifacts( - trusted_root: &[u8], - mut base_url: String, -) -> Result< - Vec, - Box, -> { - if !base_url.ends_with('/') { - base_url.push('/'); - } - - let repository = tough::RepositoryLoader::new( - &trusted_root, - format!("{}metadata/", base_url).parse()?, - format!("{}targets/", base_url).parse()?, - ) - .load() - .await?; - - let artifact_document = - match repository.read_target(&"artifacts.json".parse()?).await? { - Some(target) => target.try_collect::().await?, - None => return Err("artifacts.json missing".into()), - }; - let artifacts: ArtifactsDocument = - serde_json::from_reader(buf_list::Cursor::new(&artifact_document))?; - - let valid_until = repository - .root() - .signed - .expires - .min(repository.snapshot().signed.expires) - .min(repository.targets().signed.expires) - .min(repository.timestamp().signed.expires); - - let mut v = Vec::new(); - for artifact in artifacts.artifacts { - // Skip any artifacts where we don't recognize its kind or the target - // name isn't in the repository - let target = - repository.targets().signed.targets.get(&artifact.target.parse()?); - let (kind, target) = match (artifact.kind.to_known(), target) { - (Some(kind), Some(target)) => (kind, target), - _ => break, - }; - - v.push(db::model::UpdateArtifact { - name: artifact.name, - version: db::model::SemverVersion(artifact.version), - kind: db::model::KnownArtifactKind(kind), - targets_role_version: repository - .targets() - .signed - .version - .get() - .try_into()?, - valid_until, - target_name: artifact.target, - target_sha256: hex::encode(&target.hashes.sha256), - target_length: target.length.try_into()?, - }); - } - Ok(v) -} diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index 4a7924770e..5605f33f75 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -36,6 +36,7 @@ serde_json.workspace = true serde_urlencoded.workspace = true slog.workspace = true tokio.workspace = true +tokio-util.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/test-utils/src/http_testing.rs b/nexus/test-utils/src/http_testing.rs index bf5370a925..ae62218c93 100644 --- a/nexus/test-utils/src/http_testing.rs +++ b/nexus/test-utils/src/http_testing.rs @@ -7,6 +7,7 @@ use anyhow::anyhow; use anyhow::ensure; use anyhow::Context; +use camino::Utf8Path; use dropshot::test_util::ClientTestContext; use dropshot::ResultsPage; use headers::authorization::Credentials; @@ -147,6 +148,35 @@ impl<'a> RequestBuilder<'a> { self } + /// Set the outgoing request body to the contents of a file. + /// + /// A handle to the file will be kept open until the request is completed. + /// + /// If `path` is `None`, the request body will be empty. + pub fn body_file(mut self, path: Option<&Utf8Path>) -> Self { + match path { + Some(path) => { + // Turn the file into a stream. (Opening the file with + // std::fs::File::open means that this method doesn't have to + // be async.) 
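+                    // If opening fails, the error is stored in `self.error`
+                    // below rather than returned here, so the builder chain
+                    // stays infallible until the request is actually executed.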
+ let file = std::fs::File::open(path).with_context(|| { + format!("failed to open request body file at {path}") + }); + match file { + Ok(file) => { + let stream = tokio_util::io::ReaderStream::new( + tokio::fs::File::from_std(file), + ); + self.body = hyper::Body::wrap_stream(stream); + } + Err(error) => self.error = Some(error), + } + } + None => self.body = hyper::Body::empty(), + }; + self + } + /// Set the outgoing request body using URL encoding /// and set the content type appropriately /// diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 4f606f2bff..c721fe3606 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -32,7 +32,6 @@ use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use omicron_common::api::external::RouteDestination; use omicron_common::api::external::RouteTarget; -use omicron_common::api::external::SemverVersion; use omicron_common::api::external::VpcFirewallRuleUpdateParams; use omicron_test_utils::certificates::CertificateChain; use once_cell::sync::Lazy; @@ -708,13 +707,6 @@ pub static DEMO_SSHKEY_CREATE: Lazy = pub static DEMO_SPECIFIC_SSHKEY_URL: Lazy = Lazy::new(|| format!("{}/{}", DEMO_SSHKEYS_URL, *DEMO_SSHKEY_NAME)); -// System update - -pub static DEMO_SYSTEM_UPDATE_PARAMS: Lazy = - Lazy::new(|| params::SystemUpdatePath { - version: SemverVersion::new(1, 0, 0), - }); - // Project Floating IPs pub static DEMO_FLOAT_IP_NAME: Lazy = Lazy::new(|| "float-ip".parse().unwrap()); @@ -1920,81 +1912,22 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { /* Updates */ VerifyEndpoint { - url: "/v1/system/update/refresh", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Post( - serde_json::Value::Null - )], - }, - - VerifyEndpoint { - url: "/v1/system/update/version", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Get], - }, - - VerifyEndpoint { - url: "/v1/system/update/components", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Get], - }, - - VerifyEndpoint { - url: "/v1/system/update/updates", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Get], - }, - - // TODO: make system update endpoints work instead of expecting 404 - - VerifyEndpoint { - url: "/v1/system/update/updates/1.0.0", + url: "/v1/system/update/repository?file_name=demo-repo.zip", visibility: Visibility::Public, unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Get], - }, - - VerifyEndpoint { - url: "/v1/system/update/updates/1.0.0/components", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Get], - }, - - VerifyEndpoint { - url: "/v1/system/update/start", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Post( - serde_json::to_value(&*DEMO_SYSTEM_UPDATE_PARAMS).unwrap() - )], - }, - - VerifyEndpoint { - url: "/v1/system/update/stop", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Post( - serde_json::Value::Null + allowed_methods: vec![AllowedMethod::Put( + // In reality this is the contents of a zip file. 
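+                // A null JSON body is sufficient here: this endpoint
+                // coverage test only verifies authz behavior (status
+                // codes), not a successful upload.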
+ serde_json::Value::Null, )], }, VerifyEndpoint { - url: "/v1/system/update/deployments", + url: "/v1/system/update/repository/1.0.0", visibility: Visibility::Public, unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Get], - }, - - VerifyEndpoint { - url: "/v1/system/update/deployments/120bbb6f-660a-440c-8cb7-199be202ddff", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::GetNonexistent], + // The update system is disabled, which causes a 500 error even for + // privileged users. That is captured by GetUnimplemented. + allowed_methods: vec![AllowedMethod::GetUnimplemented], }, /* Metrics */ diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 6cb99b9e45..4b68a6c4f2 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -40,7 +40,6 @@ mod sp_updater; mod ssh_keys; mod subnet_allocation; mod switch_port; -mod system_updates; mod unauthorized; mod unauthorized_coverage; mod updates; diff --git a/nexus/tests/integration_tests/system_updates.rs b/nexus/tests/integration_tests/system_updates.rs deleted file mode 100644 index aa00caac29..0000000000 --- a/nexus/tests/integration_tests/system_updates.rs +++ /dev/null @@ -1,219 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use dropshot::ResultsPage; -use http::{method::Method, StatusCode}; -use nexus_db_queries::context::OpContext; -use nexus_test_utils::http_testing::{AuthnMode, NexusRequest}; -use nexus_test_utils_macros::nexus_test; -use nexus_types::external_api::{ - params, shared::UpdateableComponentType, views, -}; -use omicron_common::api::external::SemverVersion; - -type ControlPlaneTestContext = - nexus_test_utils::ControlPlaneTestContext; - -// This file could be combined with ./updates.rs, but there's a lot going on in -// there that has nothing to do with testing the API endpoints. We could come up -// with more descriptive names. - -/// Because there are no create endpoints for these resources, we need to call -/// the `nexus` functions directly. 
-async fn populate_db(cptestctx: &ControlPlaneTestContext) { - let nexus = &cptestctx.server.apictx().nexus; - let opctx = OpContext::for_tests( - cptestctx.logctx.log.new(o!()), - cptestctx.server.apictx().nexus.datastore().clone(), - ); - - // system updates have to exist first - let create_su = - params::SystemUpdateCreate { version: SemverVersion::new(0, 2, 0) }; - nexus - .upsert_system_update(&opctx, create_su) - .await - .expect("Failed to create system update"); - let create_su = - params::SystemUpdateCreate { version: SemverVersion::new(1, 0, 1) }; - nexus - .upsert_system_update(&opctx, create_su) - .await - .expect("Failed to create system update"); - - nexus - .create_updateable_component( - &opctx, - params::UpdateableComponentCreate { - version: SemverVersion::new(0, 4, 1), - system_version: SemverVersion::new(0, 2, 0), - component_type: UpdateableComponentType::BootloaderForSp, - device_id: "look-a-device".to_string(), - }, - ) - .await - .expect("failed to create updateable component"); - - nexus - .create_updateable_component( - &opctx, - params::UpdateableComponentCreate { - version: SemverVersion::new(0, 4, 1), - system_version: SemverVersion::new(1, 0, 1), - component_type: UpdateableComponentType::HubrisForGimletSp, - device_id: "another-device".to_string(), - }, - ) - .await - .expect("failed to create updateable component"); -} - -#[nexus_test] -async fn test_system_version(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - - // Initially the endpoint 500s because there are no updateable components. - // This is the desired behavior because those are populated by rack startup - // before the external API starts, so it really is a problem if we can hit - // this endpoint without any data backing it. - // - // Because this data is now populated at rack init, this doesn't work as a - // test. If we really wanted to test it, we would have to run the tests - // without that bit of setup. 
- // - // NexusRequest::expect_failure( - // &client, - // StatusCode::INTERNAL_SERVER_ERROR, - // Method::GET, - // "/v1/system/update/version", - // ) - // .authn_as(AuthnMode::PrivilegedUser) - // .execute() - // .await - // .expect("Failed to 500 with no system version data"); - - // create two updateable components - populate_db(&cptestctx).await; - - let version = - NexusRequest::object_get(&client, "/v1/system/update/version") - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::() - .await; - - assert_eq!( - version, - views::SystemVersion { - version_range: views::VersionRange { - low: SemverVersion::new(0, 2, 0), - high: SemverVersion::new(2, 0, 0), - }, - status: views::UpdateStatus::Updating, - } - ); -} - -#[nexus_test] -async fn test_list_updates(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - - let updates = - NexusRequest::object_get(&client, &"/v1/system/update/updates") - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::>() - .await; - - assert_eq!(updates.items.len(), 3); -} - -#[nexus_test] -async fn test_list_components(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - - let component_updates = - NexusRequest::object_get(&client, &"/v1/system/update/components") - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::>() - .await; - - assert_eq!(component_updates.items.len(), 9); -} - -#[nexus_test] -async fn test_get_update(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - - // existing update works - let update = - NexusRequest::object_get(&client, &"/v1/system/update/updates/1.0.0") - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::() - .await; - - assert_eq!(update.version, SemverVersion::new(1, 0, 0)); - - // non-existent update 404s - NexusRequest::expect_failure( - client, - StatusCode::NOT_FOUND, - Method::GET, - "/v1/system/update/updates/1.0.1", - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("Failed to 404 on non-existent update"); -} - -#[nexus_test] -async fn test_list_update_components(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - - // listing components of an existing update works - let components = NexusRequest::object_get( - &client, - &"/v1/system/update/updates/1.0.0/components", - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::>() - .await; - - assert_eq!(components.items.len(), 9); - - // non existent 404s - NexusRequest::expect_failure( - client, - StatusCode::NOT_FOUND, - Method::GET, - "/v1/system/update/updates/1.0.1/components", - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("Failed to 404 on components of nonexistent system update"); -} - -#[nexus_test] -async fn test_update_deployments(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - - let deployments = - NexusRequest::object_get(&client, &"/v1/system/update/deployments") - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::>() - .await; - - assert_eq!(deployments.items.len(), 2); - - let first_dep = deployments.items.get(0).unwrap(); - - let dep_id = first_dep.identity.id.to_string(); - let dep_url = format!("/v1/system/update/deployments/{}", dep_id); - let deployment = NexusRequest::object_get(&client, &dep_url) - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::() - .await; - - assert_eq!(deployment.version, first_dep.version); -} diff --git 
a/nexus/tests/integration_tests/updates.rs b/nexus/tests/integration_tests/updates.rs index 418e12e001..e830348103 100644 --- a/nexus/tests/integration_tests/updates.rs +++ b/nexus/tests/integration_tests/updates.rs @@ -7,69 +7,49 @@ // - test that an unknown artifact returns 404, not 500 // - tests around target names and artifact names that contain dangerous paths like `../` -use async_trait::async_trait; -use camino_tempfile::Utf8TempDir; -use chrono::{Duration, Utc}; +use anyhow::{ensure, Context, Result}; +use camino::Utf8Path; +use camino_tempfile::{Builder, Utf8TempDir, Utf8TempPath}; +use clap::Parser; use dropshot::test_util::LogContext; -use dropshot::{ - endpoint, ApiDescription, HttpError, HttpServerStarter, Path, - RequestContext, -}; -use http::{Method, Response, StatusCode}; -use hyper::Body; +use http::{Method, StatusCode}; use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; use nexus_test_utils::{load_test_config, test_setup, test_setup_with_config}; +use omicron_common::api::external::{ + SemverVersion, TufRepoGetResponse, TufRepoInsertResponse, + TufRepoInsertStatus, +}; use omicron_common::api::internal::nexus::KnownArtifactKind; use omicron_common::nexus_config::UpdatesConfig; -use omicron_common::update::{Artifact, ArtifactKind, ArtifactsDocument}; use omicron_sled_agent::sim; -use ring::pkcs8::Document; -use ring::rand::{SecureRandom, SystemRandom}; -use ring::signature::Ed25519KeyPair; -use schemars::JsonSchema; +use pretty_assertions::assert_eq; use serde::Deserialize; -use std::collections::HashMap; -use std::convert::TryInto; -use std::fmt::{self, Debug}; +use std::fmt::Debug; use std::fs::File; use std::io::Write; -use std::num::NonZeroU64; -use std::path::PathBuf; -use tempfile::{NamedTempFile, TempDir}; -use tough::editor::signed::{PathExists, SignedRole}; -use tough::editor::RepositoryEditor; -use tough::key_source::KeySource; -use tough::schema::{KeyHolder, RoleKeys, RoleType, Root}; -use tough::sign::Sign; +use tufaceous_lib::assemble::{DeserializedManifest, ManifestTweak}; -const UPDATE_COMPONENT: &'static str = "omicron-test-component"; +const FAKE_MANIFEST_PATH: &'static str = "../tufaceous/manifests/fake.toml"; -#[tokio::test] -async fn test_update_end_to_end() { +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn test_update_uninitialized() -> Result<()> { let mut config = load_test_config(); - let logctx = LogContext::new("test_update_end_to_end", &config.pkg.log); + let logctx = LogContext::new("test_update_uninitialized", &config.pkg.log); + + // Build a fake TUF repo + let temp_dir = Utf8TempDir::new()?; + let archive_path = temp_dir.path().join("archive.zip"); + + let args = tufaceous::Args::try_parse_from([ + "tufaceous", + "assemble", + FAKE_MANIFEST_PATH, + archive_path.as_str(), + ]) + .context("error parsing args")?; + + args.exec(&logctx.log).await.context("error executing assemble command")?; - // build the TUF repo - let rng = SystemRandom::new(); - let tuf_repo = new_tuf_repo(&rng).await; - slog::info!(logctx.log, "TUF repo created at {}", tuf_repo.path()); - - // serve it over HTTP - let dropshot_config = Default::default(); - let mut api = ApiDescription::new(); - api.register(static_content).unwrap(); - let context = FileServerContext { base: tuf_repo.path().to_owned().into() }; - let server = - HttpServerStarter::new(&dropshot_config, api, context, &logctx.log) - .unwrap() - .start(); - let local_addr = server.local_addr(); - - // stand up the test environment - config.pkg.updates = 
Some(UpdatesConfig { - trusted_root: tuf_repo.path().join("metadata").join("1.root.json"), - default_base_url: format!("http://{}/", local_addr), - }); let cptestctx = test_setup_with_config::( "test_update_end_to_end", &mut config, @@ -79,212 +59,304 @@ async fn test_update_end_to_end() { .await; let client = &cptestctx.external_client; - // call /v1/system/update/refresh on nexus - // - download and verify the repo - // - return 204 Non Content - // - tells sled agent to do the thing - NexusRequest::new( - RequestBuilder::new(client, Method::POST, "/v1/system/update/refresh") - .expect_status(Some(StatusCode::NO_CONTENT)), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap(); + // Attempt to upload the repository to Nexus. This should fail with a 500 + // error because the updates system is not configured. + { + make_upload_request( + client, + &archive_path, + StatusCode::INTERNAL_SERVER_ERROR, + ) + .execute() + .await + .context("repository upload should have failed with 500 error")?; + } - let artifact_path = cptestctx.sled_agent_storage.path(); - let component_path = artifact_path.join(UPDATE_COMPONENT); - // check sled agent did the thing - assert_eq!(tokio::fs::read(component_path).await.unwrap(), TARGET_CONTENTS); + // Attempt to fetch a repository description from Nexus. This should also + // fail with a 500 error. + { + make_get_request( + client, + "1.0.0".parse().unwrap(), + StatusCode::INTERNAL_SERVER_ERROR, + ) + .execute() + .await + .context("repository fetch should have failed with 500 error")?; + } - server.close().await.expect("failed to shut down dropshot server"); cptestctx.teardown().await; logctx.cleanup_successful(); + + Ok(()) } -// =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn test_update_end_to_end() -> Result<()> { + let mut config = load_test_config(); + config.pkg.updates = Some(UpdatesConfig { + // XXX: This is currently not used by the update system, but + // trusted_root will become meaningful in the future. + trusted_root: "does-not-exist.json".into(), + }); + let logctx = LogContext::new("test_update_end_to_end", &config.pkg.log); -struct FileServerContext { - base: PathBuf, -} + // Build a fake TUF repo + let temp_dir = Utf8TempDir::new()?; + let archive_path = temp_dir.path().join("archive.zip"); -#[derive(Deserialize, JsonSchema)] -struct AllPath { - path: Vec, -} + let args = tufaceous::Args::try_parse_from([ + "tufaceous", + "assemble", + FAKE_MANIFEST_PATH, + archive_path.as_str(), + ]) + .context("error parsing args")?; -#[endpoint(method = GET, path = "/{path:.*}", unpublished = true)] -async fn static_content( - rqctx: RequestContext, - path: Path, -) -> Result, HttpError> { - // NOTE: this is a particularly brief and bad implementation of this to keep the test shorter. - // see https://github.com/oxidecomputer/dropshot/blob/main/dropshot/examples/file_server.rs for - // something more robust! - let mut fs_path = rqctx.context().base.clone(); - for component in path.into_inner().path { - fs_path.push(component); - } - let body = tokio::fs::read(fs_path).await.map_err(|e| { - // tough 0.15+ depend on ENOENT being translated into 404. - if e.kind() == std::io::ErrorKind::NotFound { - HttpError::for_not_found(None, e.to_string()) - } else { - HttpError::for_bad_request(None, e.to_string()) - } - })?; - Ok(Response::builder().status(StatusCode::OK).body(body.into())?) 
-} + args.exec(&logctx.log).await.context("error executing assemble command")?; -// =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= + let cptestctx = test_setup_with_config::( + "test_update_end_to_end", + &mut config, + sim::SimMode::Explicit, + None, + ) + .await; + let client = &cptestctx.external_client; -const TARGET_CONTENTS: &[u8] = b"hello world".as_slice(); - -async fn new_tuf_repo(rng: &(dyn SecureRandom + Sync)) -> Utf8TempDir { - let version = - NonZeroU64::new(Utc::now().timestamp().try_into().unwrap()).unwrap(); - let expires = Utc::now() + Duration::minutes(5); - - // create the key - let key_data = Ed25519KeyPair::generate_pkcs8(rng).unwrap(); - let key = Ed25519KeyPair::from_pkcs8(key_data.as_ref()).unwrap(); - let tuf_key = key.tuf_key(); - let key_id = tuf_key.key_id().unwrap(); - - // create the root role - let mut root = Root { - spec_version: "1.0.0".to_string(), - consistent_snapshot: true, - version: NonZeroU64::new(1).unwrap(), - expires, - keys: HashMap::new(), - roles: HashMap::new(), - _extra: HashMap::new(), + // Upload the repository to Nexus. + let mut initial_description = { + let response = + make_upload_request(client, &archive_path, StatusCode::OK) + .execute() + .await + .context("error uploading repository")?; + + let response = + serde_json::from_slice::(&response.body) + .context("error deserializing response body")?; + assert_eq!(response.status, TufRepoInsertStatus::Inserted); + response.recorded }; - root.keys.insert(key_id.clone(), tuf_key); - for role in [ - RoleType::Root, - RoleType::Snapshot, - RoleType::Targets, - RoleType::Timestamp, - ] { - root.roles.insert( - role, - RoleKeys { - keyids: vec![key_id.clone()], - threshold: NonZeroU64::new(1).unwrap(), - _extra: HashMap::new(), - }, - ); - } - - let signing_keys = - vec![Box::new(KeyKeySource(key_data)) as Box]; - // self-sign the root role - let signed_root = SignedRole::new( - root.clone(), - &KeyHolder::Root(root), - &signing_keys, - rng, - ) - .await - .unwrap(); + // Upload the repository to Nexus again. This should return a 200 with an + // `AlreadyExists` status. + let mut reupload_description = { + let response = + make_upload_request(client, &archive_path, StatusCode::OK) + .execute() + .await + .context("error uploading repository a second time")?; + + let response = + serde_json::from_slice::(&response.body) + .context("error deserializing response body")?; + assert_eq!(response.status, TufRepoInsertStatus::AlreadyExists); + response.recorded + }; - // TODO(iliana): there's no way to create a `RepositoryEditor` without having the root.json on - // disk. this is really unergonomic. 
write and upstream a fix - let mut root_tmp = NamedTempFile::new().unwrap(); - root_tmp.as_file_mut().write_all(signed_root.buffer()).unwrap(); - let mut editor = RepositoryEditor::new(&root_tmp).await.unwrap(); - root_tmp.close().unwrap(); - - editor - .targets_version(version) - .unwrap() - .targets_expires(expires) - .unwrap() - .snapshot_version(version) - .snapshot_expires(expires) - .timestamp_version(version) - .timestamp_expires(expires); - let (targets_dir, target_names) = generate_targets(); - for target in target_names { - editor.add_target_path(targets_dir.path().join(target)).await.unwrap(); - } + initial_description.sort_artifacts(); + reupload_description.sort_artifacts(); - let signed_repo = editor.sign(&signing_keys).await.unwrap(); + assert_eq!( + initial_description, reupload_description, + "initial description matches reupload" + ); - let repo = Utf8TempDir::new().unwrap(); - signed_repo.write(repo.path().join("metadata")).await.unwrap(); - signed_repo - .copy_targets( - targets_dir, - repo.path().join("targets"), - PathExists::Fail, + // Now get the repository that was just uploaded. + let mut get_description = { + let response = make_get_request( + client, + "1.0.0".parse().unwrap(), // this is the system version of the fake manifest + StatusCode::OK, ) + .execute() .await - .unwrap(); - - repo -} + .context("error fetching repository")?; -// Returns a temporary directory of targets and the list of filenames in it. -fn generate_targets() -> (TempDir, Vec<&'static str>) { - let dir = TempDir::new().unwrap(); - - // The update artifact. This will someday be a tarball of some variety. - std::fs::write( - dir.path().join(format!("{UPDATE_COMPONENT}-1")), - TARGET_CONTENTS, - ) - .unwrap(); - - // artifacts.json, which describes all available artifacts. - let artifacts = ArtifactsDocument { - system_version: "1.0.0".parse().unwrap(), - artifacts: vec![Artifact { - name: UPDATE_COMPONENT.into(), - version: "0.0.0".parse().unwrap(), - kind: ArtifactKind::from_known(KnownArtifactKind::ControlPlane), - target: format!("{UPDATE_COMPONENT}-1"), - }], + let response = + serde_json::from_slice::(&response.body) + .context("error deserializing response body")?; + response.description }; - let f = File::create(dir.path().join("artifacts.json")).unwrap(); - serde_json::to_writer_pretty(f, &artifacts).unwrap(); - (dir, vec!["omicron-test-component-1", "artifacts.json"]) -} + get_description.sort_artifacts(); -// =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= + assert_eq!( + initial_description, get_description, + "initial description matches fetched description" + ); -// Wrapper struct so that we can use an in-memory key as a key source. -// TODO(iliana): this should just be in tough with a lot less hacks -struct KeyKeySource(Document); + // TODO: attempt to download extracted artifacts. -impl Debug for KeyKeySource { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_struct("KeyKeySource").finish() + // Upload a new repository with the same system version but a different + // version for one of the components. This will produce a different hash, + // which should return an error. 
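+    // (A system version maps to exactly one repository hash in the
+    // database, so a re-upload whose computed hash differs is rejected
+    // with a 409 conflict, as checked below.)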
+ { + let tweaks = &[ManifestTweak::ArtifactVersion { + kind: KnownArtifactKind::GimletSp, + version: "2.0.0".parse().unwrap(), + }]; + let archive_path = + make_tweaked_archive(&logctx.log, &temp_dir, tweaks).await?; + + let response = make_upload_request( + client, + &archive_path, + StatusCode::CONFLICT, + ) + .execute() + .await + .context( + "error uploading repository with different artifact version \ + but same system version", + )?; + assert_error_message_contains( + &response.body, + "Uploaded repository with system version 1.0.0 has SHA256 hash", + )?; } -} -#[async_trait] -impl KeySource for KeyKeySource { - async fn as_sign( - &self, - ) -> Result, Box> + // Upload a new repository with a different system version and different + // contents (but same version) for an artifact. { - // this is a really ugly hack, because tough doesn't `impl Sign for &'a T where T: Sign`. - // awslabs/tough#446 - Ok(Box::new(Ed25519KeyPair::from_pkcs8(self.0.as_ref()).unwrap())) + let tweaks = &[ + ManifestTweak::SystemVersion("2.0.0".parse().unwrap()), + ManifestTweak::ArtifactContents { + kind: KnownArtifactKind::ControlPlane, + size_delta: 1024, + }, + ]; + let archive_path = + make_tweaked_archive(&logctx.log, &temp_dir, tweaks).await?; + + let response = + make_upload_request(client, &archive_path, StatusCode::CONFLICT) + .execute() + .await + .context( + "error uploading repository with artifact \ + containing different hash for same version", + )?; + assert_error_message_contains( + &response.body, + "Uploaded artifacts don't match existing artifacts with same IDs:", + )?; } - async fn write( - &self, - _value: &str, - _key_id_hex: &str, - ) -> Result<(), Box> { - unimplemented!(); + // Upload a new repository with a different system version but no other + // changes. This should be accepted. + { + let tweaks = &[ManifestTweak::SystemVersion("2.0.0".parse().unwrap())]; + let archive_path = + make_tweaked_archive(&logctx.log, &temp_dir, tweaks).await?; + + let response = + make_upload_request(client, &archive_path, StatusCode::OK) + .execute() + .await + .context("error uploading repository with different system version (should succeed)")?; + + let response = + serde_json::from_slice::(&response.body) + .context("error deserializing response body")?; + assert_eq!(response.status, TufRepoInsertStatus::Inserted); } + + cptestctx.teardown().await; + logctx.cleanup_successful(); + + Ok(()) +} + +async fn make_tweaked_archive( + log: &slog::Logger, + temp_dir: &Utf8TempDir, + tweaks: &[ManifestTweak], +) -> anyhow::Result { + let manifest = DeserializedManifest::tweaked_fake(tweaks); + let manifest_path = temp_dir.path().join("fake2.toml"); + let mut manifest_file = + File::create(&manifest_path).context("error creating manifest file")?; + let manifest_to_toml = manifest.to_toml()?; + manifest_file.write_all(manifest_to_toml.as_bytes())?; + + let archive_path = Builder::new() + .prefix("archive") + .suffix(".zip") + .tempfile_in(temp_dir.path()) + .context("error creating temp file for tweaked archive")? 
+ .into_temp_path(); + + let args = tufaceous::Args::try_parse_from([ + "tufaceous", + "assemble", + manifest_path.as_str(), + archive_path.as_str(), + ]) + .context("error parsing args")?; + + args.exec(log).await.context("error executing assemble command")?; + + Ok(archive_path) +} + +fn make_upload_request<'a>( + client: &'a dropshot::test_util::ClientTestContext, + archive_path: &'a Utf8Path, + expected_status: StatusCode, +) -> NexusRequest<'a> { + let file_name = + archive_path.file_name().expect("archive_path must have a file name"); + let request = NexusRequest::new( + RequestBuilder::new( + client, + Method::PUT, + &format!("/v1/system/update/repository?file_name={}", file_name), + ) + .body_file(Some(archive_path)) + .expect_status(Some(expected_status)), + ) + .authn_as(AuthnMode::PrivilegedUser); + request +} + +fn make_get_request( + client: &dropshot::test_util::ClientTestContext, + system_version: SemverVersion, + expected_status: StatusCode, +) -> NexusRequest<'_> { + let request = NexusRequest::new( + RequestBuilder::new( + client, + Method::GET, + &format!("/v1/system/update/repository/{system_version}"), + ) + .expect_status(Some(expected_status)), + ) + .authn_as(AuthnMode::PrivilegedUser); + request +} + +#[derive(Debug, Deserialize)] +struct ErrorBody { + message: String, +} + +// XXX: maybe replace this with a more detailed error code +fn assert_error_message_contains( + body: &[u8], + needle: &str, +) -> anyhow::Result<()> { + let body: ErrorBody = + serde_json::from_slice(body).context("body is not valid JSON")?; + ensure!( + body.message.contains(needle), + "expected body to contain {:?}, but it was {:?}", + needle, + body + ); + Ok(()) } // =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= diff --git a/nexus/tests/output/unexpected-authz-endpoints.txt b/nexus/tests/output/unexpected-authz-endpoints.txt index 1cd87a75e5..e8bb60224a 100644 --- a/nexus/tests/output/unexpected-authz-endpoints.txt +++ b/nexus/tests/output/unexpected-authz-endpoints.txt @@ -9,13 +9,5 @@ POST "/v1/vpc-router-routes?project=demo-project&vpc=demo-vpc&router=demo-vpc- GET "/v1/vpc-router-routes/demo-router-route?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" PUT "/v1/vpc-router-routes/demo-router-route?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" DELETE "/v1/vpc-router-routes/demo-router-route?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" -POST "/v1/system/update/refresh" -GET "/v1/system/update/version" -GET "/v1/system/update/components" -GET "/v1/system/update/updates" -GET "/v1/system/update/updates/1.0.0" -GET "/v1/system/update/updates/1.0.0/components" -POST "/v1/system/update/start" -POST "/v1/system/update/stop" -GET "/v1/system/update/deployments" -GET "/v1/system/update/deployments/120bbb6f-660a-440c-8cb7-199be202ddff" +PUT "/v1/system/update/repository?file_name=demo-repo.zip" +GET "/v1/system/update/repository/1.0.0" diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 62c8224461..c32dae4df9 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1960,32 +1960,17 @@ pub struct ResourceMetrics { // SYSTEM UPDATE +/// Parameters for PUT requests for `/v1/system/update/repository`. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct SystemUpdatePath { - pub version: SemverVersion, +pub struct UpdatesPutRepositoryParams { + /// The name of the uploaded file. 
+ pub file_name: String, } -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct SystemUpdateStart { - pub version: SemverVersion, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct SystemUpdateCreate { - pub version: SemverVersion, -} +/// Parameters for GET requests for `/v1/system/update/repository`. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct ComponentUpdateCreate { - pub version: SemverVersion, - pub component_type: shared::UpdateableComponentType, - pub system_update_id: Uuid, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct UpdateableComponentCreate { - pub version: SemverVersion, +#[derive(Clone, Debug, Deserialize, JsonSchema)] +pub struct UpdatesGetRepositoryParams { + /// The version to get. pub system_version: SemverVersion, - pub component_type: shared::UpdateableComponentType, - pub device_id: String, } diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 5e31be7af8..45cfe8e267 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -13,7 +13,7 @@ use chrono::DateTime; use chrono::Utc; use omicron_common::api::external::{ ByteCount, Digest, Error, IdentityMetadata, InstanceState, Ipv4Net, - Ipv6Net, Name, ObjectIdentity, RoleName, SemverVersion, SimpleIdentity, + Ipv6Net, Name, ObjectIdentity, RoleName, SimpleIdentity, }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -608,65 +608,6 @@ pub enum DeviceAccessTokenType { Bearer, } -// SYSTEM UPDATES - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] -pub struct VersionRange { - pub low: SemverVersion, - pub high: SemverVersion, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] -#[serde(tag = "status", rename_all = "snake_case")] -pub enum UpdateStatus { - Updating, - Steady, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] -pub struct SystemVersion { - pub version_range: VersionRange, - pub status: UpdateStatus, - // TODO: time_released? time_last_applied? 
I got a fever and the only - // prescription is more timestamps -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct SystemUpdate { - #[serde(flatten)] - pub identity: AssetIdentityMetadata, - pub version: SemverVersion, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct ComponentUpdate { - #[serde(flatten)] - pub identity: AssetIdentityMetadata, - - pub component_type: shared::UpdateableComponentType, - pub version: SemverVersion, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct UpdateableComponent { - #[serde(flatten)] - pub identity: AssetIdentityMetadata, - - pub device_id: String, - pub component_type: shared::UpdateableComponentType, - pub version: SemverVersion, - pub system_version: SemverVersion, - pub status: UpdateStatus, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct UpdateDeployment { - #[serde(flatten)] - pub identity: AssetIdentityMetadata, - pub version: SemverVersion, - pub status: UpdateStatus, -} - // SYSTEM HEALTH #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, JsonSchema)] diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index b5cbb25c66..2a047068ee 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -18,10 +18,10 @@ { "in": "path", "name": "kind", - "description": "The kind of update artifact this is.", + "description": "The kind of artifact this is.", "required": true, "schema": { - "$ref": "#/components/schemas/KnownArtifactKind" + "type": "string" } }, { @@ -6534,21 +6534,6 @@ "ZpoolPutResponse": { "type": "object" }, - "KnownArtifactKind": { - "description": "Kinds of update artifacts, as used by Nexus to determine what updates are available and by sled-agent to determine how to apply an update when asked.", - "type": "string", - "enum": [ - "gimlet_sp", - "gimlet_rot", - "host", - "trampoline", - "control_plane", - "psc_sp", - "psc_rot", - "switch_sp", - "switch_rot" - ] - }, "SemverVersion": { "type": "string", "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" diff --git a/schema/crdb/27.0.0/up01.sql b/schema/crdb/27.0.0/up01.sql new file mode 100644 index 0000000000..5b7fb4df93 --- /dev/null +++ b/schema/crdb/27.0.0/up01.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.update_deployment; diff --git a/schema/crdb/27.0.0/up02.sql b/schema/crdb/27.0.0/up02.sql new file mode 100644 index 0000000000..a6ab82583d --- /dev/null +++ b/schema/crdb/27.0.0/up02.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.updateable_component; diff --git a/schema/crdb/27.0.0/up03.sql b/schema/crdb/27.0.0/up03.sql new file mode 100644 index 0000000000..8a9b89bd5c --- /dev/null +++ b/schema/crdb/27.0.0/up03.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.system_update_component_update; diff --git a/schema/crdb/27.0.0/up04.sql b/schema/crdb/27.0.0/up04.sql new file mode 100644 index 0000000000..9fb8d61a1e --- /dev/null +++ b/schema/crdb/27.0.0/up04.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.component_update; diff --git a/schema/crdb/27.0.0/up05.sql b/schema/crdb/27.0.0/up05.sql new file mode 100644 index 0000000000..bb76e717ab --- /dev/null +++ b/schema/crdb/27.0.0/up05.sql @@ -0,0 +1 @@ +DROP TYPE IF EXISTS omicron.public.updateable_component_type; diff --git a/schema/crdb/27.0.0/up06.sql b/schema/crdb/27.0.0/up06.sql new file mode 
100644 index 0000000000..a68d6595bb --- /dev/null +++ b/schema/crdb/27.0.0/up06.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.system_update; diff --git a/schema/crdb/27.0.0/up07.sql b/schema/crdb/27.0.0/up07.sql new file mode 100644 index 0000000000..ddcbbbb8fd --- /dev/null +++ b/schema/crdb/27.0.0/up07.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.update_artifact; diff --git a/schema/crdb/27.0.0/up08.sql b/schema/crdb/27.0.0/up08.sql new file mode 100644 index 0000000000..75a15dc817 --- /dev/null +++ b/schema/crdb/27.0.0/up08.sql @@ -0,0 +1 @@ +DROP TYPE IF EXISTS omicron.public.update_artifact_kind; diff --git a/schema/crdb/27.0.0/up09.sql b/schema/crdb/27.0.0/up09.sql new file mode 100644 index 0000000000..984aff57de --- /dev/null +++ b/schema/crdb/27.0.0/up09.sql @@ -0,0 +1 @@ +DROP TYPE IF EXISTS omicron.public.update_status; diff --git a/schema/crdb/27.0.0/up10.sql b/schema/crdb/27.0.0/up10.sql new file mode 100644 index 0000000000..ddb13ca1c0 --- /dev/null +++ b/schema/crdb/27.0.0/up10.sql @@ -0,0 +1,33 @@ +-- Describes a single uploaded TUF repo. +-- +-- Identified by both a random uuid and its SHA256 hash. The hash could be the +-- primary key, but it seems unnecessarily large and unwieldy. +CREATE TABLE IF NOT EXISTS omicron.public.tuf_repo ( + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + + sha256 STRING(64) NOT NULL, + + -- The version of the targets.json role that was used to generate the repo. + targets_role_version INT NOT NULL, + + -- The valid_until time for the repo. + valid_until TIMESTAMPTZ NOT NULL, + + -- The system version described in the TUF repo. + -- + -- This is the "true" primary key, but is not treated as such in the + -- database because we may want to change this format in the future. + -- Re-doing primary keys is annoying. + -- + -- Because the system version is embedded in the repo's artifacts.json, + -- each system version is associated with exactly one checksum. + system_version STRING(64) NOT NULL, + + -- For debugging only: + -- Filename provided by the user. + file_name TEXT NOT NULL, + + CONSTRAINT unique_checksum UNIQUE (sha256), + CONSTRAINT unique_system_version UNIQUE (system_version) +); diff --git a/schema/crdb/27.0.0/up11.sql b/schema/crdb/27.0.0/up11.sql new file mode 100644 index 0000000000..e0e36a51d7 --- /dev/null +++ b/schema/crdb/27.0.0/up11.sql @@ -0,0 +1,23 @@ +-- Describes an individual artifact from an uploaded TUF repo. +-- +-- In the future, this may also be used to describe artifacts that are fetched +-- from a remote TUF repo, but that requires some additional design work. +CREATE TABLE IF NOT EXISTS omicron.public.tuf_artifact ( + name STRING(63) NOT NULL, + version STRING(63) NOT NULL, + -- This used to be an enum but is now a string, because it can represent + -- artifact kinds currently unknown to a particular version of Nexus as + -- well. + kind STRING(63) NOT NULL, + + -- The time this artifact was first recorded. + time_created TIMESTAMPTZ NOT NULL, + + -- The SHA256 hash of the artifact, typically obtained from the TUF + -- targets.json (and validated at extract time). + sha256 STRING(64) NOT NULL, + -- The length of the artifact, in bytes. + artifact_size INT8 NOT NULL, + + PRIMARY KEY (name, version, kind) +); diff --git a/schema/crdb/27.0.0/up12.sql b/schema/crdb/27.0.0/up12.sql new file mode 100644 index 0000000000..9c1ffb0de4 --- /dev/null +++ b/schema/crdb/27.0.0/up12.sql @@ -0,0 +1,21 @@ +-- Reflects that a particular artifact was provided by a particular TUF repo. 
+-- This is a many-many mapping. +CREATE TABLE IF NOT EXISTS omicron.public.tuf_repo_artifact ( + tuf_repo_id UUID NOT NULL, + tuf_artifact_name STRING(63) NOT NULL, + tuf_artifact_version STRING(63) NOT NULL, + tuf_artifact_kind STRING(63) NOT NULL, + + /* + For the primary key, this definition uses the natural key rather than a + smaller surrogate key (UUID). That's because with CockroachDB the most + important factor in selecting a primary key is the ability to distribute + well. In this case, the first element of the primary key is the tuf_repo_id, + which is a random UUID. + + For more, see https://www.cockroachlabs.com/blog/how-to-choose-a-primary-key/. + */ + PRIMARY KEY ( + tuf_repo_id, tuf_artifact_name, tuf_artifact_version, tuf_artifact_kind + ) +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 79a43d3c89..c91bb669a9 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1955,184 +1955,84 @@ CREATE INDEX IF NOT EXISTS lookup_console_by_silo_user ON omicron.public.console /*******************************************************************/ -CREATE TYPE IF NOT EXISTS omicron.public.update_artifact_kind AS ENUM ( - -- Sled artifacts - 'gimlet_sp', - 'gimlet_rot', - 'host', - 'trampoline', - 'control_plane', - - -- PSC artifacts - 'psc_sp', - 'psc_rot', - - -- Switch artifacts - 'switch_sp', - 'switch_rot' -); - -CREATE TABLE IF NOT EXISTS omicron.public.update_artifact ( - name STRING(63) NOT NULL, - version STRING(63) NOT NULL, - kind omicron.public.update_artifact_kind NOT NULL, - - /* the version of the targets.json role this came from */ - targets_role_version INT NOT NULL, - - /* when the metadata this artifact was cached from expires */ - valid_until TIMESTAMPTZ NOT NULL, - - /* data about the target from the targets.json role */ - target_name STRING(512) NOT NULL, - target_sha256 STRING(64) NOT NULL, - target_length INT NOT NULL, - - PRIMARY KEY (name, version, kind) -); - -/* This index is used to quickly find outdated artifacts. */ -CREATE INDEX IF NOT EXISTS lookup_artifact_by_targets_role_version ON omicron.public.update_artifact ( - targets_role_version -); - -/* - * System updates - */ -CREATE TABLE IF NOT EXISTS omicron.public.system_update ( - /* Identity metadata (asset) */ - id UUID PRIMARY KEY, - time_created TIMESTAMPTZ NOT NULL, - time_modified TIMESTAMPTZ NOT NULL, - - -- Because the version is unique, it could be the PK, but that would make - -- this resource different from every other resource for little benefit. - - -- Unique semver version - version STRING(64) NOT NULL -- TODO: length -); - -CREATE UNIQUE INDEX IF NOT EXISTS lookup_update_by_version ON omicron.public.system_update ( - version -); - - -CREATE TYPE IF NOT EXISTS omicron.public.updateable_component_type AS ENUM ( - 'bootloader_for_rot', - 'bootloader_for_sp', - 'bootloader_for_host_proc', - 'hubris_for_psc_rot', - 'hubris_for_psc_sp', - 'hubris_for_sidecar_rot', - 'hubris_for_sidecar_sp', - 'hubris_for_gimlet_rot', - 'hubris_for_gimlet_sp', - 'helios_host_phase_1', - 'helios_host_phase_2', - 'host_omicron' -); - -/* - * Component updates. Associated with at least one system_update through - * system_update_component_update. - */ -CREATE TABLE IF NOT EXISTS omicron.public.component_update ( - /* Identity metadata (asset) */ +-- Describes a single uploaded TUF repo. +-- +-- Identified by both a random uuid and its SHA256 hash. The hash could be the +-- primary key, but it seems unnecessarily large and unwieldy. 
+CREATE TABLE IF NOT EXISTS omicron.public.tuf_repo ( id UUID PRIMARY KEY, time_created TIMESTAMPTZ NOT NULL, - time_modified TIMESTAMPTZ NOT NULL, - -- On component updates there's no device ID because the update can apply to - -- multiple instances of a given device kind + sha256 STRING(64) NOT NULL, - -- The *system* update version associated with this version (this is confusing, will rename) - version STRING(64) NOT NULL, -- TODO: length - -- TODO: add component update version to component_update + -- The version of the targets.json role that was used to generate the repo. + targets_role_version INT NOT NULL, - component_type omicron.public.updateable_component_type NOT NULL -); + -- The valid_until time for the repo. + valid_until TIMESTAMPTZ NOT NULL, --- version is unique per component type -CREATE UNIQUE INDEX IF NOT EXISTS lookup_component_by_type_and_version ON omicron.public.component_update ( - component_type, version -); + -- The system version described in the TUF repo. + -- + -- This is the "true" primary key, but is not treated as such in the + -- database because we may want to change this format in the future. + -- Re-doing primary keys is annoying. + -- + -- Because the system version is embedded in the repo's artifacts.json, + -- each system version is associated with exactly one checksum. + system_version STRING(64) NOT NULL, -/* - * Associate system updates with component updates. Not done with a - * system_update_id field on component_update because the same component update - * may be part of more than one system update. - */ -CREATE TABLE IF NOT EXISTS omicron.public.system_update_component_update ( - system_update_id UUID NOT NULL, - component_update_id UUID NOT NULL, + -- For debugging only: + -- Filename provided by the user. + file_name TEXT NOT NULL, - PRIMARY KEY (system_update_id, component_update_id) + CONSTRAINT unique_checksum UNIQUE (sha256), + CONSTRAINT unique_system_version UNIQUE (system_version) ); --- For now, the plan is to treat stopped, failed, completed as sub-cases of --- "steady" described by a "reason". But reason is not implemented yet. --- Obviously this could be a boolean, but boolean status fields never stay --- boolean for long. -CREATE TYPE IF NOT EXISTS omicron.public.update_status AS ENUM ( - 'updating', - 'steady' -); +-- Describes an individual artifact from an uploaded TUF repo. +-- +-- In the future, this may also be used to describe artifacts that are fetched +-- from a remote TUF repo, but that requires some additional design work. +CREATE TABLE IF NOT EXISTS omicron.public.tuf_artifact ( + name STRING(63) NOT NULL, + version STRING(63) NOT NULL, + -- This used to be an enum but is now a string, because it can represent + -- artifact kinds currently unknown to a particular version of Nexus as + -- well. + kind STRING(63) NOT NULL, -/* - * Updateable components and their update status - */ -CREATE TABLE IF NOT EXISTS omicron.public.updateable_component ( - /* Identity metadata (asset) */ - id UUID PRIMARY KEY, + -- The time this artifact was first recorded. time_created TIMESTAMPTZ NOT NULL, - time_modified TIMESTAMPTZ NOT NULL, - - -- Free-form string that comes from the device - device_id STRING(40) NOT NULL, - - component_type omicron.public.updateable_component_type NOT NULL, - - -- The semver version of this component's own software - version STRING(64) NOT NULL, -- TODO: length - -- The version of the system update this component's software came from. 
- -- This may need to be nullable if we are registering components before we - -- know about system versions at all - system_version STRING(64) NOT NULL, -- TODO: length + -- The SHA256 hash of the artifact, typically obtained from the TUF + -- targets.json (and validated at extract time). + sha256 STRING(64) NOT NULL, + -- The length of the artifact, in bytes. + artifact_size INT8 NOT NULL, - status omicron.public.update_status NOT NULL - -- TODO: status reason for updateable_component -); - --- can't have two components of the same type with the same device ID -CREATE UNIQUE INDEX IF NOT EXISTS lookup_component_by_type_and_device ON omicron.public.updateable_component ( - component_type, device_id -); - -CREATE INDEX IF NOT EXISTS lookup_component_by_system_version ON omicron.public.updateable_component ( - system_version + PRIMARY KEY (name, version, kind) ); -/* - * System updates - */ -CREATE TABLE IF NOT EXISTS omicron.public.update_deployment ( - /* Identity metadata (asset) */ - id UUID PRIMARY KEY, - time_created TIMESTAMPTZ NOT NULL, - time_modified TIMESTAMPTZ NOT NULL, - - -- semver version of corresponding system update - -- TODO: this makes sense while version is the PK of system_update, but - -- if/when I change that back to ID, this needs to be the ID too - version STRING(64) NOT NULL, +-- Reflects that a particular artifact was provided by a particular TUF repo. +-- This is a many-many mapping. +CREATE TABLE IF NOT EXISTS omicron.public.tuf_repo_artifact ( + tuf_repo_id UUID NOT NULL, + tuf_artifact_name STRING(63) NOT NULL, + tuf_artifact_version STRING(63) NOT NULL, + tuf_artifact_kind STRING(63) NOT NULL, - status omicron.public.update_status NOT NULL - -- TODO: status reason for update_deployment -); - -CREATE INDEX IF NOT EXISTS lookup_deployment_by_creation on omicron.public.update_deployment ( - time_created + /* + For the primary key, this definition uses the natural key rather than a + smaller surrogate key (UUID). That's because with CockroachDB the most + important factor in selecting a primary key is the ability to distribute + well. In this case, the first element of the primary key is the tuf_repo_id, + which is a random UUID. + + For more, see https://www.cockroachlabs.com/blog/how-to-choose-a-primary-key/. + */ + PRIMARY KEY ( + tuf_repo_id, tuf_artifact_name, tuf_artifact_version, tuf_artifact_kind + ) ); /*******************************************************************/ @@ -3296,7 +3196,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '26.0.0', NULL) + ( TRUE, NOW(), NOW(), '27.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/sled-agent/src/updates.rs b/sled-agent/src/updates.rs index 6144fd9171..13a1ec7623 100644 --- a/sled-agent/src/updates.rs +++ b/sled-agent/src/updates.rs @@ -127,7 +127,7 @@ impl UpdateManager { let response = nexus .cpapi_artifact_download( - nexus_client::types::KnownArtifactKind::ControlPlane, + &KnownArtifactKind::ControlPlane.to_string(), &artifact.name, &artifact.version.clone().into(), ) diff --git a/tufaceous-lib/src/assemble/manifest.rs b/tufaceous-lib/src/assemble/manifest.rs index 3974aa76b2..8825327c1d 100644 --- a/tufaceous-lib/src/assemble/manifest.rs +++ b/tufaceous-lib/src/assemble/manifest.rs @@ -343,10 +343,66 @@ impl DeserializedManifest { .context("error deserializing manifest") } + pub fn to_toml(&self) -> Result { + toml::to_string(self).context("error serializing manifest to TOML") + } + + /// For fake manifests, applies a set of changes to them. 
+ /// + /// Intended for testing. + pub fn apply_tweaks(&mut self, tweaks: &[ManifestTweak]) -> Result<()> { + for tweak in tweaks { + match tweak { + ManifestTweak::SystemVersion(version) => { + self.system_version = version.clone(); + } + ManifestTweak::ArtifactVersion { kind, version } => { + let entries = + self.artifacts.get_mut(kind).with_context(|| { + format!( + "manifest does not have artifact kind \ + {kind}", + ) + })?; + for entry in entries { + entry.version = version.clone(); + } + } + ManifestTweak::ArtifactContents { kind, size_delta } => { + let entries = + self.artifacts.get_mut(kind).with_context(|| { + format!( + "manifest does not have artifact kind \ + {kind}", + ) + })?; + + for entry in entries { + entry.source.apply_size_delta(*size_delta)?; + } + } + } + } + + Ok(()) + } + /// Returns the fake manifest. pub fn fake() -> Self { Self::from_str(FAKE_MANIFEST_TOML).unwrap() } + + /// Returns a version of the fake manifest with a set of changes applied. + /// + /// This is primarily intended for testing. + pub fn tweaked_fake(tweaks: &[ManifestTweak]) -> Self { + let mut manifest = Self::fake(); + manifest + .apply_tweaks(tweaks) + .expect("builtin fake manifest should accept all tweaks"); + + manifest + } } #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] @@ -380,6 +436,39 @@ pub enum DeserializedArtifactSource { }, } +impl DeserializedArtifactSource { + fn apply_size_delta(&mut self, size_delta: i64) -> Result<()> { + match self { + DeserializedArtifactSource::File { .. } => { + bail!("cannot apply size delta to `file` source") + } + DeserializedArtifactSource::Fake { size } => { + *size = (*size).saturating_add_signed(size_delta); + Ok(()) + } + DeserializedArtifactSource::CompositeHost { phase_1, phase_2 } => { + phase_1.apply_size_delta(size_delta)?; + phase_2.apply_size_delta(size_delta)?; + Ok(()) + } + DeserializedArtifactSource::CompositeRot { + archive_a, + archive_b, + } => { + archive_a.apply_size_delta(size_delta)?; + archive_b.apply_size_delta(size_delta)?; + Ok(()) + } + DeserializedArtifactSource::CompositeControlPlane { zones } => { + for zone in zones { + zone.apply_size_delta(size_delta)?; + } + Ok(()) + } + } + } +} + #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum DeserializedFileArtifactSource { @@ -416,6 +505,18 @@ impl DeserializedFileArtifactSource { let entry = CompositeEntry { data: &data, mtime_source }; f(entry) } + + fn apply_size_delta(&mut self, size_delta: i64) -> Result<()> { + match self { + DeserializedFileArtifactSource::File { .. } => { + bail!("cannot apply size delta to `file` source") + } + DeserializedFileArtifactSource::Fake { size } => { + *size = (*size).saturating_add_signed(size_delta); + Ok(()) + } + } + } } #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] @@ -459,6 +560,30 @@ impl DeserializedControlPlaneZoneSource { let entry = CompositeEntry { data: &data, mtime_source }; f(name, entry) } + + fn apply_size_delta(&mut self, size_delta: i64) -> Result<()> { + match self { + DeserializedControlPlaneZoneSource::File { .. } => { + bail!("cannot apply size delta to `file` source") + } + DeserializedControlPlaneZoneSource::Fake { size, .. } => { + (*size) = (*size).saturating_add_signed(size_delta); + Ok(()) + } + } + } +} +/// A change to apply to a manifest. +#[derive(Clone, Debug)] +pub enum ManifestTweak { + /// Update the system version. + SystemVersion(SemverVersion), + + /// Update the versions for this artifact. 
+ ArtifactVersion { kind: KnownArtifactKind, version: SemverVersion }, + + /// Update the contents of this artifact (only support changing the size). + ArtifactContents { kind: KnownArtifactKind, size_delta: i64 }, } fn deserialize_byte_size<'de, D>(deserializer: D) -> Result diff --git a/update-common/Cargo.toml b/update-common/Cargo.toml index cc2ee86232..37542baa8f 100644 --- a/update-common/Cargo.toml +++ b/update-common/Cargo.toml @@ -9,6 +9,7 @@ anyhow.workspace = true bytes.workspace = true camino.workspace = true camino-tempfile.workspace = true +chrono.workspace = true debug-ignore.workspace = true display-error-chain.workspace = true dropshot.workspace = true diff --git a/update-common/src/artifacts/artifacts_with_plan.rs b/update-common/src/artifacts/artifacts_with_plan.rs index 9b579af29a..c2be69e82e 100644 --- a/update-common/src/artifacts/artifacts_with_plan.rs +++ b/update-common/src/artifacts/artifacts_with_plan.rs @@ -4,19 +4,28 @@ use super::ExtractedArtifactDataHandle; use super::UpdatePlan; +use super::UpdatePlanBuildOutput; use super::UpdatePlanBuilder; use crate::errors::RepositoryError; use anyhow::anyhow; +use bytes::Bytes; use camino_tempfile::Utf8TempDir; use debug_ignore::DebugIgnore; +use dropshot::HttpError; +use futures::Stream; +use futures::TryStreamExt; +use omicron_common::api::external::TufRepoDescription; +use omicron_common::api::external::TufRepoMeta; use omicron_common::update::ArtifactHash; use omicron_common::update::ArtifactHashId; use omicron_common::update::ArtifactId; +use sha2::{Digest, Sha256}; use slog::info; use slog::Logger; use std::collections::BTreeMap; use std::collections::HashMap; use std::io; +use tokio::io::AsyncWriteExt; use tough::TargetName; use tufaceous_lib::ArchiveExtractor; use tufaceous_lib::OmicronRepo; @@ -24,6 +33,9 @@ use tufaceous_lib::OmicronRepo; /// A collection of artifacts along with an update plan using those artifacts. #[derive(Debug)] pub struct ArtifactsWithPlan { + // A description of this repository. + description: TufRepoDescription, + // Map of top-level artifact IDs (present in the TUF repo) to the actual // artifacts we're serving (e.g., a top-level RoT artifact will map to two // artifact hashes: one for each of the A and B images). @@ -51,8 +63,65 @@ pub struct ArtifactsWithPlan { } impl ArtifactsWithPlan { + /// Creates a new `ArtifactsWithPlan` from the given stream of `Bytes`. + /// + /// This method reads the stream representing a TUF repo, and writes it to + /// a temporary file. Afterwards, it builds an `ArtifactsWithPlan` from the + /// contents of that file. + pub async fn from_stream( + body: impl Stream> + Send, + file_name: Option, + log: &Logger, + ) -> Result { + // Create a temporary file to store the incoming archive.`` + let tempfile = tokio::task::spawn_blocking(|| { + camino_tempfile::tempfile().map_err(RepositoryError::TempFileCreate) + }) + .await + .unwrap()?; + let mut tempfile = + tokio::io::BufWriter::new(tokio::fs::File::from_std(tempfile)); + + let mut body = std::pin::pin!(body); + + // Stream the uploaded body into our tempfile. + let mut hasher = Sha256::new(); + while let Some(bytes) = body + .try_next() + .await + .map_err(RepositoryError::ReadChunkFromStream)? + { + hasher.update(&bytes); + tempfile + .write_all(&bytes) + .await + .map_err(RepositoryError::TempFileWrite)?; + } + + let repo_hash = ArtifactHash(hasher.finalize().into()); + + // Flush writes. 
We don't need to seek back to the beginning of the file + // because extracting the repository will do its own seeking as a part of + // unzipping this repo. + tempfile.flush().await.map_err(RepositoryError::TempFileFlush)?; + + let tempfile = tempfile.into_inner().into_std().await; + + let artifacts_with_plan = Self::from_zip( + io::BufReader::new(tempfile), + file_name, + repo_hash, + log, + ) + .await?; + + Ok(artifacts_with_plan) + } + pub async fn from_zip( zip_data: T, + file_name: Option, + repo_hash: ArtifactHash, log: &Logger, ) -> Result where @@ -102,7 +171,7 @@ impl ArtifactsWithPlan { // `dir`, but we'll also unpack nested artifacts like the RoT dual A/B // archives. let mut builder = - UpdatePlanBuilder::new(artifacts.system_version, log)?; + UpdatePlanBuilder::new(artifacts.system_version.clone(), log)?; // Make a pass through each artifact in the repo. For each artifact, we // do one of the following: @@ -124,9 +193,7 @@ impl ArtifactsWithPlan { // priority - copying small SP artifacts is neglible compared to the // work we do to unpack host OS images. - let mut by_id = BTreeMap::new(); - let mut by_hash = HashMap::new(); - for artifact in artifacts.artifacts { + for artifact in &artifacts.artifacts { let target_name = TargetName::try_from(artifact.target.as_str()) .map_err(|error| RepositoryError::LocateTarget { target: artifact.target.clone(), @@ -167,21 +234,44 @@ impl ArtifactsWithPlan { })?; builder - .add_artifact( - artifact.into_id(), - artifact_hash, - stream, - &mut by_id, - &mut by_hash, - ) + .add_artifact(artifact.clone().into_id(), artifact_hash, stream) .await?; } // Ensure we know how to apply updates from this set of artifacts; we'll // remember the plan we create. - let artifacts = builder.build()?; + let UpdatePlanBuildOutput { plan, by_id, by_hash, artifacts_meta } = + builder.build()?; - Ok(Self { by_id, by_hash: by_hash.into(), plan: artifacts }) + let tuf_repository = repository.repo(); + + let file_name = file_name.unwrap_or_else(|| { + // Just pick a reasonable-sounding file name if we don't have one. + format!("system-update-v{}.zip", artifacts.system_version) + }); + + let repo_meta = TufRepoMeta { + hash: repo_hash, + targets_role_version: tuf_repository.targets().signed.version.get(), + valid_until: tuf_repository + .root() + .signed + .expires + .min(tuf_repository.snapshot().signed.expires) + .min(tuf_repository.targets().signed.expires) + .min(tuf_repository.timestamp().signed.expires), + system_version: artifacts.system_version, + file_name, + }; + let description = + TufRepoDescription { repo: repo_meta, artifacts: artifacts_meta }; + + Ok(Self { description, by_id, by_hash: by_hash.into(), plan }) + } + + /// Returns the `ArtifactsDocument` corresponding to this TUF repo. + pub fn description(&self) -> &TufRepoDescription { + &self.description } pub fn by_id(&self) -> &BTreeMap> { @@ -233,13 +323,14 @@ where mod tests { use super::*; use anyhow::{Context, Result}; + use camino::Utf8Path; use camino_tempfile::Utf8TempDir; use clap::Parser; use omicron_common::{ api::internal::nexus::KnownArtifactKind, update::ArtifactKind, }; use omicron_test_utils::dev::test_setup_log; - use std::collections::BTreeSet; + use std::{collections::BTreeSet, time::Duration}; /// Test that `ArtifactsWithPlan` can extract the fake repository generated /// by tufaceous. @@ -253,29 +344,22 @@ mod tests { let archive_path = temp_dir.path().join("archive.zip"); // Create the archive. 
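One detail of the `TufRepoMeta` constructed above is worth spelling out: `valid_until` is taken as the minimum of the expiration times across the root, snapshot, targets, and timestamp roles, because the repo is only usable while every role's metadata is still valid. A standalone restatement of that rule (field names here are illustrative, using `chrono` as the patch does):

use chrono::{DateTime, Utc};

// A repo stops being trustworthy as soon as any one role expires, so its
// effective expiration is the earliest of the four.
struct RoleExpirations {
    root: DateTime<Utc>,
    snapshot: DateTime<Utc>,
    targets: DateTime<Utc>,
    timestamp: DateTime<Utc>,
}

fn valid_until(e: &RoleExpirations) -> DateTime<Utc> {
    e.root.min(e.snapshot).min(e.targets).min(e.timestamp)
}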
- let args = tufaceous::Args::try_parse_from([ - "tufaceous", - "assemble", - "../tufaceous/manifests/fake.toml", - archive_path.as_str(), - ]) - .context("error parsing args")?; - - args.exec(&logctx.log) - .await - .context("error executing assemble command")?; + create_fake_archive(&logctx.log, &archive_path).await?; // Now check that it can be read by the archive extractor. - let zip_bytes = std::fs::File::open(&archive_path) - .context("error opening archive.zip")?; - let plan = ArtifactsWithPlan::from_zip(zip_bytes, &logctx.log) - .await - .context("error reading archive.zip")?; + let plan = + build_artifacts_with_plan(&logctx.log, &archive_path).await?; // Check that all known artifact kinds are present in the map. let by_id_kinds: BTreeSet<_> = plan.by_id().keys().map(|id| id.kind.clone()).collect(); let by_hash_kinds: BTreeSet<_> = plan.by_hash().keys().map(|id| id.kind.clone()).collect(); + let artifact_meta_kinds: BTreeSet<_> = plan + .description + .artifacts + .iter() + .map(|meta| meta.id.kind.clone()) + .collect(); // `by_id` should contain one entry for every `KnownArtifactKind`... let mut expected_kinds: BTreeSet<_> = @@ -315,6 +399,10 @@ mod tests { expected_kinds, by_hash_kinds, "expected kinds match by_hash kinds" ); + assert_eq!( + expected_kinds, artifact_meta_kinds, + "expected kinds match artifact_meta kinds" + ); // Every value present in `by_id` should also be a key in `by_hash`. for (id, hash_ids) in plan.by_id() { @@ -327,8 +415,81 @@ mod tests { } } + // + + logctx.cleanup_successful(); + + Ok(()) + } + + /// Test that the archive generated by running `tufaceous assemble` twice + /// has the same artifacts and hashes. + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn test_fake_archive_idempotent() -> Result<()> { + let logctx = test_setup_log("test_fake_archive_idempotent"); + let temp_dir = Utf8TempDir::new()?; + let archive_path = temp_dir.path().join("archive1.zip"); + + // Create the archive and build a plan from it. + create_fake_archive(&logctx.log, &archive_path).await?; + let mut plan1 = + build_artifacts_with_plan(&logctx.log, &archive_path).await?; + + // Add a 2 second delay to ensure that if we bake any second-based + // timestamps in, that they end up being different from those in the + // first archive. + tokio::time::sleep(Duration::from_secs(2)).await; + + let archive2_path = temp_dir.path().join("archive2.zip"); + create_fake_archive(&logctx.log, &archive2_path).await?; + let mut plan2 = + build_artifacts_with_plan(&logctx.log, &archive2_path).await?; + + // At the moment, the repo .zip itself doesn't match because it bakes + // in timestamps. However, the artifacts inside should match exactly. 
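The comparison strategy here is the interesting part: the two zips are not byte-identical because the build bakes in timestamps, so equality is instead defined over the artifact metadata after sorting both sides. A minimal standalone illustration of that idea, with a hypothetical stand-in for `TufArtifactMeta`:

#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct ArtifactMeta {
    name: String,
    version: String,
    kind: String,
    sha256: [u8; 32],
    size: u64,
}

// Order-insensitive equality: sort both sides, then compare.
fn same_artifacts(mut a: Vec<ArtifactMeta>, mut b: Vec<ArtifactMeta>) -> bool {
    a.sort();
    b.sort();
    a == b
}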
+ plan1.description.sort_artifacts(); + plan2.description.sort_artifacts(); + + assert_eq!( + plan1.description.artifacts, plan2.description.artifacts, + "artifacts match" + ); + logctx.cleanup_successful(); Ok(()) } + + async fn create_fake_archive( + log: &slog::Logger, + archive_path: &Utf8Path, + ) -> Result<()> { + let args = tufaceous::Args::try_parse_from([ + "tufaceous", + "assemble", + "../tufaceous/manifests/fake.toml", + archive_path.as_str(), + ]) + .context("error parsing args")?; + + args.exec(log).await.context("error executing assemble command")?; + + Ok(()) + } + + async fn build_artifacts_with_plan( + log: &slog::Logger, + archive_path: &Utf8Path, + ) -> Result { + let zip_bytes = std::fs::File::open(&archive_path) + .context("error opening archive.zip")?; + // We could also compute the hash from the file here, but the repo hash + // doesn't matter for the test. + let repo_hash = ArtifactHash([0u8; 32]); + let plan = ArtifactsWithPlan::from_zip(zip_bytes, None, repo_hash, log) + .await + .with_context(|| format!("error reading {archive_path}"))?; + + Ok(plan) + } } diff --git a/update-common/src/artifacts/extracted_artifacts.rs b/update-common/src/artifacts/extracted_artifacts.rs index 06e0e5ec65..5ac4a3a395 100644 --- a/update-common/src/artifacts/extracted_artifacts.rs +++ b/update-common/src/artifacts/extracted_artifacts.rs @@ -106,7 +106,7 @@ pub struct ExtractedArtifacts { impl ExtractedArtifacts { pub fn new(log: &Logger) -> Result { let tempdir = camino_tempfile::Builder::new() - .prefix("wicketd-update-artifacts.") + .prefix("update-artifacts.") .tempdir() .map_err(RepositoryError::TempDirCreate)?; info!( @@ -189,7 +189,7 @@ impl ExtractedArtifacts { &self, ) -> Result { let file = NamedUtf8TempFile::new_in(self.tempdir.path()).map_err( - |error| RepositoryError::TempFileCreate { + |error| RepositoryError::NamedTempFileCreate { path: self.tempdir.path().to_owned(), error, }, diff --git a/update-common/src/artifacts/update_plan.rs b/update-common/src/artifacts/update_plan.rs index 7704d5fe8a..c5b171d648 100644 --- a/update-common/src/artifacts/update_plan.rs +++ b/update-common/src/artifacts/update_plan.rs @@ -2,7 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Constructor for the `UpdatePlan` wicketd uses to drive sled mupdates. +//! Constructor for the `UpdatePlan` wicketd and Nexus use to drive sled +//! mupdates. //! //! This is a "plan" in name only: it is a strict list of which artifacts to //! apply to which components; the ordering and application of the plan lives @@ -20,6 +21,7 @@ use futures::StreamExt; use futures::TryStreamExt; use hubtools::RawHubrisArchive; use omicron_common::api::external::SemverVersion; +use omicron_common::api::external::TufArtifactMeta; use omicron_common::api::internal::nexus::KnownArtifactKind; use omicron_common::update::ArtifactHash; use omicron_common::update::ArtifactHashId; @@ -107,6 +109,11 @@ pub struct UpdatePlanBuilder<'a> { host_phase_2_hash: Option, control_plane_hash: Option, + // The by_id and by_hash maps, and metadata, used in `ArtifactsWithPlan`. 
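The fields that follow move the bookkeeping that previously had to be threaded through every `add_artifact` call onto the builder itself. Note that `by_id` is a multimap: a single repo-level artifact can fan out to several served artifacts (a composite RoT artifact yields both an A and a B hash ID; a host image yields phase 1 and phase 2). A sketch of that shape, with illustrative stand-ins for the real ID types:

use std::collections::BTreeMap;

// Illustrative stand-ins for the real ID types.
type ArtifactId = (String, String);       // (name, version)
type ArtifactHashId = (String, [u8; 32]); // (kind, sha256)

// One repo-level artifact may expand to several extracted artifacts, so the
// map's value is a Vec rather than a single hash ID.
fn fan_out(
    by_id: &mut BTreeMap<ArtifactId, Vec<ArtifactHashId>>,
    id: ArtifactId,
    extracted: impl IntoIterator<Item = ArtifactHashId>,
) {
    by_id.entry(id).or_default().extend(extracted);
}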
+ by_id: BTreeMap>, + by_hash: HashMap, + artifacts_meta: Vec, + // extra fields we use to build the plan extracted_artifacts: ExtractedArtifacts, log: &'a Logger, @@ -135,30 +142,27 @@ impl<'a> UpdatePlanBuilder<'a> { host_phase_2_hash: None, control_plane_hash: None, + by_id: BTreeMap::new(), + by_hash: HashMap::new(), + artifacts_meta: Vec::new(), + extracted_artifacts, log, }) } + /// Adds an artifact with these contents to the by_id and by_hash maps. pub async fn add_artifact( &mut self, artifact_id: ArtifactId, artifact_hash: ArtifactHash, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { // If we don't know this artifact kind, we'll still serve it up by hash, // but we don't do any further processing on it. let Some(artifact_kind) = artifact_id.kind.to_known() else { return self - .add_unknown_artifact( - artifact_id, - artifact_hash, - stream, - by_id, - by_hash, - ) + .add_unknown_artifact(artifact_id, artifact_hash, stream) .await; }; @@ -175,39 +179,25 @@ impl<'a> UpdatePlanBuilder<'a> { artifact_kind, artifact_hash, stream, - by_id, - by_hash, ) .await } KnownArtifactKind::GimletRot | KnownArtifactKind::PscRot | KnownArtifactKind::SwitchRot => { - self.add_rot_artifact( - artifact_id, - artifact_kind, - stream, - by_id, - by_hash, - ) - .await + self.add_rot_artifact(artifact_id, artifact_kind, stream).await } KnownArtifactKind::Host => { - self.add_host_artifact(artifact_id, stream, by_id, by_hash) + self.add_host_artifact(artifact_id, stream) + } + KnownArtifactKind::Trampoline => { + self.add_trampoline_artifact(artifact_id, stream) } - KnownArtifactKind::Trampoline => self.add_trampoline_artifact( - artifact_id, - stream, - by_id, - by_hash, - ), KnownArtifactKind::ControlPlane => { self.add_control_plane_artifact( artifact_id, artifact_hash, stream, - by_id, - by_hash, ) .await } @@ -220,8 +210,6 @@ impl<'a> UpdatePlanBuilder<'a> { artifact_kind: KnownArtifactKind, artifact_hash: ArtifactHash, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { let sp_map = match artifact_kind { KnownArtifactKind::GimletSp => &mut self.gimlet_sp, @@ -276,10 +264,8 @@ impl<'a> UpdatePlanBuilder<'a> { data: data.clone(), }); - record_extracted_artifact( + self.record_extracted_artifact( artifact_id, - by_id, - by_hash, data, artifact_kind.into(), self.log, @@ -293,8 +279,6 @@ impl<'a> UpdatePlanBuilder<'a> { artifact_id: ArtifactId, artifact_kind: KnownArtifactKind, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { let (rot_a, rot_a_kind, rot_b, rot_b_kind) = match artifact_kind { KnownArtifactKind::GimletRot => ( @@ -353,18 +337,14 @@ impl<'a> UpdatePlanBuilder<'a> { rot_a.push(ArtifactIdData { id: rot_a_id, data: rot_a_data.clone() }); rot_b.push(ArtifactIdData { id: rot_b_id, data: rot_b_data.clone() }); - record_extracted_artifact( + self.record_extracted_artifact( artifact_id.clone(), - by_id, - by_hash, rot_a_data, rot_a_kind, self.log, )?; - record_extracted_artifact( + self.record_extracted_artifact( artifact_id, - by_id, - by_hash, rot_b_data, rot_b_kind, self.log, @@ -377,8 +357,6 @@ impl<'a> UpdatePlanBuilder<'a> { &mut self, artifact_id: ArtifactId, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { if self.host_phase_1.is_some() || self.host_phase_2_hash.is_some() { return Err(RepositoryError::DuplicateArtifactKind( @@ -407,18 
+385,14 @@ impl<'a> UpdatePlanBuilder<'a> { Some(ArtifactIdData { id: phase_1_id, data: phase_1_data.clone() }); self.host_phase_2_hash = Some(phase_2_data.hash()); - record_extracted_artifact( + self.record_extracted_artifact( artifact_id.clone(), - by_id, - by_hash, phase_1_data, ArtifactKind::HOST_PHASE_1, self.log, )?; - record_extracted_artifact( + self.record_extracted_artifact( artifact_id, - by_id, - by_hash, phase_2_data, ArtifactKind::HOST_PHASE_2, self.log, @@ -431,8 +405,6 @@ impl<'a> UpdatePlanBuilder<'a> { &mut self, artifact_id: ArtifactId, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { if self.trampoline_phase_1.is_some() || self.trampoline_phase_2.is_some() @@ -470,18 +442,14 @@ impl<'a> UpdatePlanBuilder<'a> { self.trampoline_phase_2 = Some(ArtifactIdData { id: phase_2_id, data: phase_2_data.clone() }); - record_extracted_artifact( + self.record_extracted_artifact( artifact_id.clone(), - by_id, - by_hash, phase_1_data, ArtifactKind::TRAMPOLINE_PHASE_1, self.log, )?; - record_extracted_artifact( + self.record_extracted_artifact( artifact_id, - by_id, - by_hash, phase_2_data, ArtifactKind::TRAMPOLINE_PHASE_2, self.log, @@ -495,8 +463,6 @@ impl<'a> UpdatePlanBuilder<'a> { artifact_id: ArtifactId, artifact_hash: ArtifactHash, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { if self.control_plane_hash.is_some() { return Err(RepositoryError::DuplicateArtifactKind( @@ -516,10 +482,8 @@ impl<'a> UpdatePlanBuilder<'a> { self.control_plane_hash = Some(data.hash()); - record_extracted_artifact( + self.record_extracted_artifact( artifact_id, - by_id, - by_hash, data, KnownArtifactKind::ControlPlane.into(), self.log, @@ -533,8 +497,6 @@ impl<'a> UpdatePlanBuilder<'a> { artifact_id: ArtifactId, artifact_hash: ArtifactHash, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { let artifact_kind = artifact_id.kind.clone(); let artifact_hash_id = @@ -543,10 +505,8 @@ impl<'a> UpdatePlanBuilder<'a> { let data = self.extracted_artifacts.store(artifact_hash_id, stream).await?; - record_extracted_artifact( + self.record_extracted_artifact( artifact_id, - by_id, - by_hash, data, artifact_kind, self.log, @@ -660,7 +620,62 @@ impl<'a> UpdatePlanBuilder<'a> { Ok((image1, image2)) } - pub fn build(self) -> Result { + // Record an artifact in `by_id` and `by_hash`, or fail if either already has an + // entry for this id/hash. 
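The duplicate-hash handling that `record_extracted_artifact` implements below is a standard `Entry` pattern: reserve the slot first, fail if it is already occupied, and only fill it once the remaining bookkeeping has succeeded. Distilled with generic (hypothetical) key and value types:

use std::collections::hash_map::Entry;
use std::collections::HashMap;

// Returns the offending key on a duplicate instead of silently overwriting.
fn insert_unique<K, V>(map: &mut HashMap<K, V>, key: K, value: V) -> Result<(), K>
where
    K: std::hash::Hash + Eq + Clone,
{
    match map.entry(key) {
        Entry::Occupied(slot) => Err(slot.key().clone()),
        Entry::Vacant(slot) => {
            slot.insert(value);
            Ok(())
        }
    }
}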
+ fn record_extracted_artifact( + &mut self, + tuf_repo_artifact_id: ArtifactId, + data: ExtractedArtifactDataHandle, + data_kind: ArtifactKind, + log: &Logger, + ) -> Result<(), RepositoryError> { + use std::collections::hash_map::Entry; + + let artifact_hash_id = + ArtifactHashId { kind: data_kind.clone(), hash: data.hash() }; + + let by_hash_slot = match self.by_hash.entry(artifact_hash_id) { + Entry::Occupied(slot) => { + return Err(RepositoryError::DuplicateHashEntry( + slot.key().clone(), + )); + } + Entry::Vacant(slot) => slot, + }; + + info!( + log, "added artifact"; + "name" => %tuf_repo_artifact_id.name, + "kind" => %by_hash_slot.key().kind, + "version" => %tuf_repo_artifact_id.version, + "hash" => %by_hash_slot.key().hash, + "length" => data.file_size(), + ); + + self.by_id + .entry(tuf_repo_artifact_id.clone()) + .or_default() + .push(by_hash_slot.key().clone()); + + // In the artifacts_meta document, use the expanded artifact ID + // (artifact kind = data_kind, and name and version from + // tuf_repo_artifact_id). + let artifacts_meta_id = ArtifactId { + name: tuf_repo_artifact_id.name, + version: tuf_repo_artifact_id.version, + kind: data_kind, + }; + self.artifacts_meta.push(TufArtifactMeta { + id: artifacts_meta_id, + hash: data.hash(), + size: data.file_size() as u64, + }); + by_hash_slot.insert(data); + + Ok(()) + } + + pub fn build(self) -> Result { // Ensure our multi-board-supporting kinds have at least one board // present. for (kind, no_artifacts) in [ @@ -738,7 +753,7 @@ impl<'a> UpdatePlanBuilder<'a> { } } - Ok(UpdatePlan { + let plan = UpdatePlan { system_version: self.system_version, gimlet_sp: self.gimlet_sp, // checked above gimlet_rot_a: self.gimlet_rot_a, // checked above @@ -770,10 +785,24 @@ impl<'a> UpdatePlanBuilder<'a> { KnownArtifactKind::ControlPlane, ), )?, + }; + Ok(UpdatePlanBuildOutput { + plan, + by_id: self.by_id, + by_hash: self.by_hash, + artifacts_meta: self.artifacts_meta, }) } } +/// The output of [`UpdatePlanBuilder::build`]. +pub struct UpdatePlanBuildOutput { + pub plan: UpdatePlan, + pub by_id: BTreeMap>, + pub by_hash: HashMap, + pub artifacts_meta: Vec, +} + // This function takes and returns `id` to avoid an unnecessary clone; `id` will // be present in either the Ok tuple or the error. fn read_hubris_board_from_archive( @@ -807,48 +836,6 @@ fn read_hubris_board_from_archive( Ok((id, Board(board.to_string()))) } -// Record an artifact in `by_id` and `by_hash`, or fail if either already has an -// entry for this id/hash. 
-fn record_extracted_artifact(
-    tuf_repo_artifact_id: ArtifactId,
-    by_id: &mut BTreeMap<ArtifactId, Vec<ArtifactHashId>>,
-    by_hash: &mut HashMap<ArtifactHashId, ExtractedArtifactDataHandle>,
-    data: ExtractedArtifactDataHandle,
-    data_kind: ArtifactKind,
-    log: &Logger,
-) -> Result<(), RepositoryError> {
-    use std::collections::hash_map::Entry;
-
-    let artifact_hash_id =
-        ArtifactHashId { kind: data_kind, hash: data.hash() };
-
-    let by_hash_slot = match by_hash.entry(artifact_hash_id) {
-        Entry::Occupied(slot) => {
-            return Err(RepositoryError::DuplicateHashEntry(
-                slot.key().clone(),
-            ));
-        }
-        Entry::Vacant(slot) => slot,
-    };
-
-    info!(
-        log, "added artifact";
-        "name" => %tuf_repo_artifact_id.name,
-        "kind" => %by_hash_slot.key().kind,
-        "version" => %tuf_repo_artifact_id.version,
-        "hash" => %by_hash_slot.key().hash,
-        "length" => data.file_size(),
-    );
-
-    by_id
-        .entry(tuf_repo_artifact_id)
-        .or_default()
-        .push(by_hash_slot.key().clone());
-    by_hash_slot.insert(data);
-
-    Ok(())
-}
-
 #[cfg(test)]
 mod tests {
     use std::collections::BTreeSet;
@@ -962,13 +949,11 @@
 
         let logctx = test_setup_log("test_update_plan_from_artifacts");
 
-        let mut by_id = BTreeMap::new();
-        let mut by_hash = HashMap::new();
         let mut plan_builder =
             UpdatePlanBuilder::new("0.0.0".parse().unwrap(), &logctx.log)
                 .unwrap();
 
-        // Add a couple artifacts with kinds wicketd doesn't understand; it
+        // Add a couple of artifacts with kinds wicketd/nexus don't understand; they
         // should still ingest and serve them.
         let mut expected_unknown_artifacts = BTreeSet::new();
 
@@ -986,8 +971,6 @@
                 id,
                 hash,
                 futures::stream::iter([Ok(Bytes::from(data))]),
-                &mut by_id,
-                &mut by_hash,
             )
             .await
             .unwrap();
@@ -1009,8 +992,6 @@
                 id,
                 hash,
                 futures::stream::iter([Ok(Bytes::from(data))]),
-                &mut by_id,
-                &mut by_hash,
             )
             .await
             .unwrap();
@@ -1038,8 +1019,6 @@
                 id,
                 hash,
                 futures::stream::iter([Ok(Bytes::from(data))]),
-                &mut by_id,
-                &mut by_hash,
             )
             .await
             .unwrap();
@@ -1067,8 +1046,6 @@
                 id,
                 hash,
                 futures::stream::iter([Ok(data.clone())]),
-                &mut by_id,
-                &mut by_hash,
             )
             .await
             .unwrap();
@@ -1095,14 +1072,13 @@
                 id,
                 hash,
                 futures::stream::iter([Ok(data.clone())]),
-                &mut by_id,
-                &mut by_hash,
             )
             .await
             .unwrap();
         }
 
-        let plan = plan_builder.build().unwrap();
+        let UpdatePlanBuildOutput { plan, by_id, .. } =
+            plan_builder.build().unwrap();
 
         assert_eq!(plan.gimlet_sp.len(), 2);
         assert_eq!(plan.psc_sp.len(), 2);
diff --git a/update-common/src/errors.rs b/update-common/src/errors.rs
index 5fba43b944..4d992e70b2 100644
--- a/update-common/src/errors.rs
+++ b/update-common/src/errors.rs
@@ -21,8 +21,20 @@ pub enum RepositoryError {
     #[error("error creating temporary directory")]
     TempDirCreate(#[source] std::io::Error),
 
+    #[error("error creating temporary file")]
+    TempFileCreate(#[source] std::io::Error),
+
+    #[error("error reading chunk from input stream")]
+    ReadChunkFromStream(#[source] HttpError),
+
+    #[error("error writing to temporary file")]
+    TempFileWrite(#[source] std::io::Error),
+
+    #[error("error flushing temporary file")]
+    TempFileFlush(#[source] std::io::Error),
+
     #[error("error creating temporary file in {path}")]
-    TempFileCreate {
+    NamedTempFileCreate {
         path: Utf8PathBuf,
         #[source]
         error: std::io::Error,
@@ -138,10 +150,21 @@ impl RepositoryError {
             // Errors we had that are unrelated to the contents of a repository
             // uploaded by a client.
             RepositoryError::TempDirCreate(_)
-                | RepositoryError::TempFileCreate { .. } => {
+                | RepositoryError::TempFileCreate(_)
+                | RepositoryError::TempFileWrite(_)
+                | RepositoryError::TempFileFlush(_)
+                | RepositoryError::NamedTempFileCreate { .. } => {
                 HttpError::for_unavail(None, message)
             }
 
+            // This error already carries an `HttpError` from reading the
+            // request stream; bubble it up with its status and messages
+            // intact.
+            RepositoryError::ReadChunkFromStream(error) => HttpError {
+                status_code: error.status_code,
+                error_code: error.error_code.clone(),
+                external_message: error.external_message.clone(),
+                internal_message: error.internal_message.clone(),
+            },
+
             // Errors that are definitely caused by bad repository contents.
             RepositoryError::DuplicateArtifactKind(_)
                 | RepositoryError::LocateTarget { .. }
diff --git a/wicketd/src/artifacts/store.rs b/wicketd/src/artifacts/store.rs
index a5f24993a8..01543432a2 100644
--- a/wicketd/src/artifacts/store.rs
+++ b/wicketd/src/artifacts/store.rs
@@ -3,11 +3,9 @@
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
 use crate::http_entrypoints::InstallableArtifacts;
-use dropshot::HttpError;
 use omicron_common::api::external::SemverVersion;
 use omicron_common::update::ArtifactHashId;
 use slog::Logger;
-use std::io;
 use std::sync::Arc;
 use std::sync::Mutex;
 use update_common::artifacts::ArtifactsWithPlan;
@@ -32,22 +30,12 @@ impl WicketdArtifactStore {
         Self { log, artifacts_with_plan: Default::default() }
     }
 
-    pub(crate) async fn put_repository<T>(
+    pub(crate) fn set_artifacts_with_plan(
         &self,
-        data: T,
-    ) -> Result<(), HttpError>
-    where
-        T: io::Read + io::Seek + Send + 'static,
-    {
-        slog::debug!(self.log, "adding repository");
-
-        let log = self.log.clone();
-        let new_artifacts = ArtifactsWithPlan::from_zip(data, &log)
-            .await
-            .map_err(|error| error.to_http_error())?;
-        self.replace(new_artifacts);
-
-        Ok(())
+        artifacts_with_plan: ArtifactsWithPlan,
+    ) {
+        slog::debug!(self.log, "setting artifacts_with_plan");
+        self.replace(artifacts_with_plan);
     }
 
     pub(crate) fn system_version_and_artifact_ids(
diff --git a/wicketd/src/http_entrypoints.rs b/wicketd/src/http_entrypoints.rs
index dbd3e31072..9c1740679f 100644
--- a/wicketd/src/http_entrypoints.rs
+++ b/wicketd/src/http_entrypoints.rs
@@ -25,7 +25,6 @@ use dropshot::Path;
 use dropshot::RequestContext;
 use dropshot::StreamingBody;
 use dropshot::TypedBody;
-use futures::TryStreamExt;
 use gateway_client::types::IgnitionCommand;
 use gateway_client::types::SpIdentifier;
 use gateway_client::types::SpType;
@@ -44,11 +43,9 @@ use sled_hardware::Baseboard;
 use slog::o;
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
-use std::io;
 use std::net::IpAddr;
 use std::net::Ipv6Addr;
 use std::time::Duration;
-use tokio::io::AsyncWriteExt;
 use wicket_common::rack_setup::PutRssUserConfigInsensitive;
 use wicket_common::update_events::EventReport;
 use wicket_common::WICKETD_TIMEOUT;
@@ -570,44 +567,7 @@ async fn put_repository(
 ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
     let rqctx = rqctx.context();
 
-    // Create a temporary file to store the incoming archive.
-    let tempfile = tokio::task::spawn_blocking(|| {
-        camino_tempfile::tempfile().map_err(|err| {
-            HttpError::for_unavail(
-                None,
-                format!("failed to create temp file: {err}"),
-            )
-        })
-    })
-    .await
-    .unwrap()?;
-    let mut tempfile =
-        tokio::io::BufWriter::new(tokio::fs::File::from_std(tempfile));
-
-    let mut body = std::pin::pin!(body.into_stream());
-
-    // Stream the uploaded body into our tempfile.
-    while let Some(bytes) = body.try_next().await? {
-        tempfile.write_all(&bytes).await.map_err(|err| {
-            HttpError::for_unavail(
-                None,
-                format!("failed to write to temp file: {err}"),
-            )
-        })?;
-    }
-
-    // Flush writes. We don't need to seek back to the beginning of the file
-    // because extracting the repository will do its own seeking as a part of
-    // unzipping this repo.
-    tempfile.flush().await.map_err(|err| {
-        HttpError::for_unavail(
-            None,
-            format!("failed to flush temp file: {err}"),
-        )
-    })?;
-
-    let tempfile = tempfile.into_inner().into_std().await;
-    rqctx.update_tracker.put_repository(io::BufReader::new(tempfile)).await?;
+    rqctx.update_tracker.put_repository(body.into_stream()).await?;
 
     Ok(HttpResponseUpdatedNoContent())
 }
diff --git a/wicketd/src/update_tracker.rs b/wicketd/src/update_tracker.rs
index 823a7964de..eec3ee5868 100644
--- a/wicketd/src/update_tracker.rs
+++ b/wicketd/src/update_tracker.rs
@@ -18,8 +18,10 @@ use anyhow::bail;
 use anyhow::ensure;
 use anyhow::Context;
 use base64::Engine;
+use bytes::Bytes;
 use display_error_chain::DisplayErrorChain;
 use dropshot::HttpError;
+use futures::Stream;
 use futures::TryFutureExt;
 use gateway_client::types::HostPhase2Progress;
 use gateway_client::types::HostPhase2RecoveryImageId;
@@ -48,7 +50,6 @@ use slog::Logger;
 use std::collections::btree_map::Entry;
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
-use std::io;
 use std::net::SocketAddrV6;
 use std::sync::atomic::AtomicBool;
 use std::sync::Arc;
@@ -64,6 +65,7 @@ use tokio::sync::Mutex;
 use tokio::task::JoinHandle;
 use tokio_util::io::StreamReader;
 use update_common::artifacts::ArtifactIdData;
+use update_common::artifacts::ArtifactsWithPlan;
 use update_common::artifacts::UpdatePlan;
 use update_engine::events::ProgressUnits;
 use update_engine::AbortHandle;
@@ -342,15 +344,21 @@ impl UpdateTracker {
     }
 
     /// Updates the repository stored inside the update tracker.
-    pub(crate) async fn put_repository<T>(
+    pub(crate) async fn put_repository(
         &self,
-        data: T,
-    ) -> Result<(), HttpError>
-    where
-        T: io::Read + io::Seek + Send + 'static,
-    {
+        stream: impl Stream<Item = Result<Bytes, HttpError>> + Send + 'static,
+    ) -> Result<(), HttpError> {
+        // Build the ArtifactsWithPlan from the stream.
+        let artifacts_with_plan = ArtifactsWithPlan::from_stream(
+            stream,
+            // We don't have a good file name here because file contents are
+            // uploaded over stdin, so let ArtifactsWithPlan pick the name.
+            None,
+            &self.log,
+        )
+        .await
+        .map_err(|error| error.to_http_error())?;
         let mut update_data = self.sp_update_data.lock().await;
-        update_data.put_repository(data).await
+        update_data.set_artifacts_with_plan(artifacts_with_plan).await
     }
 
     /// Gets a list of artifacts stored in the update repository.
@@ -725,10 +733,10 @@ impl UpdateTrackerData {
         }
     }
 
-    async fn put_repository<T>(&mut self, data: T) -> Result<(), HttpError>
-    where
-        T: io::Read + io::Seek + Send + 'static,
-    {
+    async fn set_artifacts_with_plan(
+        &mut self,
+        artifacts_with_plan: ArtifactsWithPlan,
+    ) -> Result<(), HttpError> {
         // Are there any updates currently running? If so, then reject the new
         // repository.
         let running_sps = self
@@ -745,8 +753,8 @@
             ));
         }
 
-        // Put the repository into the artifact store.
-        self.artifact_store.put_repository(data).await?;
+        // Set the new artifacts_with_plan.
+        self.artifact_store.set_artifacts_with_plan(artifacts_with_plan);
 
         // Reset all running data: a new repository means starting afresh.
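         // No updates are running (`running_sps` was checked to be empty
         // above), so it is safe to drop the per-SP state that was
         // accumulated against the previous plan.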
        self.sp_update_data.clear();

From f7ad3153a175b253d73a34f2597266c32280d153 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Thu, 25 Jan 2024 05:31:12 +0000
Subject: [PATCH 41/91] Update taiki-e/install-action digest to 9f9bf5e (#4893)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`cf2d7f1` -> `9f9bf5e`](https://togithub.com/taiki-e/install-action/compare/cf2d7f1...9f9bf5e) |

---

### Configuration

📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles.

🚦 **Automerge**: Enabled.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update again.

---

 - [ ] If you want to rebase/retry this PR, check this box

---

This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate).

Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com>
---
 .github/workflows/hakari.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml
index 06da0395a1..46d09c0940 100644
--- a/.github/workflows/hakari.yml
+++ b/.github/workflows/hakari.yml
@@ -24,7 +24,7 @@ jobs:
       with:
         toolchain: stable
     - name: Install cargo-hakari
-      uses: taiki-e/install-action@cf2d7f1118304815479579570ad3ec572fe94523 # v2
+      uses: taiki-e/install-action@9f9bf5e8df111848fb25b8a97a361d8963025899 # v2
       with:
         tool: cargo-hakari
     - name: Check workspace-hack Cargo.toml is up-to-date

From 2634aad9147972cc3e046663903b0404d121609e Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Thu, 25 Jan 2024 08:15:59 +0000
Subject: [PATCH 42/91] Update Rust crate rustyline to v13 (#4897)

---
 Cargo.lock | 48 +++++++++++++++++++++++-------------------------
 Cargo.toml |  2 +-
 2 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index c2f3e1a949..837d42fbc2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1003,13 +1003,11 @@
 checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961"
 
 [[package]]
 name = "clipboard-win"
-version = "4.5.0"
+version = "5.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7191c27c2357d9b7ef96baac1773290d4ca63b24205b82a3fd8a0637afcf0362"
+checksum = "c57002a5d9be777c1ef967e33674dac9ebd310d8893e4e3437b14d5f0f6372cc"
 dependencies = [
  "error-code",
- "str-buf",
- "winapi",
 ]
 
 [[package]]
@@ -2161,13 +2159,9 @@
 [[package]]
 name = "error-code"
-version = "2.3.1"
+version = "3.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64f18991e7bf11e7ffee451b5318b5c1a73c52d0d0ada6e5a3017c8c1ced6a21"
-dependencies = [
- "libc",
- "str-buf",
-]
+checksum = "281e452d3bad4005426416cdba5ccfd4f5c1280e10099e21db27f7c1c28347fc"
 
 [[package]]
 name = "expectorate"
@@ -2215,6 +2209,17 @@
 dependencies = [
  "windows-sys 0.48.0",
 ]
 
+[[package]]
+name = "fd-lock"
+version = "4.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e5768da2206272c81ef0b5e951a41862938a6070da63bcea197899942d3b947"
+dependencies = [
+ "cfg-if",
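+# Note: fd-lock 3 (via reedline) and fd-lock 4 (via rustyline 13) now
+# coexist; Cargo.lock disambiguates duplicate crate names by suffixing the
+# version in dependency lists, e.g. "fd-lock 3.0.13" vs "fd-lock 4.0.2".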
"rustix 0.38.30", + "windows-sys 0.52.0", +] + [[package]] name = "ff" version = "0.13.0" @@ -4452,11 +4457,11 @@ dependencies = [ [[package]] name = "nix" -version = "0.26.4" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.0", "cfg-if", "libc", ] @@ -6759,7 +6764,7 @@ checksum = "68f4e89a0f80909b3ca4bca9759ed37e4bfddb6f5d2ffb1b4ceb2b1638a3e1eb" dependencies = [ "chrono", "crossterm", - "fd-lock", + "fd-lock 3.0.13", "itertools 0.12.0", "nu-ansi-term", "serde", @@ -7337,21 +7342,20 @@ dependencies = [ [[package]] name = "rustyline" -version = "12.0.0" +version = "13.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "994eca4bca05c87e86e15d90fc7a91d1be64b4482b38cb2d27474568fe7c9db9" +checksum = "02a2d683a4ac90aeef5b1013933f6d977bd37d51ff3f4dad829d4931a7e6be86" dependencies = [ "bitflags 2.4.0", "cfg-if", "clipboard-win", - "fd-lock", + "fd-lock 4.0.2", "home", "libc", "log", "memchr", - "nix 0.26.4", + "nix 0.27.1", "radix_trie", - "scopeguard", "unicode-segmentation", "unicode-width", "utf8parse", @@ -8347,12 +8351,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "str-buf" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0" - [[package]] name = "string_cache" version = "0.8.7" diff --git a/Cargo.toml b/Cargo.toml index ed54ae8c6a..ba328fe612 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -324,7 +324,7 @@ rstest = "0.18.2" rustfmt-wrapper = "0.2" rustls = "0.22.2" rustls-pemfile = "2.0.0" -rustyline = "12.0.0" +rustyline = "13.0.0" samael = { version = "0.0.14", features = ["xmlsec"] } schemars = "0.8.16" secrecy = "0.8.0" From 98ab7e2bbbfc83f592ad6a3c0ca8afc48a81c9a9 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Thu, 25 Jan 2024 10:52:27 -0800 Subject: [PATCH 43/91] Update progenitor from v0.4.0 -> v0.5.0 (#4874) --- Cargo.lock | 152 +++++++++--------- clients/dns-service-client/src/lib.rs | 6 +- common/src/api/external/error.rs | 49 ++---- .../app/sagas/switch_port_settings_apply.rs | 2 +- .../app/sagas/switch_port_settings_clear.rs | 2 +- openapi/bootstrap-agent.json | 4 +- openapi/nexus-internal.json | 23 +-- openapi/sled-agent.json | 7 +- openapi/wicketd.json | 22 ++- schema/rss-service-plan-v2.json | 4 + schema/rss-sled-plan.json | 4 +- sled-agent/src/instance.rs | 6 +- sled-agent/src/sim/http_entrypoints_pantry.rs | 9 ++ wicketd/src/preflight_check/uplink.rs | 9 +- workspace-hack/Cargo.toml | 8 +- 15 files changed, 161 insertions(+), 146 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 837d42fbc2..7ea3d2b96d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -174,7 +174,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -275,7 +275,7 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -297,7 +297,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -308,7 +308,7 @@ checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn 
2.0.46", + "syn 2.0.48", ] [[package]] @@ -359,7 +359,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream 0.2.0", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -496,7 +496,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.46", + "syn 2.0.48", "which", ] @@ -992,7 +992,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1416,7 +1416,7 @@ checksum = "83fdaf97f4804dcebfa5862639bc9ce4121e82140bec2a987ac5140294865b5b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1464,7 +1464,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.10.0", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1486,7 +1486,7 @@ checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" dependencies = [ "darling_core 0.20.3", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1518,7 +1518,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream 0.2.0", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1570,7 +1570,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1603,7 +1603,7 @@ checksum = "5fe87ce4529967e0ba1dcf8450bab64d97dfd5010a6256187ffe2e43e6f0e049" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1623,7 +1623,7 @@ checksum = "62d671cc41a825ebabc75757b62d3d168c577f9149b2d49ece1dad1f72119d25" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1710,7 +1710,7 @@ dependencies = [ "diesel_table_macro_syntax", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1719,7 +1719,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc5557efc453706fed5e4fa85006fe9817c224c3f480a34c7e5959fd700921c5" dependencies = [ - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1962,7 +1962,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream 0.2.0", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -2324,7 +2324,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -2441,7 +2441,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -3665,7 +3665,7 @@ version = "0.1.0" source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -4077,7 +4077,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -4404,7 +4404,7 @@ version = "0.1.0" dependencies = [ "omicron-workspace-hack", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -4557,7 +4557,7 @@ checksum = "9e6a0fd4f737c707bd9086cc16c925f294943eb62eb71499e9fd4cf71f8b9f4e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -5283,7 +5283,7 @@ dependencies = [ "string_cache", "subtle", "syn 1.0.109", - "syn 2.0.46", + "syn 2.0.48", "time", "time-macros", "tokio", @@ -5397,7 +5397,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -5682,7 +5682,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 
2.0.46", + "syn 2.0.48", ] [[package]] @@ -5834,7 +5834,7 @@ dependencies = [ "regex", "regex-syntax 0.7.5", "structmeta", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -5980,7 +5980,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -6050,7 +6050,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -6299,7 +6299,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" dependencies = [ "proc-macro2", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -6347,17 +6347,17 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.74" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2de98502f212cfcea8d0bb305bd0f49d7ebdd75b64ba0a68f937d888f4e0d6db" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] [[package]] name = "progenitor" -version = "0.4.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" dependencies = [ "progenitor-client", "progenitor-impl", @@ -6367,8 +6367,8 @@ dependencies = [ [[package]] name = "progenitor-client" -version = "0.4.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" dependencies = [ "bytes", "futures-core", @@ -6381,8 +6381,8 @@ dependencies = [ [[package]] name = "progenitor-impl" -version = "0.4.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" dependencies = [ "getopts", "heck 0.4.1", @@ -6395,7 +6395,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "syn 2.0.46", + "syn 2.0.48", "thiserror", "typify", "unicode-ident", @@ -6403,8 +6403,8 @@ dependencies = [ [[package]] name = "progenitor-macro" -version = "0.4.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" dependencies = [ "openapiv3", "proc-macro2", @@ -6415,7 +6415,7 @@ dependencies = [ "serde_json", "serde_tokenstream 0.2.0", "serde_yaml", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -6793,7 +6793,7 @@ checksum = "7f7473c2cfcf90008193dd0e3e16599455cb601a9fce322b5bb55de799664925" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -7040,7 +7040,7 @@ dependencies = [ "regex", "relative-path", "rustc_version 0.4.0", - "syn 2.0.46", + "syn 2.0.48", "unicode-ident", ] @@ -7601,7 +7601,7 @@ checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -7662,7 +7662,7 @@ checksum = "8725e1dfadb3a50f7e5ce0b1a540466f6ed3fe7a0fca2ac2b8b831d31316bd00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", 
+ "syn 2.0.48", ] [[package]] @@ -7694,7 +7694,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -7735,7 +7735,7 @@ dependencies = [ "darling 0.20.3", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8049,7 +8049,7 @@ source = "git+https://github.com/oxidecomputer/slog-error-chain?branch=main#15f6 dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8304,7 +8304,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8405,7 +8405,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8416,7 +8416,7 @@ checksum = "a60bcaff7397072dca0017d1db428e30d5002e00b6847703e2e42005c95fbe00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8475,7 +8475,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8523,9 +8523,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.46" +version = "2.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89456b690ff72fddcecf231caedbe615c59480c93358a93dfae7fc29e3ebbf0e" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" dependencies = [ "proc-macro2", "quote", @@ -8707,7 +8707,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8732,22 +8732,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.49" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.49" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8934,7 +8934,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -9201,7 +9201,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -9427,8 +9427,8 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "typify" -version = "0.0.14" -source = "git+https://github.com/oxidecomputer/typify#c9d6453fc3cf69726d539925b838b267f886cb53" +version = "0.0.15" +source = "git+https://github.com/oxidecomputer/typify#1f97f167923f001818d461b1286f8a5242abf8b1" dependencies = [ "typify-impl", "typify-macro", @@ -9436,8 +9436,8 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.0.14" -source = "git+https://github.com/oxidecomputer/typify#c9d6453fc3cf69726d539925b838b267f886cb53" +version = "0.0.15" +source = "git+https://github.com/oxidecomputer/typify#1f97f167923f001818d461b1286f8a5242abf8b1" dependencies = [ "heck 0.4.1", "log", @@ -9446,15 +9446,15 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 
2.0.46", + "syn 2.0.48", "thiserror", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.0.14" -source = "git+https://github.com/oxidecomputer/typify#c9d6453fc3cf69726d539925b838b267f886cb53" +version = "0.0.15" +source = "git+https://github.com/oxidecomputer/typify#1f97f167923f001818d461b1286f8a5242abf8b1" dependencies = [ "proc-macro2", "quote", @@ -9462,7 +9462,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream 0.2.0", - "syn 2.0.46", + "syn 2.0.48", "typify-impl", ] @@ -9852,7 +9852,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", "wasm-bindgen-shared", ] @@ -9886,7 +9886,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -10429,7 +10429,7 @@ checksum = "56097d5b91d711293a42be9289403896b68654625021732067eac7a4ca388a1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -10440,7 +10440,7 @@ checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -10460,7 +10460,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] diff --git a/clients/dns-service-client/src/lib.rs b/clients/dns-service-client/src/lib.rs index 931e68322f..e437f1a7f6 100644 --- a/clients/dns-service-client/src/lib.rs +++ b/clients/dns-service-client/src/lib.rs @@ -29,8 +29,10 @@ pub fn is_retryable(error: &DnsConfigError) -> bool { let response_value = match error { DnsConfigError::CommunicationError(_) => return true, DnsConfigError::InvalidRequest(_) - | DnsConfigError::InvalidResponsePayload(_) - | DnsConfigError::UnexpectedResponse(_) => return false, + | DnsConfigError::InvalidResponsePayload(_, _) + | DnsConfigError::UnexpectedResponse(_) + | DnsConfigError::InvalidUpgrade(_) + | DnsConfigError::ResponseBodyError(_) => return false, DnsConfigError::ErrorResponse(response_value) => response_value, }; diff --git a/common/src/api/external/error.rs b/common/src/api/external/error.rs index 2661db7bb6..a3876fcac3 100644 --- a/common/src/api/external/error.rs +++ b/common/src/api/external/error.rs @@ -487,20 +487,19 @@ pub trait ClientError: std::fmt::Debug { impl From> for Error { fn from(e: progenitor::progenitor_client::Error) -> Self { match e { - // This error indicates that the inputs were not valid for this API - // call. It's reflective of either a client-side programming error. - progenitor::progenitor_client::Error::InvalidRequest(msg) => { - Error::internal_error(&format!("InvalidRequest: {}", msg)) + // For most error variants, we delegate to the display impl for the + // Progenitor error type, but we pick apart an error response more + // carefully. 
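+            // `InvalidUpgrade` and `ResponseBodyError` are new in progenitor
+            // 0.5; like the other variants in this arm, they describe a
+            // client-side or protocol-level failure rather than an error
+            // response from the server, so each becomes a 500 whose internal
+            // message is the error's display output.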
+ progenitor::progenitor_client::Error::InvalidRequest(_) + | progenitor::progenitor_client::Error::CommunicationError(_) + | progenitor::progenitor_client::Error::InvalidResponsePayload( + .., + ) + | progenitor::progenitor_client::Error::UnexpectedResponse(_) + | progenitor::progenitor_client::Error::InvalidUpgrade(_) + | progenitor::progenitor_client::Error::ResponseBodyError(_) => { + Error::internal_error(&e.to_string()) } - - // This error indicates a problem with the request to the remote - // service that did not result in an HTTP response code, but rather - // pertained to local (i.e. client-side) encoding or network - // communication. - progenitor::progenitor_client::Error::CommunicationError(ee) => { - Error::internal_error(&format!("CommunicationError: {}", ee)) - } - // This error represents an expected error from the remote service. progenitor::progenitor_client::Error::ErrorResponse(rv) => { let message = rv.message(); @@ -515,30 +514,6 @@ impl From> for Error { _ => Error::internal_error(&message), } } - - // This error indicates that the body returned by the client didn't - // match what was documented in the OpenAPI description for the - // service. This could only happen for us in the case of a severe - // logic/encoding bug in the remote service or due to a failure of - // our version constraints (i.e. that the call was to a newer - // service with an incompatible response). - progenitor::progenitor_client::Error::InvalidResponsePayload( - ee, - ) => Error::internal_error(&format!( - "InvalidResponsePayload: {}", - ee, - )), - - // This error indicates that the client generated a response code - // that was not described in the OpenAPI description for the - // service; this could be a success or failure response, but either - // way it indicates a logic or version error as above. - progenitor::progenitor_client::Error::UnexpectedResponse(r) => { - Error::internal_error(&format!( - "UnexpectedResponse: status code {}", - r.status(), - )) - } } } } diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index 979ec54afd..9d0573f6b0 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -307,7 +307,7 @@ async fn spa_undo_ensure_switch_port_settings( let log = sagactx.user_data().log(); let port_id: PortId = PortId::from_str(¶ms.switch_port_name) - .map_err(|e| external::Error::internal_error(e))?; + .map_err(|e| external::Error::internal_error(e.to_string().as_str()))?; let orig_port_settings_id = sagactx .lookup::>("original_switch_port_settings_id") diff --git a/nexus/src/app/sagas/switch_port_settings_clear.rs b/nexus/src/app/sagas/switch_port_settings_clear.rs index ff79de8e8e..15290dd75b 100644 --- a/nexus/src/app/sagas/switch_port_settings_clear.rs +++ b/nexus/src/app/sagas/switch_port_settings_clear.rs @@ -187,7 +187,7 @@ async fn spa_undo_clear_switch_port_settings( let log = sagactx.user_data().log(); let port_id: PortId = PortId::from_str(¶ms.port_name) - .map_err(|e| external::Error::internal_error(e))?; + .map_err(|e| external::Error::internal_error(e.to_string().as_str()))?; let orig_port_settings_id = sagactx .lookup::>("original_switch_port_settings_id") diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 2a7ff43202..6fd83cef47 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -355,6 +355,7 @@ ] }, "Certificate": { + "description": "Certificate\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"cert\", \"key\" ], \"properties\": { \"cert\": { \"type\": \"string\" }, \"key\": { \"type\": \"string\" } } } ```
", "type": "object", "properties": { "cert": { @@ -903,6 +904,7 @@ "format": "uuid" }, "RecoverySiloConfig": { + "description": "RecoverySiloConfig\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"silo_name\", \"user_name\", \"user_password_hash\" ], \"properties\": { \"silo_name\": { \"$ref\": \"#/components/schemas/Name\" }, \"user_name\": { \"$ref\": \"#/components/schemas/UserId\" }, \"user_password_hash\": { \"$ref\": \"#/components/schemas/NewPasswordHash\" } } } ```
", "type": "object", "properties": { "silo_name": { @@ -967,7 +969,7 @@ ] }, "UserId": { - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\n\n
JSON schema\n\n```json { \"title\": \"A name unique within the parent collection\", \"description\": \"Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\", \"type\": \"string\", \"maxLength\": 63, \"minLength\": 1, \"pattern\": \"^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$\" } ```
", "type": "string" } }, diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 2a047068ee..8b0807d52c 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -3218,6 +3218,7 @@ ] }, "DnsConfigParams": { + "description": "DnsConfigParams\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"generation\", \"time_created\", \"zones\" ], \"properties\": { \"generation\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"time_created\": { \"type\": \"string\", \"format\": \"date-time\" }, \"zones\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsConfigZone\" } } } } ```
", "type": "object", "properties": { "generation": { @@ -3243,6 +3244,7 @@ ] }, "DnsConfigZone": { + "description": "DnsConfigZone\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"records\", \"zone_name\" ], \"properties\": { \"records\": { \"type\": \"object\", \"additionalProperties\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsRecord\" } } }, \"zone_name\": { \"type\": \"string\" } } } ```
", "type": "object", "properties": { "records": { @@ -3264,6 +3266,7 @@ ] }, "DnsRecord": { + "description": "DnsRecord\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv4\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"A\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv6\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"AAAA\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"$ref\": \"#/components/schemas/Srv\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"SRV\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -4189,6 +4192,7 @@ ] }, "IpNet": { + "description": "IpNet\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"title\": \"v4\", \"allOf\": [ { \"$ref\": \"#/components/schemas/Ipv4Net\" } ] }, { \"title\": \"v6\", \"allOf\": [ { \"$ref\": \"#/components/schemas/Ipv6Net\" } ] } ] } ```
", "anyOf": [ { "$ref": "#/components/schemas/Ipv4Net" @@ -4286,7 +4290,7 @@ ] }, "Ipv4Net": { - "description": "An IPv4 subnet, including prefix and subnet mask", + "description": "An IPv4 subnet, including prefix and subnet mask\n\n
JSON schema\n\n```json { \"title\": \"An IPv4 subnet\", \"description\": \"An IPv4 subnet, including prefix and subnet mask\", \"examples\": [ \"192.168.1.0/24\" ], \"type\": \"string\", \"pattern\": \"^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$\" } ```
", "type": "string" }, "Ipv4Network": { @@ -4312,7 +4316,7 @@ ] }, "Ipv6Net": { - "description": "An IPv6 subnet, including prefix and subnet mask", + "description": "An IPv6 subnet, including prefix and subnet mask\n\n
JSON schema\n\n```json { \"title\": \"An IPv6 subnet\", \"description\": \"An IPv6 subnet, including prefix and subnet mask\", \"examples\": [ \"fd12:3456::/64\" ], \"type\": \"string\", \"pattern\": \"^([fF][dD])[0-9a-fA-F]{2}:(([0-9a-fA-F]{1,4}:){6}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,6}:)([0-9a-fA-F]{1,4})?\\\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$\" } ```
", "type": "string" }, "Ipv6Network": { @@ -4654,7 +4658,7 @@ "maxLength": 63 }, "NetworkInterface": { - "description": "Information required to construct a virtual network interface", + "description": "Information required to construct a virtual network interface\n\n
JSON schema\n\n```json { \"description\": \"Information required to construct a virtual network interface\", \"type\": \"object\", \"required\": [ \"id\", \"ip\", \"kind\", \"mac\", \"name\", \"primary\", \"slot\", \"subnet\", \"vni\" ], \"properties\": { \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"ip\": { \"type\": \"string\", \"format\": \"ip\" }, \"kind\": { \"$ref\": \"#/components/schemas/NetworkInterfaceKind\" }, \"mac\": { \"$ref\": \"#/components/schemas/MacAddr\" }, \"name\": { \"$ref\": \"#/components/schemas/Name\" }, \"primary\": { \"type\": \"boolean\" }, \"slot\": { \"type\": \"integer\", \"format\": \"uint8\", \"minimum\": 0.0 }, \"subnet\": { \"$ref\": \"#/components/schemas/IpNet\" }, \"vni\": { \"$ref\": \"#/components/schemas/Vni\" } } } ```
", "type": "object", "properties": { "id": { @@ -4702,7 +4706,7 @@ ] }, "NetworkInterfaceKind": { - "description": "The type of network interface", + "description": "The type of network interface\n\n
JSON schema\n\n```json { \"description\": \"The type of network interface\", \"oneOf\": [ { \"description\": \"A vNIC attached to a guest instance\", \"type\": \"object\", \"required\": [ \"id\", \"type\" ], \"properties\": { \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"instance\" ] } } }, { \"description\": \"A vNIC associated with an internal service\", \"type\": \"object\", \"required\": [ \"id\", \"type\" ], \"properties\": { \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"service\" ] } } } ] } ```
", "oneOf": [ { "description": "A vNIC attached to a guest instance", @@ -4756,7 +4760,7 @@ "type": "string" }, "OmicronZoneConfig": { - "description": "Describes one Omicron-managed zone running on a sled", + "description": "Describes one Omicron-managed zone running on a sled\n\n
JSON schema\n\n```json { \"description\": \"Describes one Omicron-managed zone running on a sled\", \"type\": \"object\", \"required\": [ \"id\", \"underlay_address\", \"zone_type\" ], \"properties\": { \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"underlay_address\": { \"type\": \"string\", \"format\": \"ipv6\" }, \"zone_type\": { \"$ref\": \"#/components/schemas/OmicronZoneType\" } } } ```
", "type": "object", "properties": { "id": { @@ -4778,7 +4782,7 @@ ] }, "OmicronZoneDataset": { - "description": "Describes a persistent ZFS dataset associated with an Omicron zone", + "description": "Describes a persistent ZFS dataset associated with an Omicron zone\n\n
JSON schema\n\n```json { \"description\": \"Describes a persistent ZFS dataset associated with an Omicron zone\", \"type\": \"object\", \"required\": [ \"pool_name\" ], \"properties\": { \"pool_name\": { \"$ref\": \"#/components/schemas/ZpoolName\" } } } ```
", "type": "object", "properties": { "pool_name": { @@ -4790,7 +4794,7 @@ ] }, "OmicronZoneType": { - "description": "Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration", + "description": "Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration\n\n
JSON schema\n\n```json { \"description\": \"Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"address\", \"dns_servers\", \"nic\", \"ntp_servers\", \"snat_cfg\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dns_servers\": { \"type\": \"array\", \"items\": { \"type\": \"string\", \"format\": \"ip\" } }, \"domain\": { \"type\": [ \"string\", \"null\" ] }, \"nic\": { \"description\": \"The service vNIC providing outbound connectivity using OPTE.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/NetworkInterface\" } ] }, \"ntp_servers\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } }, \"snat_cfg\": { \"description\": \"The SNAT configuration for outbound connections.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/SourceNatConfig\" } ] }, \"type\": { \"type\": \"string\", \"enum\": [ \"boundary_ntp\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dataset\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"clickhouse\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dataset\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"clickhouse_keeper\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dataset\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"cockroach_db\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dataset\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"crucible\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"crucible_pantry\" ] } } }, { \"type\": \"object\", \"required\": [ \"dataset\", \"dns_address\", \"http_address\", \"nic\", \"type\" ], \"properties\": { \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"dns_address\": { \"description\": \"The address at which the external DNS server is reachable.\", \"type\": \"string\" }, \"http_address\": { \"description\": \"The address at which the external DNS server API is reachable.\", \"type\": \"string\" }, \"nic\": { \"description\": \"The service vNIC providing external connectivity using OPTE.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/NetworkInterface\" } ] }, \"type\": { \"type\": \"string\", \"enum\": [ \"external_dns\" ] } } }, { \"type\": \"object\", \"required\": [ \"dataset\", \"dns_address\", \"gz_address\", \"gz_address_index\", \"http_address\", \"type\" ], \"properties\": { \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"dns_address\": { \"type\": \"string\" }, \"gz_address\": { \"description\": \"The addresses in the global zone which should be created\\n\\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.\", \"type\": \"string\", \"format\": 
\"ipv6\" }, \"gz_address_index\": { \"description\": \"The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.\", \"type\": \"integer\", \"format\": \"uint32\", \"minimum\": 0.0 }, \"http_address\": { \"type\": \"string\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"internal_dns\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dns_servers\", \"ntp_servers\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dns_servers\": { \"type\": \"array\", \"items\": { \"type\": \"string\", \"format\": \"ip\" } }, \"domain\": { \"type\": [ \"string\", \"null\" ] }, \"ntp_servers\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } }, \"type\": { \"type\": \"string\", \"enum\": [ \"internal_ntp\" ] } } }, { \"type\": \"object\", \"required\": [ \"external_dns_servers\", \"external_ip\", \"external_tls\", \"internal_address\", \"nic\", \"type\" ], \"properties\": { \"external_dns_servers\": { \"description\": \"External DNS servers Nexus can use to resolve external hosts.\", \"type\": \"array\", \"items\": { \"type\": \"string\", \"format\": \"ip\" } }, \"external_ip\": { \"description\": \"The address at which the external nexus server is reachable.\", \"type\": \"string\", \"format\": \"ip\" }, \"external_tls\": { \"description\": \"Whether Nexus's external endpoint should use TLS\", \"type\": \"boolean\" }, \"internal_address\": { \"description\": \"The address at which the internal nexus server is reachable.\", \"type\": \"string\" }, \"nic\": { \"description\": \"The service vNIC providing external connectivity using OPTE.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/NetworkInterface\" } ] }, \"type\": { \"type\": \"string\", \"enum\": [ \"nexus\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"oximeter\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -5135,7 +5139,7 @@ ] }, "OmicronZonesConfig": { - "description": "Describes the set of Omicron-managed zones running on a sled", + "description": "Describes the set of Omicron-managed zones running on a sled\n\n
JSON schema\n\n```json { \"description\": \"Describes the set of Omicron-managed zones running on a sled\", \"type\": \"object\", \"required\": [ \"generation\", \"zones\" ], \"properties\": { \"generation\": { \"description\": \"generation number of this configuration\\n\\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\\n\\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/Generation\" } ] }, \"zones\": { \"description\": \"list of running zones\", \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/OmicronZoneConfig\" } } } } ```
", "type": "object", "properties": { "generation": { @@ -6386,6 +6390,7 @@ ] }, "Srv": { + "description": "Srv\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"port\", \"prio\", \"target\", \"weight\" ], \"properties\": { \"port\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"prio\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"target\": { \"type\": \"string\" }, \"weight\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 } } } ```
", "type": "object", "properties": { "port": { @@ -6499,7 +6504,7 @@ "minimum": 0 }, "ZpoolName": { - "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique", + "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique\n\n
JSON schema\n\n```json { \"title\": \"The name of a Zpool\", \"description\": \"Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique\", \"type\": \"string\", \"pattern\": \"^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$\" } ```
", "type": "string" }, "ZpoolPutRequest": { diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 3e3f6abec6..7b9a3efcda 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2645,6 +2645,7 @@ ] }, "CrucibleOpts": { + "description": "CrucibleOpts\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"id\", \"lossy\", \"read_only\", \"target\" ], \"properties\": { \"cert_pem\": { \"type\": [ \"string\", \"null\" ] }, \"control\": { \"type\": [ \"string\", \"null\" ] }, \"flush_timeout\": { \"type\": [ \"number\", \"null\" ], \"format\": \"float\" }, \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"key\": { \"type\": [ \"string\", \"null\" ] }, \"key_pem\": { \"type\": [ \"string\", \"null\" ] }, \"lossy\": { \"type\": \"boolean\" }, \"read_only\": { \"type\": \"boolean\" }, \"root_cert_pem\": { \"type\": [ \"string\", \"null\" ] }, \"target\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } } } } ```
", "type": "object", "properties": { "cert_pem": { @@ -3410,6 +3411,7 @@ ] }, "DiskRequest": { + "description": "DiskRequest\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"device\", \"name\", \"read_only\", \"slot\", \"volume_construction_request\" ], \"properties\": { \"device\": { \"type\": \"string\" }, \"name\": { \"type\": \"string\" }, \"read_only\": { \"type\": \"boolean\" }, \"slot\": { \"$ref\": \"#/components/schemas/Slot\" }, \"volume_construction_request\": { \"$ref\": \"#/components/schemas/VolumeConstructionRequest\" } } } ```
", "type": "object", "properties": { "device": { @@ -6332,7 +6334,7 @@ ] }, "SledRole": { - "description": "Describes the role of the sled within the rack.\n\nNote that this may change if the sled is physically moved within the rack.", + "description": "Describes the role of the sled within the rack.\n\nNote that this may change if the sled is physically moved within the rack.\n\n
JSON schema\n\n```json { \"description\": \"Describes the role of the sled within the rack.\\n\\nNote that this may change if the sled is physically moved within the rack.\", \"oneOf\": [ { \"description\": \"The sled is a general compute sled.\", \"type\": \"string\", \"enum\": [ \"gimlet\" ] }, { \"description\": \"The sled is attached to the network switch, and has additional responsibilities.\", \"type\": \"string\", \"enum\": [ \"scrimlet\" ] } ] } ```
", "oneOf": [ { "description": "The sled is a general compute sled.", @@ -6351,7 +6353,7 @@ ] }, "Slot": { - "description": "A stable index which is translated by Propolis into a PCI BDF, visible to the guest.", + "description": "A stable index which is translated by Propolis into a PCI BDF, visible to the guest.\n\n
JSON schema\n\n```json { \"description\": \"A stable index which is translated by Propolis into a PCI BDF, visible to the guest.\", \"type\": \"integer\", \"format\": \"uint8\", \"minimum\": 0.0 } ```
", "type": "integer", "format": "uint8", "minimum": 0 @@ -6602,6 +6604,7 @@ "minimum": 0 }, "VolumeConstructionRequest": { + "description": "VolumeConstructionRequest\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"block_size\", \"id\", \"sub_volumes\", \"type\" ], \"properties\": { \"block_size\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"read_only_parent\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/VolumeConstructionRequest\" } ] }, \"sub_volumes\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/VolumeConstructionRequest\" } }, \"type\": { \"type\": \"string\", \"enum\": [ \"volume\" ] } } }, { \"type\": \"object\", \"required\": [ \"block_size\", \"id\", \"type\", \"url\" ], \"properties\": { \"block_size\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"url\" ] }, \"url\": { \"type\": \"string\" } } }, { \"type\": \"object\", \"required\": [ \"block_size\", \"blocks_per_extent\", \"extent_count\", \"gen\", \"opts\", \"type\" ], \"properties\": { \"block_size\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"blocks_per_extent\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"extent_count\": { \"type\": \"integer\", \"format\": \"uint32\", \"minimum\": 0.0 }, \"gen\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"opts\": { \"$ref\": \"#/components/schemas/CrucibleOpts\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"region\" ] } } }, { \"type\": \"object\", \"required\": [ \"block_size\", \"id\", \"path\", \"type\" ], \"properties\": { \"block_size\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"path\": { \"type\": \"string\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"file\" ] } } } ] } ```
", "oneOf": [ { "type": "object", diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 804b2029c6..300e8412c3 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -1628,7 +1628,7 @@ ] }, "PowerState": { - "description": "See RFD 81.\n\nThis enum only lists power states the SP is able to control; higher power states are controlled by ignition.", + "description": "See RFD 81.\n\nThis enum only lists power states the SP is able to control; higher power states are controlled by ignition.\n\n
JSON schema\n\n```json { \"description\": \"See RFD 81.\\n\\nThis enum only lists power states the SP is able to control; higher power states are controlled by ignition.\", \"type\": \"string\", \"enum\": [ \"A0\", \"A1\", \"A2\" ] } ```
", "type": "string", "enum": [ "A0", @@ -2186,6 +2186,7 @@ ] }, "RackInitId": { + "description": "RackInitId\n\n
JSON schema\n\n```json { \"type\": \"string\", \"format\": \"uuid\" } ```
", "type": "string", "format": "uuid" }, @@ -2230,7 +2231,7 @@ ] }, "RackOperationStatus": { - "description": "Current status of any rack-level operation being performed by this bootstrap agent.", + "description": "Current status of any rack-level operation being performed by this bootstrap agent.\n\n
JSON schema\n\n```json { \"description\": \"Current status of any rack-level operation being performed by this bootstrap agent.\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackInitId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initializing\" ] } } }, { \"description\": \"`id` will be none if the rack was already initialized on startup.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RackInitId\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackInitId\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackInitId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_panicked\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackResetId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"resetting\" ] } } }, { \"description\": \"`reset_id` will be None if the rack is in an uninitialized-on-startup, or Some if it is in an uninitialized state due to a reset operation completing.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"reset_id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RackResetId\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"uninitialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackResetId\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"reset_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackResetId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"reset_panicked\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -2397,6 +2398,7 @@ ] }, "RackResetId": { + "description": "RackResetId\n\n
JSON schema\n\n```json { \"type\": \"string\", \"format\": \"uuid\" } ```
", "type": "string", "format": "uuid" }, @@ -2444,6 +2446,7 @@ ] }, "RotSlot": { + "description": "RotSlot\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"slot\" ], \"properties\": { \"slot\": { \"type\": \"string\", \"enum\": [ \"a\" ] } } }, { \"type\": \"object\", \"required\": [ \"slot\" ], \"properties\": { \"slot\": { \"type\": \"string\", \"enum\": [ \"b\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -2476,6 +2479,7 @@ ] }, "RotState": { + "description": "RotState\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"active\", \"persistent_boot_preference\", \"state\" ], \"properties\": { \"active\": { \"$ref\": \"#/components/schemas/RotSlot\" }, \"pending_persistent_boot_preference\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RotSlot\" } ] }, \"persistent_boot_preference\": { \"$ref\": \"#/components/schemas/RotSlot\" }, \"slot_a_sha3_256_digest\": { \"type\": [ \"string\", \"null\" ] }, \"slot_b_sha3_256_digest\": { \"type\": [ \"string\", \"null\" ] }, \"state\": { \"type\": \"string\", \"enum\": [ \"enabled\" ] }, \"transient_boot_preference\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RotSlot\" } ] } } }, { \"type\": \"object\", \"required\": [ \"message\", \"state\" ], \"properties\": { \"message\": { \"type\": \"string\" }, \"state\": { \"type\": \"string\", \"enum\": [ \"communication_failed\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -2570,6 +2574,7 @@ "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" }, "SpComponentCaboose": { + "description": "SpComponentCaboose\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"board\", \"git_commit\", \"name\", \"version\" ], \"properties\": { \"board\": { \"type\": \"string\" }, \"git_commit\": { \"type\": \"string\" }, \"name\": { \"type\": \"string\" }, \"version\": { \"type\": \"string\" } } } ```
", "type": "object", "properties": { "board": { @@ -2593,7 +2598,7 @@ ] }, "SpComponentInfo": { - "description": "Overview of a single SP component.", + "description": "Overview of a single SP component.\n\n
JSON schema\n\n```json { \"description\": \"Overview of a single SP component.\", \"type\": \"object\", \"required\": [ \"capabilities\", \"component\", \"description\", \"device\", \"presence\" ], \"properties\": { \"capabilities\": { \"description\": \"`capabilities` is a bitmask; interpret it via [`gateway_messages::DeviceCapabilities`].\", \"type\": \"integer\", \"format\": \"uint32\", \"minimum\": 0.0 }, \"component\": { \"description\": \"The unique identifier for this component.\", \"type\": \"string\" }, \"description\": { \"description\": \"A human-readable description of the component.\", \"type\": \"string\" }, \"device\": { \"description\": \"The name of the physical device.\", \"type\": \"string\" }, \"presence\": { \"description\": \"Whether or not the component is present, to the best of the SP's ability to judge.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/SpComponentPresence\" } ] }, \"serial_number\": { \"description\": \"The component's serial number, if it has one.\", \"type\": [ \"string\", \"null\" ] } } } ```
", "type": "object", "properties": { "capabilities": { @@ -2637,7 +2642,7 @@ ] }, "SpComponentPresence": { - "description": "Description of the presence or absence of a component.\n\nThe presence of some components may vary based on the power state of the sled (e.g., components that time out or appear unavailable if the sled is in A2 may become present when the sled moves to A0).", + "description": "Description of the presence or absence of a component.\n\nThe presence of some components may vary based on the power state of the sled (e.g., components that time out or appear unavailable if the sled is in A2 may become present when the sled moves to A0).\n\n
JSON schema\n\n```json { \"description\": \"Description of the presence or absence of a component.\\n\\nThe presence of some components may vary based on the power state of the sled (e.g., components that time out or appear unavailable if the sled is in A2 may become present when the sled moves to A0).\", \"oneOf\": [ { \"description\": \"The component is present.\", \"type\": \"string\", \"enum\": [ \"present\" ] }, { \"description\": \"The component is not present.\", \"type\": \"string\", \"enum\": [ \"not_present\" ] }, { \"description\": \"The component is present but in a failed or faulty state.\", \"type\": \"string\", \"enum\": [ \"failed\" ] }, { \"description\": \"The SP is unable to determine the presence of the component.\", \"type\": \"string\", \"enum\": [ \"unavailable\" ] }, { \"description\": \"The SP's attempt to determine the presence of the component timed out.\", \"type\": \"string\", \"enum\": [ \"timeout\" ] }, { \"description\": \"The SP's attempt to determine the presence of the component failed.\", \"type\": \"string\", \"enum\": [ \"error\" ] } ] } ```
", "oneOf": [ { "description": "The component is present.", @@ -2684,6 +2689,7 @@ ] }, "SpIdentifier": { + "description": "SpIdentifier\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"slot\", \"type\" ], \"properties\": { \"slot\": { \"type\": \"integer\", \"format\": \"uint32\", \"minimum\": 0.0 }, \"type\": { \"$ref\": \"#/components/schemas/SpType\" } } } ```
", "type": "object", "properties": { "slot": { @@ -2701,7 +2707,7 @@ ] }, "SpIgnition": { - "description": "State of an ignition target.\n\nTODO: Ignition returns much more information than we're reporting here: do we want to expand this?", + "description": "State of an ignition target.\n\nTODO: Ignition returns much more information than we're reporting here: do we want to expand this?\n\n
JSON schema\n\n```json { \"description\": \"State of an ignition target.\\n\\nTODO: Ignition returns much more information than we're reporting here: do we want to expand this?\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"present\" ], \"properties\": { \"present\": { \"type\": \"string\", \"enum\": [ \"no\" ] } } }, { \"type\": \"object\", \"required\": [ \"ctrl_detect_0\", \"ctrl_detect_1\", \"flt_a2\", \"flt_a3\", \"flt_rot\", \"flt_sp\", \"id\", \"power\", \"present\" ], \"properties\": { \"ctrl_detect_0\": { \"type\": \"boolean\" }, \"ctrl_detect_1\": { \"type\": \"boolean\" }, \"flt_a2\": { \"type\": \"boolean\" }, \"flt_a3\": { \"type\": \"boolean\" }, \"flt_rot\": { \"type\": \"boolean\" }, \"flt_sp\": { \"type\": \"boolean\" }, \"id\": { \"$ref\": \"#/components/schemas/SpIgnitionSystemType\" }, \"power\": { \"type\": \"boolean\" }, \"present\": { \"type\": \"string\", \"enum\": [ \"yes\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -2766,7 +2772,7 @@ ] }, "SpIgnitionSystemType": { - "description": "TODO: Do we want to bake in specific board names, or use raw u16 ID numbers?", + "description": "TODO: Do we want to bake in specific board names, or use raw u16 ID numbers?\n\n
JSON schema\n\n```json { \"description\": \"TODO: Do we want to bake in specific board names, or use raw u16 ID numbers?\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"system_type\" ], \"properties\": { \"system_type\": { \"type\": \"string\", \"enum\": [ \"gimlet\" ] } } }, { \"type\": \"object\", \"required\": [ \"system_type\" ], \"properties\": { \"system_type\": { \"type\": \"string\", \"enum\": [ \"sidecar\" ] } } }, { \"type\": \"object\", \"required\": [ \"system_type\" ], \"properties\": { \"system_type\": { \"type\": \"string\", \"enum\": [ \"psc\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"system_type\" ], \"properties\": { \"id\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"system_type\": { \"type\": \"string\", \"enum\": [ \"unknown\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -2892,6 +2898,7 @@ ] }, "SpState": { + "description": "SpState\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"base_mac_address\", \"hubris_archive_id\", \"model\", \"power_state\", \"revision\", \"rot\", \"serial_number\" ], \"properties\": { \"base_mac_address\": { \"type\": \"array\", \"items\": { \"type\": \"integer\", \"format\": \"uint8\", \"minimum\": 0.0 }, \"maxItems\": 6, \"minItems\": 6 }, \"hubris_archive_id\": { \"type\": \"string\" }, \"model\": { \"type\": \"string\" }, \"power_state\": { \"$ref\": \"#/components/schemas/PowerState\" }, \"revision\": { \"type\": \"integer\", \"format\": \"uint32\", \"minimum\": 0.0 }, \"rot\": { \"$ref\": \"#/components/schemas/RotState\" }, \"serial_number\": { \"type\": \"string\" } } } ```
", "type": "object", "properties": { "base_mac_address": { @@ -2936,6 +2943,7 @@ ] }, "SpType": { + "description": "SpType\n\n
JSON schema\n\n```json { \"type\": \"string\", \"enum\": [ \"sled\", \"power\", \"switch\" ] } ```
", "type": "string", "enum": [ "sled", @@ -4691,7 +4699,7 @@ ] }, "IgnitionCommand": { - "description": "Ignition command.", + "description": "Ignition command.\n\n
JSON schema\n\n```json { \"description\": \"Ignition command.\", \"type\": \"string\", \"enum\": [ \"power_on\", \"power_off\", \"power_reset\" ] } ```
", "type": "string", "enum": [ "power_on", diff --git a/schema/rss-service-plan-v2.json b/schema/rss-service-plan-v2.json index 62ce358938..10d8f8ab95 100644 --- a/schema/rss-service-plan-v2.json +++ b/schema/rss-service-plan-v2.json @@ -19,6 +19,7 @@ }, "definitions": { "DnsConfigParams": { + "description": "DnsConfigParams\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"generation\", \"time_created\", \"zones\" ], \"properties\": { \"generation\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"time_created\": { \"type\": \"string\", \"format\": \"date-time\" }, \"zones\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsConfigZone\" } } } } ```
", "type": "object", "required": [ "generation", @@ -44,6 +45,7 @@ } }, "DnsConfigZone": { + "description": "DnsConfigZone\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"records\", \"zone_name\" ], \"properties\": { \"records\": { \"type\": \"object\", \"additionalProperties\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsRecord\" } } }, \"zone_name\": { \"type\": \"string\" } } } ```
", "type": "object", "required": [ "records", @@ -65,6 +67,7 @@ } }, "DnsRecord": { + "description": "DnsRecord\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv4\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"A\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv6\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"AAAA\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"$ref\": \"#/components/schemas/Srv\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"SRV\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -701,6 +704,7 @@ } }, "Srv": { + "description": "Srv\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"port\", \"prio\", \"target\", \"weight\" ], \"properties\": { \"port\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"prio\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"target\": { \"type\": \"string\" }, \"weight\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 } } } ```
", "type": "object", "required": [ "port", diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index 0396ccc685..cbd73ed066 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -227,6 +227,7 @@ ] }, "Certificate": { + "description": "Certificate\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"cert\", \"key\" ], \"properties\": { \"cert\": { \"type\": \"string\" }, \"key\": { \"type\": \"string\" } } } ```
", "type": "object", "required": [ "cert", @@ -594,6 +595,7 @@ } }, "RecoverySiloConfig": { + "description": "RecoverySiloConfig\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"silo_name\", \"user_name\", \"user_password_hash\" ], \"properties\": { \"silo_name\": { \"$ref\": \"#/components/schemas/Name\" }, \"user_name\": { \"$ref\": \"#/components/schemas/UserId\" }, \"user_password_hash\": { \"$ref\": \"#/components/schemas/NewPasswordHash\" } } } ```
", "type": "object", "required": [ "silo_name", @@ -718,7 +720,7 @@ ] }, "UserId": { - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\n\n
JSON schema\n\n```json { \"title\": \"A name unique within the parent collection\", \"description\": \"Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\", \"type\": \"string\", \"maxLength\": 63, \"minLength\": 1, \"pattern\": \"^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$\" } ```
", "type": "string" } } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 3bbe0762f8..47e61cfe71 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -274,8 +274,10 @@ impl InstanceInner { )) } nexus_client::Error::InvalidRequest(_) - | nexus_client::Error::InvalidResponsePayload(_) - | nexus_client::Error::UnexpectedResponse(_) => { + | nexus_client::Error::InvalidResponsePayload(..) + | nexus_client::Error::UnexpectedResponse(_) + | nexus_client::Error::InvalidUpgrade(_) + | nexus_client::Error::ResponseBodyError(_) => { BackoffError::permanent(Error::Notification( err, )) diff --git a/sled-agent/src/sim/http_entrypoints_pantry.rs b/sled-agent/src/sim/http_entrypoints_pantry.rs index 8f572b46a0..49368f363a 100644 --- a/sled-agent/src/sim/http_entrypoints_pantry.rs +++ b/sled-agent/src/sim/http_entrypoints_pantry.rs @@ -365,6 +365,15 @@ mod tests { ); }; for (key, value) in map.iter() { + // We intentionally skip the "description" key, provided + // that the value is also a true String. This is mostly a + // one-off for the udpate to Progenitor 0.5.0, which caused + // this key to be added. But it's also pretty harmless, + // since it's not possible to get this key-value combination + // in a real JSON schema. + if key == "description" && value.is_string() { + continue; + } let new_path = format!("{path}/{key}"); let rhs_value = rhs_map.get(key).unwrap_or_else(|| { panic!("Real API JSON missing key: \"{new_path}\"") diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs index 25411f17a5..47995f0c10 100644 --- a/wicketd/src/preflight_check/uplink.rs +++ b/wicketd/src/preflight_check/uplink.rs @@ -161,8 +161,11 @@ fn add_steps_for_single_local_uplink_preflight_check<'a>( |_cx| async { // Check that the port name is valid and that it has no links // configured already. 
- let port_id = PortId::from_str(&uplink.port) - .map_err(UplinkPreflightTerminalError::InvalidPortName)?; + let port_id = PortId::from_str(&uplink.port).map_err(|_| { + UplinkPreflightTerminalError::InvalidPortName( + uplink.port.clone(), + ) + })?; let links = dpd_client .link_list(&port_id) .await @@ -892,7 +895,7 @@ type DpdError = dpd_client::Error; #[derive(Debug, Error)] pub(crate) enum UplinkPreflightTerminalError { #[error("invalid port name: {0}")] - InvalidPortName(&'static str), + InvalidPortName(String), #[error("failed to connect to dpd to check for current configuration")] GetCurrentConfig(#[source] DpdError), #[error("uplink already configured - is rack already initialized?")] diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 25a72838a0..49b2489c40 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -81,7 +81,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.0" } -proc-macro2 = { version = "1.0.74" } +proc-macro2 = { version = "1.0.78" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.3" } @@ -101,7 +101,7 @@ spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } -syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.46", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } +syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.48", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } tokio = { version = "1.35.1", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } @@ -188,7 +188,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.0" } -proc-macro2 = { version = "1.0.74" } +proc-macro2 = { version = "1.0.78" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.3" } @@ -208,7 +208,7 @@ spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } -syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.46", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } +syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.48", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.13", default-features = false, features = ["formatting", "parsing"] } tokio = { version = "1.35.1", features = ["full", "test-util"] } From 4fef59923f1cde8a6e8671c347a2e5ad25fa7aa8 Mon Sep 17 
00:00:00 2001 From: bnaecker Date: Thu, 25 Jan 2024 13:15:09 -0800 Subject: [PATCH 44/91] Update to USDT 0.5.0 (#4898) - Update diesel-dtrace - Update usdt - Handle API change in how providers are named, since dunders are no longer translated to dashes. --- Cargo.lock | 245 ++++++++++++++++++++----- Cargo.toml | 4 +- gateway/src/lib.rs | 5 +- nexus/db-queries/src/lib.rs | 2 +- oximeter/db/src/client.rs | 2 +- sled-agent/src/bootstrap/server.rs | 4 +- tools/dtrace/aggregate-query-latency.d | 4 +- tools/dtrace/slowest-queries.d | 6 +- tools/dtrace/trace-db-queries.d | 6 +- workspace-hack/Cargo.toml | 10 +- 10 files changed, 223 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7ea3d2b96d..45d1d47199 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -62,7 +62,7 @@ dependencies = [ "getrandom 0.2.10", "once_cell", "version_check", - "zerocopy 0.7.31", + "zerocopy 0.7.32", ] [[package]] @@ -1691,12 +1691,12 @@ dependencies = [ [[package]] name = "diesel-dtrace" -version = "0.2.0" -source = "git+https://github.com/oxidecomputer/diesel-dtrace?branch=main#c1252df734b52b4e1243e0ca2bd5f00b17730408" +version = "0.3.0" +source = "git+https://github.com/oxidecomputer/diesel-dtrace?branch=main#62ef5ca0fe243a0929791bb9efbb7ed9c32c5368" dependencies = [ "diesel", "serde", - "usdt", + "usdt 0.5.0", "uuid", "version_check", ] @@ -1875,6 +1875,20 @@ dependencies = [ "zerocopy 0.3.0", ] +[[package]] +name = "dof" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "558e5396321b677a59d2c43b3cc3bc44683109c63ac49275f3bbbf41c0ecd002" +dependencies = [ + "goblin", + "pretty-hex 0.4.1", + "serde", + "serde_json", + "thiserror", + "zerocopy 0.7.32", +] + [[package]] name = "downcast" version = "0.11.0" @@ -1947,7 +1961,7 @@ dependencies = [ "tokio", "tokio-rustls 0.25.0", "toml 0.8.8", - "usdt", + "usdt 0.3.5", "uuid", "version_check", "waitgroup", @@ -1976,6 +1990,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "dtrace-parser" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71734e3eb68cd4df338d04dffdcc024f89eb0b238150cc95b826fbfad756452b" +dependencies = [ + "pest", + "pest_derive", + "thiserror", +] + [[package]] name = "dyn-clone" version = "1.0.13" @@ -2571,7 +2596,7 @@ dependencies = [ "thiserror", "tlvc 0.3.1 (git+https://github.com/oxidecomputer/tlvc.git?branch=main)", "tokio", - "usdt", + "usdt 0.3.5", "uuid", "version_check", "zip", @@ -2672,6 +2697,17 @@ dependencies = [ "regex", ] +[[package]] +name = "goblin" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb07a4ffed2093b118a525b1d8f5204ae274faed5604537caf7135d0f18d9887" +dependencies = [ + "log", + "plain", + "scroll", +] + [[package]] name = "group" version = "0.13.0" @@ -3969,6 +4005,16 @@ version = "2.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" +[[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "memoffset" version = "0.7.1" @@ -4282,7 +4328,7 @@ dependencies = [ "term", "thiserror", "tokio", - "usdt", + "usdt 0.5.0", "uuid", ] @@ -4832,7 +4878,7 @@ dependencies = [ "signal-hook", "signal-hook-tokio", "slog", - "slog-dtrace", + "slog-dtrace 0.3.0", "slog-error-chain", 
"sp-sim", "subprocess", @@ -4941,7 +4987,7 @@ dependencies = [ "sled-agent-client", "slog", "slog-async", - "slog-dtrace", + "slog-dtrace 0.3.0", "slog-error-chain", "slog-term", "sp-sim", @@ -5135,7 +5181,7 @@ dependencies = [ "sled-storage", "slog", "slog-async", - "slog-dtrace", + "slog-dtrace 0.3.0", "slog-term", "smf", "static_assertions", @@ -5148,7 +5194,7 @@ dependencies = [ "tokio-stream", "tokio-util", "toml 0.8.8", - "usdt", + "usdt 0.5.0", "uuid", "zeroize", "zone", @@ -5184,7 +5230,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "usdt", + "usdt 0.5.0", "walkdir", ] @@ -5218,6 +5264,7 @@ dependencies = [ "der", "diesel", "digest", + "dof 0.3.0", "either", "elliptic-curve", "errno", @@ -5298,10 +5345,11 @@ dependencies = [ "trust-dns-proto", "unicode-bidi", "unicode-normalization", - "usdt", + "usdt 0.3.5", + "usdt-impl 0.5.0", "uuid", "yasna", - "zerocopy 0.7.31", + "zerocopy 0.7.32", "zeroize", "zip", ] @@ -5531,7 +5579,7 @@ dependencies = [ "poptrie", "serde", "smoltcp 0.11.0", - "zerocopy 0.7.31", + "zerocopy 0.7.32", ] [[package]] @@ -5601,7 +5649,7 @@ dependencies = [ "serde_json", "slog", "slog-async", - "slog-dtrace", + "slog-dtrace 0.3.0", "slog-term", "strum", "subprocess", @@ -5641,7 +5689,7 @@ dependencies = [ "serde_json", "slog", "slog-async", - "slog-dtrace", + "slog-dtrace 0.3.0", "slog-term", "sqlformat", "sqlparser", @@ -5650,7 +5698,7 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "usdt", + "usdt 0.5.0", "uuid", ] @@ -5700,7 +5748,7 @@ dependencies = [ "schemars", "serde", "slog", - "slog-dtrace", + "slog-dtrace 0.3.0", "thiserror", "tokio", "uuid", @@ -5952,19 +6000,20 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" -version = "2.7.2" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1acb4a4365a13f749a93f1a094a7805e5cfa0955373a9de860d962eaa3a5fe5a" +checksum = "1f200d8d83c44a45b21764d1916299752ca035d15ecd46faca3e9a2a2bf6ad06" dependencies = [ + "memchr", "thiserror", "ucd-trie", ] [[package]] name = "pest_derive" -version = "2.7.2" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "666d00490d4ac815001da55838c500eafb0320019bbaa44444137c48b443a853" +checksum = "bcd6ab1236bbdb3a49027e920e693192ebfe8913f6d60e294de57463a493cfde" dependencies = [ "pest", "pest_generator", @@ -5972,9 +6021,9 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.2" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68ca01446f50dbda87c1786af8770d535423fa8a53aec03b8f4e3d7eb10e0929" +checksum = "2a31940305ffc96863a735bef7c7994a00b325a7138fdbc5bda0f1a0476d3275" dependencies = [ "pest", "pest_meta", @@ -5985,9 +6034,9 @@ dependencies = [ [[package]] name = "pest_meta" -version = "2.7.2" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56af0a30af74d0445c0bf6d9d051c979b516a1a5af790d251daee76005420a48" +checksum = "a7ff62f5259e53b78d1af898941cdcdccfae7385cf7d793a6e55de5d05bb4b7d" dependencies = [ "once_cell", "pest", @@ -6092,6 +6141,12 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + [[package]] name = 
"platforms" version = "3.0.2" @@ -6461,7 +6516,7 @@ dependencies = [ "slog", "slog-async", "slog-bunyan", - "slog-dtrace", + "slog-dtrace 0.2.3", "slog-term", "thiserror", "tokio", @@ -7466,6 +7521,26 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scroll" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ab8598aa408498679922eff7fa985c25d58a90771bd6be794434c5277eab1a6" +dependencies = [ + "scroll_derive", +] + +[[package]] +name = "scroll_derive" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f81c2fde025af7e69b1d1420531c8a8811ca898919db177141a85313b1cb932" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "sct" version = "0.7.0" @@ -8014,7 +8089,21 @@ dependencies = [ "serde", "serde_json", "slog", - "usdt", + "usdt 0.3.5", + "version_check", +] + +[[package]] +name = "slog-dtrace" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16c4003e4582bc29415fcbf94f53346c9c379d5dafac45d4bafaa39c7f0453ac" +dependencies = [ + "chrono", + "serde", + "serde_json", + "slog", + "usdt 0.5.0", "version_check", ] @@ -8216,7 +8305,7 @@ dependencies = [ "omicron-workspace-hack", "serde", "slog", - "slog-dtrace", + "slog-dtrace 0.3.0", "sprockets-rot", "thiserror", "tokio", @@ -9649,11 +9738,27 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b4c48f9e522b977bbe938a0d7c4d36633d267ba0155aaa253fb57d0531be0fb" dependencies = [ - "dtrace-parser", + "dtrace-parser 0.1.14", "serde", - "usdt-attr-macro", - "usdt-impl", - "usdt-macro", + "usdt-attr-macro 0.3.5", + "usdt-impl 0.3.5", + "usdt-macro 0.3.5", +] + +[[package]] +name = "usdt" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf5c47fb471a0bff3d7b17a250817bba8c6cc99b0492abaefe5b3bb99045f02" +dependencies = [ + "dof 0.3.0", + "dtrace-parser 0.2.0", + "goblin", + "memmap", + "serde", + "usdt-attr-macro 0.5.0", + "usdt-impl 0.5.0", + "usdt-macro 0.5.0", ] [[package]] @@ -9662,12 +9767,26 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80e6ae4f982ae74dcbaa8eb17baf36ca0d464a3abc8a7172b3bd74c73e9505d6" dependencies = [ - "dtrace-parser", + "dtrace-parser 0.1.14", "proc-macro2", "quote", "serde_tokenstream 0.1.7", "syn 1.0.109", - "usdt-impl", + "usdt-impl 0.3.5", +] + +[[package]] +name = "usdt-attr-macro" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "025161fff40db24774e7757f75df74ecc47e93d7e11e0f6cdfc31b40eacfe136" +dependencies = [ + "dtrace-parser 0.2.0", + "proc-macro2", + "quote", + "serde_tokenstream 0.2.0", + "syn 2.0.48", + "usdt-impl 0.5.0", ] [[package]] @@ -9677,8 +9796,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f53b4ca0b33aae466dc47b30b98adc4f88454928837af8010b6ed02d18474cb1" dependencies = [ "byteorder", - "dof", - "dtrace-parser", + "dof 0.1.5", + "dtrace-parser 0.1.14", "libc", "proc-macro2", "quote", @@ -9690,18 +9809,52 @@ dependencies = [ "version_check", ] +[[package]] +name = "usdt-impl" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f925814e5942ebb87af2d9fcf4c3f8665e37903f741eb11f0fa2396c6ef5f7b1" +dependencies = [ + "byteorder", + "dof 
0.3.0", + "dtrace-parser 0.2.0", + "libc", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn 2.0.48", + "thiserror", + "thread-id", + "version_check", +] + [[package]] name = "usdt-macro" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7cb093f9653dc91632621c754f9ed4ee25d14e46e0239b6ccaf74a6c0c2788bd" dependencies = [ - "dtrace-parser", + "dtrace-parser 0.1.14", "proc-macro2", "quote", "serde_tokenstream 0.1.7", "syn 1.0.109", - "usdt-impl", + "usdt-impl 0.3.5", +] + +[[package]] +name = "usdt-macro" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ddd86f8f3abac0b7c87f59fe82446fc96a3854a413f176dd2797ed686b7af4c" +dependencies = [ + "dtrace-parser 0.2.0", + "proc-macro2", + "quote", + "serde_tokenstream 0.2.0", + "syn 2.0.48", + "usdt-impl 0.5.0", ] [[package]] @@ -10085,7 +10238,7 @@ dependencies = [ "sha2", "sled-hardware", "slog", - "slog-dtrace", + "slog-dtrace 0.3.0", "snafu", "subprocess", "tar", @@ -10402,12 +10555,12 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.31" +version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" dependencies = [ "byteorder", - "zerocopy-derive 0.7.31", + "zerocopy-derive 0.7.32", ] [[package]] @@ -10434,9 +10587,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.7.31" +version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index ba328fe612..5e94d82501 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -349,7 +349,7 @@ sled-hardware = { path = "sled-hardware" } sled-storage = { path = "sled-storage" } slog = { version = "2.7", features = [ "dynamic-keys", "max_level_trace", "release_max_level_debug" ] } slog-async = "2.8" -slog-dtrace = "0.2" +slog-dtrace = "0.3" slog-envlogger = "2.2" slog-error-chain = { git = "https://github.com/oxidecomputer/slog-error-chain", branch = "main", features = ["derive"] } slog-term = "2.9" @@ -400,7 +400,7 @@ tui-tree-widget = "0.16.0" unicode-width = "0.1.11" update-common = { path = "update-common" } update-engine = { path = "update-engine" } -usdt = "0.3" +usdt = "0.5.0" uuid = { version = "1.7.0", features = ["serde", "v4"] } walkdir = "2.4" wicket = { path = "wicket" } diff --git a/gateway/src/lib.rs b/gateway/src/lib.rs index 5aa833f6e2..1354f30a0a 100644 --- a/gateway/src/lib.rs +++ b/gateway/src/lib.rs @@ -331,9 +331,8 @@ pub async fn start_server( .map_err(|message| format!("initializing logger: {}", message))?, ); let log = slog::Logger::root(drain.fuse(), slog::o!(FileKv)); - if let slog_dtrace::ProbeRegistration::Failed(e) = registration { - let err = InlineErrorChain::new(&e); - error!(log, "failed to register DTrace probes"; &err); + if let slog_dtrace::ProbeRegistration::Failed(err) = registration { + error!(log, "failed to register DTrace probes"; "err" => &err); return Err(format!("failed to register DTrace probes: {err}")); } else { debug!(log, "registered DTrace probes"); diff --git a/nexus/db-queries/src/lib.rs b/nexus/db-queries/src/lib.rs index 5d1927ebc7..60177990e8 100644 --- a/nexus/db-queries/src/lib.rs +++ 
b/nexus/db-queries/src/lib.rs @@ -19,7 +19,7 @@ extern crate newtype_derive; #[macro_use] extern crate diesel; -#[usdt::provider(provider = "nexus__db__queries")] +#[usdt::provider(provider = "nexus_db_queries")] mod probes { // Fires before we start a search over a range for a VNI. // diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index fc46a2c498..ca996dc894 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs @@ -50,7 +50,7 @@ use tokio::fs; use tokio::sync::Mutex; use uuid::Uuid; -#[usdt::provider(provider = "clickhouse__client")] +#[usdt::provider(provider = "clickhouse_client")] mod probes { fn query__start(_: &usdt::UniqueId, sql: &str) {} fn query__done(_: &usdt::UniqueId) {} diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 1a9d36c86b..47a8019ac5 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -61,8 +61,8 @@ pub enum StartError { #[error("Failed to initialize logger")] InitLogger(#[source] io::Error), - #[error("Failed to register DTrace probes")] - RegisterDTraceProbes(#[source] usdt::Error), + #[error("Failed to register DTrace probes: {0}")] + RegisterDTraceProbes(String), #[error("Failed to find address objects for maghemite")] FindMaghemiteAddrObjs(#[source] underlay::Error), diff --git a/tools/dtrace/aggregate-query-latency.d b/tools/dtrace/aggregate-query-latency.d index b1899cd970..c0ed1751fd 100755 --- a/tools/dtrace/aggregate-query-latency.d +++ b/tools/dtrace/aggregate-query-latency.d @@ -7,14 +7,14 @@ dtrace:::BEGIN printf("Tracing database query latency by connection ID for nexus PID %d, use Ctrl-C to exit\n", $target); } -diesel-db$target:::query-start +diesel_db$target:::query-start { @total_queries = count(); this->conn_id = json(copyinstr(arg1), "ok"); self->ts[this->conn_id] = timestamp; } -diesel-db$target:::query-done +diesel_db$target:::query-done /self->ts[json(copyinstr(arg1), "ok")] != 0/ { this->conn_id = json(copyinstr(arg1), "ok"); diff --git a/tools/dtrace/slowest-queries.d b/tools/dtrace/slowest-queries.d index 40e43fa252..76e22de22f 100755 --- a/tools/dtrace/slowest-queries.d +++ b/tools/dtrace/slowest-queries.d @@ -9,7 +9,7 @@ dtrace:::BEGIN printf("Tracing slowest queries for nexus PID %d, use Ctrl-C to exit\n", $target); } -diesel-db$target:::query-start +diesel_db$target:::query-start { this->conn_id = json(copyinstr(arg1), "ok"); ts[this->conn_id] = timestamp; @@ -17,12 +17,12 @@ diesel-db$target:::query-start } -diesel-db$target:::query-done +diesel_db$target:::query-done { this->conn_id = json(copyinstr(arg1), "ok"); } -diesel-db$target:::query-done +diesel_db$target:::query-done /ts[this->conn_id]/ { this->latency = timestamp - ts[this->conn_id]; diff --git a/tools/dtrace/trace-db-queries.d b/tools/dtrace/trace-db-queries.d index 033d849084..69878b55ba 100755 --- a/tools/dtrace/trace-db-queries.d +++ b/tools/dtrace/trace-db-queries.d @@ -9,19 +9,19 @@ dtrace:::BEGIN printf("Tracing all database queries for nexus PID %d, use Ctrl-C to exit\n", $target); } -diesel-db$target:::query-start +diesel_db$target:::query-start { this->conn_id = json(copyinstr(arg1), "ok"); ts[this->conn_id] = timestamp; query[this->conn_id] = copyinstr(arg2); } -diesel-db$target:::query-done +diesel_db$target:::query-done { this->conn_id = json(copyinstr(arg1), "ok"); } -diesel-db$target:::query-done +diesel_db$target:::query-done /ts[this->conn_id]/ { this->latency = (timestamp - ts[this->conn_id]) / 1000; diff --git a/workspace-hack/Cargo.toml 
b/workspace-hack/Cargo.toml index 49b2489c40..cebd4cab36 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -114,9 +114,10 @@ trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } +usdt-impl = { version = "0.5.0", default-features = false, features = ["asm", "des"] } uuid = { version = "1.7.0", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } -zerocopy = { version = "0.7.31", features = ["derive", "simd"] } +zerocopy = { version = "0.7.32", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] } @@ -222,20 +223,23 @@ trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } +usdt-impl = { version = "0.5.0", default-features = false, features = ["asm", "des"] } uuid = { version = "1.7.0", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } -zerocopy = { version = "0.7.31", features = ["derive", "simd"] } +zerocopy = { version = "0.7.32", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] } [target.x86_64-unknown-linux-gnu.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +dof = { version = "0.3.0", default-features = false, features = ["des"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.30", features = ["fs", "termios"] } [target.x86_64-unknown-linux-gnu.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +dof = { version = "0.3.0", default-features = false, features = ["des"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.30", features = ["fs", "termios"] } @@ -270,6 +274,7 @@ rustix = { version = "0.38.30", features = ["fs", "termios"] } [target.x86_64-unknown-illumos.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +dof = { version = "0.3.0", default-features = false, features = ["des"] } errno = { version = "0.3.8", default-features = false, features = ["std"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } @@ -279,6 +284,7 @@ toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", featu [target.x86_64-unknown-illumos.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +dof = { version = "0.3.0", default-features = false, features = ["des"] } errno = { version = "0.3.8", default-features = false, features = ["std"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } From 2b9c885074fee12c8e113e43b0669ee0b7765f60 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" 
<146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 25 Jan 2024 15:22:14 -0800 Subject: [PATCH 45/91] Update Rust crate owo-colors to v4 (#4896) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 45d1d47199..dd837b9891 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5541,9 +5541,9 @@ dependencies = [ [[package]] name = "owo-colors" -version = "3.5.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" +checksum = "caff54706df99d2a78a5a4e3455ff45448d81ef1bb63c22cd14052ca0e993a3f" [[package]] name = "oxide-client" diff --git a/Cargo.toml b/Cargo.toml index 5e94d82501..47c412b9b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -280,7 +280,7 @@ openssl-sys = "0.9" openssl-probe = "0.1.5" opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" } oso = "0.27" -owo-colors = "3.5.0" +owo-colors = "4.0.0" oximeter = { path = "oximeter/oximeter" } oximeter-client = { path = "clients/oximeter-client" } oximeter-db = { path = "oximeter/db/" } From 5b28d0cff0a7e43dd7dd813a6e4e03b58c07ba11 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 26 Jan 2024 05:30:55 +0000 Subject: [PATCH 46/91] Update taiki-e/install-action digest to 1f501f0 (#4901) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`9f9bf5e` -> `1f501f0`](https://togithub.com/taiki-e/install-action/compare/9f9bf5e...1f501f0) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 46d09c0940..d4a4a4750c 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@9f9bf5e8df111848fb25b8a97a361d8963025899 # v2 + uses: taiki-e/install-action@1f501f091c4240a626be17b7496626f8f0cf979a # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 4c15cc0d3fded2844eb8178556daf30a59f778e3 Mon Sep 17 00:00:00 2001 From: "oxide-reflector-bot[bot]" <130185838+oxide-reflector-bot[bot]@users.noreply.github.com> Date: Fri, 26 Jan 2024 10:21:34 +0000 Subject: [PATCH 47/91] Update maghemite to d12bdf8 (#4627) Updated maghemite to commit d12bdf8.
--------- Co-authored-by: reflector[bot] <130185838+reflector[bot]@users.noreply.github.com> --- package-manifest.toml | 12 ++++++------ tools/maghemite_ddm_openapi_version | 2 +- tools/maghemite_mg_openapi_version | 2 +- tools/maghemite_mgd_checksums | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/package-manifest.toml b/package-manifest.toml index 36e43157f9..f574f1ff5d 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -446,10 +446,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +source.commit = "d12bdf89b9058065789cd00c8704e4ce0a352342" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt -source.sha256 = "1cf9cb514d11275d93c4e4760500539a778f23039374508ca07528fcaf0ba3f8" +source.sha256 = "442ef3a927ce2f2a401b631daa3c67a708fbbed83a839552a6fbcadd68120783" output.type = "tarball" [package.mg-ddm] @@ -462,10 +462,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +source.commit = "d12bdf89b9058065789cd00c8704e4ce0a352342" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "a9b959b4287ac2ec7b45ed99ccd00e1f134b8e3d501099cd669cee5de9525ae3" +source.sha256 = "81a766a88fab3fe7cb7fb6698ec02d05224320500b7a4421bbea9f4123127fba" output.type = "zone" output.intermediate_only = true @@ -477,10 +477,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +source.commit = "d12bdf89b9058065789cd00c8704e4ce0a352342" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "ab882fbeab54987645492872e67f3351f8d14629a041465cc845ac8583a7002b" +source.sha256 = "bc887e08e3d052d8440983a2d6186cd1d92a52345504092f64f4de2e5335a75d" output.type = "zone" output.intermediate_only = true diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index be8772b7e6..a103b117e8 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +COMMIT="d12bdf89b9058065789cd00c8704e4ce0a352342" SHA2="0b0dbc2f8bbc5d2d9be92d64c4865f8f9335355aae62f7de9f67f81dfb3f1803" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 6bf1999c61..6981c98070 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +COMMIT="d12bdf89b9058065789cd00c8704e4ce0a352342" SHA2="7618511f905d26394ef7c552339dd78835ce36a6def0d85b05b6d1e363a5e7b4" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index b5fe84b662..8d0efb473a 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="ab882fbeab54987645492872e67f3351f8d14629a041465cc845ac8583a7002b" +CIDL_SHA256="bc887e08e3d052d8440983a2d6186cd1d92a52345504092f64f4de2e5335a75d" MGD_LINUX_SHA256="93331c1001e3aa506a8c1b83346abba1995e489910bff2c94a86730b96617a34" \ No newline at end of file From d5dace65fa093174a7502e0f6a3dde4ccabe6337 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Fri, 26 Jan 2024 09:54:23 -0600 Subject: [PATCH 48/91] Fix IP pools data migration (#4903) Closes #4875 ## Problem After the IP pools migrations on the dogfood rack, the `default` pool was not marked `is_default=true` for the `oxide` silo when it should have been. ## Diagnosis When checking for silo-scoped default pools overriding a fleet-scoped default, I neglected to require that the silo-scoped defaults in question were non-deleted. This means that if there was a deleted pool with `silo_id=` and `is_default=true`, that would be considered an overriding default and leave us with `is_default=false` on the `default` pool. Well, I can't check `silo_id` and `is_default` on the pools because those columns have been dropped, but there is a deleted pool called `oxide-default` that says in the description it was meant as the default pool for only the `oxide` silo. ``` root@[fd00:1122:3344:105::3]:32221/omicron> select * from omicron.public.ip_pool; id | name | description | time_created | time_modified | time_deleted | rcgen ---------------------------------------+--------------------+--------------------------------+-------------------------------+-------------------------------+-------------------------------+-------- 1efa49a2-3f3a-43ab-97ac-d38658069c39 | oxide-default | oxide silo-only pool - default | 2023-08-31 05:33:00.11079+00 | 2023-08-31 05:33:00.11079+00 | 2023-08-31 06:03:22.426488+00 | 1 ``` I think we can be pretty confident this is what got us. ## Fix Add `AND time_deleted IS NULL` to the subquery. ## Mitigation in existing systems Already done.
Dogfood is the only long-running system where the bad migration ran, and all I had to do there was use the API to set `is_default=true` for the (`default` pool, `oxide` silo) link. --- nexus/tests/integration_tests/schema.rs | 90 ++++++++++++------------- schema/crdb/23.0.0/up4.sql | 7 +- 2 files changed, 47 insertions(+), 50 deletions(-) diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index c3ba02d5ce..2d496fcd8e 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -951,9 +951,11 @@ const SILO1: Uuid = Uuid::from_u128(0x111151F0_5c3d_4647_83b0_8f3515da7be1); const SILO2: Uuid = Uuid::from_u128(0x222251F0_5c3d_4647_83b0_8f3515da7be1); // "6001" -> "Pool" +const POOL0: Uuid = Uuid::from_u128(0x00006001_5c3d_4647_83b0_8f3515da7be1); const POOL1: Uuid = Uuid::from_u128(0x11116001_5c3d_4647_83b0_8f3515da7be1); const POOL2: Uuid = Uuid::from_u128(0x22226001_5c3d_4647_83b0_8f3515da7be1); const POOL3: Uuid = Uuid::from_u128(0x33336001_5c3d_4647_83b0_8f3515da7be1); +const POOL4: Uuid = Uuid::from_u128(0x44446001_5c3d_4647_83b0_8f3515da7be1); // "513D" -> "Sled" const SLED1: Uuid = Uuid::from_u128(0x1111513d_5c3d_4647_83b0_8f3515da7be1); @@ -975,9 +977,11 @@ fn before_23_0_0(client: &Client) -> BoxFuture<'_, ()> { // no corresponding silo. client.batch_execute(&format!("INSERT INTO ip_pool (id, name, description, time_created, time_modified, time_deleted, rcgen, silo_id, is_default) VALUES + ('{POOL0}', 'pool2', '', now(), now(), now(), 1, '{SILO2}', true), ('{POOL1}', 'pool1', '', now(), now(), NULL, 1, '{SILO1}', true), ('{POOL2}', 'pool2', '', now(), now(), NULL, 1, '{SILO2}', false), - ('{POOL3}', 'pool3', '', now(), now(), NULL, 1, null, true); + ('{POOL3}', 'pool3', '', now(), now(), NULL, 1, null, true), + ('{POOL4}', 'pool4', '', now(), now(), NULL, 1, null, false); ")).await.expect("Failed to create IP Pool"); }) } @@ -992,56 +996,46 @@ fn after_23_0_0(client: &Client) -> BoxFuture<'_, ()> { .expect("Failed to query ip pool resource"); let ip_pool_resources = process_rows(&rows); - assert_eq!(ip_pool_resources.len(), 4); + assert_eq!(ip_pool_resources.len(), 6); + + fn assert_row( + row: &Vec, + ip_pool_id: Uuid, + silo_id: Uuid, + is_default: bool, + ) { + let type_silo = SqlEnum::from(("ip_pool_resource_type", "silo")); + assert_eq!( + row, + &vec![ + ColumnValue::new("ip_pool_id", ip_pool_id), + ColumnValue::new("resource_type", type_silo), + ColumnValue::new("resource_id", silo_id), + ColumnValue::new("is_default", is_default), + ], + ); + } - let type_silo = SqlEnum::from(("ip_pool_resource_type", "silo")); + // pool1 was default on silo1, so gets an entry in the join table + // reflecting that + assert_row(&ip_pool_resources[0].values, POOL1, SILO1, true); - // pool1, which referenced silo1 in the "ip_pool" table, has a newly - // created resource. - // - // The same relationship is true for pool2 / silo2. 
- assert_eq!( - ip_pool_resources[0].values, - vec![ - ColumnValue::new("ip_pool_id", POOL1), - ColumnValue::new("resource_type", type_silo.clone()), - ColumnValue::new("resource_id", SILO1), - ColumnValue::new("is_default", true), - ], - ); - assert_eq!( - ip_pool_resources[1].values, - vec![ - ColumnValue::new("ip_pool_id", POOL2), - ColumnValue::new("resource_type", type_silo.clone()), - ColumnValue::new("resource_id", SILO2), - ColumnValue::new("is_default", false), - ], - ); + // pool2 was associated with silo2 but not as its default, so its + // join table entry is non-default + assert_row(&ip_pool_resources[1].values, POOL2, SILO2, false); - // pool3 did not previously have a corresponding silo, so now it's associated - // with both silos as a new resource in each. - // - // Additionally, silo1 already had a default pool (pool1), but silo2 did - // not have one. As a result, pool3 becomes the new default pool for silo2. - assert_eq!( - ip_pool_resources[2].values, - vec![ - ColumnValue::new("ip_pool_id", POOL3), - ColumnValue::new("resource_type", type_silo.clone()), - ColumnValue::new("resource_id", SILO1), - ColumnValue::new("is_default", false), - ], - ); - assert_eq!( - ip_pool_resources[3].values, - vec![ - ColumnValue::new("ip_pool_id", POOL3), - ColumnValue::new("resource_type", type_silo.clone()), - ColumnValue::new("resource_id", SILO2), - ColumnValue::new("is_default", true), - ], - ); + // fleet-scoped pools are a little more complicated + + // pool3 was a fleet-level default, so now it's associated with both + // silos. silo1 had its own default pool as well (pool1), so pool3 + // cannot also be default for silo1. silo2 did not have its own default, + // so pool3 is default for silo2. + assert_row(&ip_pool_resources[2].values, POOL3, SILO1, false); + assert_row(&ip_pool_resources[3].values, POOL3, SILO2, true); + + // fleet-level pool that was not default becomes non-default on all silos + assert_row(&ip_pool_resources[4].values, POOL4, SILO1, false); + assert_row(&ip_pool_resources[5].values, POOL4, SILO2, false); }) } diff --git a/schema/crdb/23.0.0/up4.sql b/schema/crdb/23.0.0/up4.sql index 8fb43f9cf1..2235d0aa01 100644 --- a/schema/crdb/23.0.0/up4.sql +++ b/schema/crdb/23.0.0/up4.sql @@ -23,8 +23,11 @@ SELECT -- AND NOT EXISTS here causes is_default to be false in row 1 if there is a -- conflicting silo default pool. row 2 is inserted in up5. p.is_default AND NOT EXISTS ( - SELECT 1 FROM omicron.public.ip_pool - WHERE silo_id = s.id AND is_default + SELECT 1 + FROM omicron.public.ip_pool p0 + WHERE p0.silo_id = s.id + AND p0.is_default + AND p0.time_deleted IS NULL ) FROM omicron.public.ip_pool AS p -- cross join means we are looking at the cartesian product of all fleet-scoped From a3a9844994d2c7742918d0815c2242aa0fc8a925 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Fri, 26 Jan 2024 15:17:13 -0600 Subject: [PATCH 49/91] Better error message when there's no default IP pool (#4880) Closes #4864 This is a bad error message to get when the problem is that there is no default IP pool configured for your current silo: ``` not found: ip-pool with id "Default pool for current silo" ``` "Default pool for current silo" is not an id, so why would we call it one? This is better: ``` not found: default IP pool for current silo ``` This PR is just making that possible.
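[Editor's note] For readers skimming the diff that follows: the mechanism is small. `LookupType` gains a `ByOther(String)` variant whose string is used verbatim in the 404 message rather than being labeled as an id. Below is a minimal, self-contained sketch of that pattern; the types are deliberately simplified (`u128` stands in for `Uuid`), and the real definitions live in `common/src/api/external/error.rs`:

```rust
// Sketch of the lookup-type-to-message mapping this patch introduces.
// Deliberately simplified: no Uuid or HttpError dependencies, just the
// formatting logic.

#[allow(dead_code)]
enum LookupType {
    ByName(String),
    ById(u128), // stand-in for Uuid in this sketch
    ByCompositeId(String),
    ByOther(String), // caller supplies the entire human-readable phrase
}

fn not_found_message(type_name: &str, lookup: &LookupType) -> String {
    let detail = match lookup {
        LookupType::ByName(name) => format!("{type_name} with name \"{name}\""),
        LookupType::ById(id) => format!("{type_name} with id \"{id}\""),
        LookupType::ByCompositeId(label) => {
            format!("{type_name} with id \"{label}\"")
        }
        // Used verbatim, so nothing that isn't an id gets called one.
        LookupType::ByOther(msg) => msg.clone(),
    };
    format!("not found: {detail}")
}

fn main() {
    let lookup =
        LookupType::ByOther("default IP pool for current silo".to_string());
    // Prints: not found: default IP pool for current silo
    println!("{}", not_found_message("ip-pool", &lookup));
}
```

The design choice here is that the caller, who knows why the lookup is unusual, supplies the whole phrase, so the shared formatting code never has to guess at a label.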
--- common/src/api/external/error.rs | 27 +++++++++---------- .../src/authz/policy_test/resource_builder.rs | 4 +-- nexus/db-queries/src/db/datastore/ip_pool.rs | 11 ++------ nexus/tests/integration_tests/instances.rs | 3 +-- 4 files changed, 18 insertions(+), 27 deletions(-) diff --git a/common/src/api/external/error.rs b/common/src/api/external/error.rs index a3876fcac3..d2e062f2e1 100644 --- a/common/src/api/external/error.rs +++ b/common/src/api/external/error.rs @@ -144,11 +144,11 @@ pub enum LookupType { ByName(String), /// a specific id was requested ById(Uuid), - /// a session token was requested - BySessionToken(String), /// a specific id was requested with some composite type /// (caller summarizes it) ByCompositeId(String), + /// object selected by criteria that would be confusing to call an ID + ByOther(String), } impl LookupType { @@ -359,23 +359,22 @@ impl From for HttpError { fn from(error: Error) -> HttpError { match error { Error::ObjectNotFound { type_name: t, lookup_type: lt } => { - // TODO-cleanup is there a better way to express this? - let (lookup_field, lookup_value) = match lt { - LookupType::ByName(name) => ("name", name), - LookupType::ById(id) => ("id", id.to_string()), - LookupType::ByCompositeId(label) => ("id", label), - LookupType::BySessionToken(token) => { - ("session token", token) + let message = match lt { + LookupType::ByName(name) => { + format!("{} with name \"{}\"", t, name) } + LookupType::ById(id) => { + format!("{} with id \"{}\"", t, id) + } + LookupType::ByCompositeId(label) => { + format!("{} with id \"{}\"", t, label) + } + LookupType::ByOther(msg) => msg, }; - let message = format!( - "not found: {} with {} \"{}\"", - t, lookup_field, lookup_value - ); HttpError::for_client_error( Some(String::from("ObjectNotFound")), http::StatusCode::NOT_FOUND, - message, + format!("not found: {}", message), ) } diff --git a/nexus/db-queries/src/authz/policy_test/resource_builder.rs b/nexus/db-queries/src/authz/policy_test/resource_builder.rs index dc18b2e47f..59cb283a95 100644 --- a/nexus/db-queries/src/authz/policy_test/resource_builder.rs +++ b/nexus/db-queries/src/authz/policy_test/resource_builder.rs @@ -92,7 +92,7 @@ impl<'a> ResourceBuilder<'a> { // (e.g., "fleet"). 
resource.resource_type().to_string().to_lowercase() } - LookupType::BySessionToken(_) | LookupType::ByCompositeId(_) => { + LookupType::ByCompositeId(_) | LookupType::ByOther(_) => { panic!("test resources must be given names"); } }; @@ -212,7 +212,7 @@ where LookupType::ByName(name) => format!("{:?}", name), LookupType::ById(id) => format!("id {:?}", id.to_string()), LookupType::ByCompositeId(id) => format!("id {:?}", id), - LookupType::BySessionToken(_) => { + LookupType::ByOther(_) => { unimplemented!() } }; diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs index 6d3a95af7d..d316d1adb7 100644 --- a/nexus/db-queries/src/db/datastore/ip_pool.rs +++ b/nexus/db-queries/src/db/datastore/ip_pool.rs @@ -134,12 +134,8 @@ impl DataStore { // .authorize(authz::Action::ListChildren, &authz::IP_POOL_LIST) // .await?; - // join ip_pool to ip_pool_resource and filter - - // used in both success and error outcomes - let lookup_type = LookupType::ByCompositeId( - "Default pool for current silo".to_string(), - ); + let lookup_type = + LookupType::ByOther("default IP pool for current silo".to_string()); ip_pool::table .inner_join(ip_pool_resource::table) @@ -161,9 +157,6 @@ impl DataStore { ) .await .map_err(|e| { - // janky to do this manually, but this is an unusual kind of - // lookup in that it is by (silo_id, is_default=true), which is - // arguably a composite ID. public_error_from_diesel_lookup( e, ResourceType::IpPool, diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 8d97df6cda..57b731c692 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -3841,8 +3841,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error( let url = format!("/v1/instances?project={}", PROJECT_NAME); let error = object_create_error(client, &url, &body, StatusCode::NOT_FOUND).await; - let msg = "not found: ip-pool with id \"Default pool for current silo\"" - .to_string(); + let msg = "not found: default IP pool for current silo".to_string(); assert_eq!(error.message, msg); // same deal if you specify a pool that doesn't exist From 80cc00105e82d83cc7e2658dc079382e2a238bd9 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 26 Jan 2024 16:44:14 -0500 Subject: [PATCH 50/91] Serialize blueprints in the database (#4899) This replaces the in-memory blueprint storage added as a placeholder in #4804 with cockroachdb-backed tables. Both the tables and related queries are _heavily_ derived from the similar tables in the inventory system (particularly serializing omicron zones and their related properties). The tables are effectively identical as of this PR, but we opted to keep them separate because we expect them to diverge some over time (e.g., inventory might start collecting additional per-zone properties that don't exist for blueprints, such as uptime). The big exception to "basically the same as inventory" is the `bp_target` table which tracks the current (and past) target blueprint. Inserting into this table has some subtleties, and we use a CTE to check and enforce the invariants. This is the first diesel/CTE I've written; it's based on other similar CTEs in Nexus, but I'd still appreciate a particularly careful look there. Fixes #4793.
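[Editor's note] The `bp_target` insertion is the subtle part of this change. The commit message does not enumerate the invariants, so the sketch below is an illustration under stated assumptions rather than the actual diesel CTE: assume a new target may only be appended with the next sequential version and must reference an existing blueprint. The real code enforces an equivalent check atomically in a single SQL statement; this Rust sketch only shows the shape of the logic:

```rust
use std::collections::BTreeMap;

// Hypothetical, simplified stand-in for a bp_target row (u128 for Uuid;
// the real table has more columns, e.g. enabled and time_made_target).
struct BpTarget {
    version: u32,
    blueprint_id: u128,
}

// Assumed invariants, for illustration only: versions are appended
// sequentially, and a target must point at a known blueprint. The real
// implementation expresses an equivalent read-check-insert as one SQL
// statement (a CTE) so it cannot race with concurrent writers.
fn try_set_target(
    targets: &mut BTreeMap<u32, BpTarget>,
    known_blueprints: &[u128],
    new: BpTarget,
) -> Result<(), String> {
    // The highest version currently in the table (0 if empty).
    let current_max = targets.keys().next_back().copied().unwrap_or(0);
    if new.version != current_max + 1 {
        return Err(format!(
            "expected version {} (one past the current target), got {}",
            current_max + 1,
            new.version
        ));
    }
    if !known_blueprints.contains(&new.blueprint_id) {
        return Err(format!("no blueprint with id {}", new.blueprint_id));
    }
    targets.insert(new.version, new);
    Ok(())
}

fn main() {
    let mut targets = BTreeMap::new();
    // First target must be version 1 and reference a known blueprint.
    assert!(try_set_target(
        &mut targets,
        &[42],
        BpTarget { version: 1, blueprint_id: 42 }
    )
    .is_ok());
    // Skipping a version number is rejected.
    assert!(try_set_target(
        &mut targets,
        &[42],
        BpTarget { version: 3, blueprint_id: 42 }
    )
    .is_err());
}
```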
--- Cargo.lock | 2 + dev-tools/omdb/src/bin/omdb/nexus.rs | 2 +- nexus/db-model/src/deployment.rs | 263 +++ nexus/db-model/src/inventory.rs | 448 +---- nexus/db-model/src/lib.rs | 3 + nexus/db-model/src/omicron_zone_config.rs | 456 +++++ nexus/db-model/src/schema.rs | 85 +- nexus/db-queries/Cargo.toml | 2 + .../db-queries/src/db/datastore/deployment.rs | 1583 +++++++++++++++++ nexus/db-queries/src/db/datastore/mod.rs | 1 + nexus/deployment/Cargo.toml | 2 +- nexus/deployment/src/blueprint_builder.rs | 23 +- nexus/inventory/src/builder.rs | 26 +- nexus/inventory/src/lib.rs | 2 + nexus/src/app/deployment.rs | 165 +- nexus/src/app/mod.rs | 5 - nexus/src/internal_api/http_entrypoints.rs | 50 +- nexus/types/src/deployment.rs | 37 +- openapi/nexus-internal.json | 46 +- schema/crdb/28.0.0/up1.sql | 7 + schema/crdb/28.0.0/up2.sql | 6 + schema/crdb/28.0.0/up3.sql | 31 + schema/crdb/28.0.0/up4.sql | 13 + schema/crdb/28.0.0/up5.sql | 6 + schema/crdb/28.0.0/up6.sql | 6 + schema/crdb/dbinit.sql | 206 ++- 26 files changed, 2884 insertions(+), 592 deletions(-) create mode 100644 nexus/db-model/src/deployment.rs create mode 100644 nexus/db-model/src/omicron_zone_config.rs create mode 100644 nexus/db-queries/src/db/datastore/deployment.rs create mode 100644 schema/crdb/28.0.0/up1.sql create mode 100644 schema/crdb/28.0.0/up2.sql create mode 100644 schema/crdb/28.0.0/up3.sql create mode 100644 schema/crdb/28.0.0/up4.sql create mode 100644 schema/crdb/28.0.0/up5.sql create mode 100644 schema/crdb/28.0.0/up6.sql diff --git a/Cargo.lock b/Cargo.lock index dd837b9891..b2815d9a1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4283,12 +4283,14 @@ dependencies = [ "http 0.2.11", "hyper 0.14.27", "hyper-rustls 0.26.0", + "illumos-utils", "internal-dns", "ipnetwork", "itertools 0.12.0", "macaddr", "newtype_derive", "nexus-db-model", + "nexus-deployment", "nexus-inventory", "nexus-test-utils", "nexus-types", diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index fef069d536..ea89923caa 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -866,7 +866,7 @@ async fn cmd_nexus_blueprints_target_show( .await .context("fetching target blueprint")?; println!("target blueprint: {}", target.target_id); - println!("set at: {}", target.time_set); + println!("made target at: {}", target.time_made_target); println!("enabled: {}", target.enabled); Ok(()) } diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs new file mode 100644 index 0000000000..34fe08d78c --- /dev/null +++ b/nexus/db-model/src/deployment.rs @@ -0,0 +1,263 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types for representing the deployed software and configuration in the +//! database + +use crate::inventory::ZoneType; +use crate::omicron_zone_config::{OmicronZone, OmicronZoneNic}; +use crate::schema::{ + blueprint, bp_omicron_zone, bp_omicron_zone_nic, + bp_omicron_zones_not_in_service, bp_sled_omicron_zones, bp_target, +}; +use crate::{ipv6, Generation, MacAddr, Name, SqlU16, SqlU32, SqlU8}; +use chrono::{DateTime, Utc}; +use ipnetwork::IpNetwork; +use nexus_types::deployment::BlueprintTarget; +use uuid::Uuid; + +/// See [`nexus_types::deployment::Blueprint`]. 
+#[derive(Queryable, Insertable, Clone, Debug, Selectable)]
+#[diesel(table_name = blueprint)]
+pub struct Blueprint {
+    pub id: Uuid,
+    pub parent_blueprint_id: Option<Uuid>,
+    pub time_created: DateTime<Utc>,
+    pub creator: String,
+    pub comment: String,
+}
+
+impl From<&'_ nexus_types::deployment::Blueprint> for Blueprint {
+    fn from(bp: &'_ nexus_types::deployment::Blueprint) -> Self {
+        Self {
+            id: bp.id,
+            parent_blueprint_id: bp.parent_blueprint_id,
+            time_created: bp.time_created,
+            creator: bp.creator.clone(),
+            comment: bp.comment.clone(),
+        }
+    }
+}
+
+impl From<Blueprint> for nexus_types::deployment::BlueprintMetadata {
+    fn from(value: Blueprint) -> Self {
+        Self {
+            id: value.id,
+            parent_blueprint_id: value.parent_blueprint_id,
+            time_created: value.time_created,
+            creator: value.creator,
+            comment: value.comment,
+        }
+    }
+}
+
+/// See [`nexus_types::deployment::BlueprintTarget`].
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = bp_target)]
+pub struct BpTarget {
+    pub version: SqlU32,
+    pub blueprint_id: Uuid,
+    pub enabled: bool,
+    pub time_made_target: DateTime<Utc>,
+}
+
+impl BpTarget {
+    pub fn new(version: u32, target: BlueprintTarget) -> Self {
+        Self {
+            version: version.into(),
+            blueprint_id: target.target_id,
+            enabled: target.enabled,
+            time_made_target: target.time_made_target,
+        }
+    }
+}
+
+impl From<BpTarget> for nexus_types::deployment::BlueprintTarget {
+    fn from(value: BpTarget) -> Self {
+        Self {
+            target_id: value.blueprint_id,
+            enabled: value.enabled,
+            time_made_target: value.time_made_target,
+        }
+    }
+}
+
+/// See [`nexus_types::deployment::OmicronZonesConfig`].
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = bp_sled_omicron_zones)]
+pub struct BpSledOmicronZones {
+    pub blueprint_id: Uuid,
+    pub sled_id: Uuid,
+    pub generation: Generation,
+}
+
+impl BpSledOmicronZones {
+    pub fn new(
+        blueprint_id: Uuid,
+        sled_id: Uuid,
+        zones_config: &nexus_types::deployment::OmicronZonesConfig,
+    ) -> Self {
+        Self {
+            blueprint_id,
+            sled_id,
+            generation: Generation(zones_config.generation),
+        }
+    }
+}
+
+/// See [`nexus_types::deployment::OmicronZoneConfig`].
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = bp_omicron_zone)]
+pub struct BpOmicronZone {
+    pub blueprint_id: Uuid,
+    pub sled_id: Uuid,
+    pub id: Uuid,
+    pub underlay_address: ipv6::Ipv6Addr,
+    pub zone_type: ZoneType,
+    pub primary_service_ip: ipv6::Ipv6Addr,
+    pub primary_service_port: SqlU16,
+    pub second_service_ip: Option<IpNetwork>,
+    pub second_service_port: Option<SqlU16>,
+    pub dataset_zpool_name: Option<String>,
+    pub bp_nic_id: Option<Uuid>,
+    pub dns_gz_address: Option<ipv6::Ipv6Addr>,
+    pub dns_gz_address_index: Option<SqlU32>,
+    pub ntp_ntp_servers: Option<Vec<String>>,
+    pub ntp_dns_servers: Option<Vec<IpNetwork>>,
+    pub ntp_domain: Option<String>,
+    pub nexus_external_tls: Option<bool>,
+    pub nexus_external_dns_servers: Option<Vec<IpNetwork>>,
+    pub snat_ip: Option<IpNetwork>,
+    pub snat_first_port: Option<SqlU16>,
+    pub snat_last_port: Option<SqlU16>,
+}
+
+impl BpOmicronZone {
+    pub fn new(
+        blueprint_id: Uuid,
+        sled_id: Uuid,
+        zone: &nexus_types::inventory::OmicronZoneConfig,
+    ) -> Result<Self, anyhow::Error> {
+        let zone = OmicronZone::new(sled_id, zone)?;
+        Ok(Self {
+            blueprint_id,
+            sled_id: zone.sled_id,
+            id: zone.id,
+            underlay_address: zone.underlay_address,
+            zone_type: zone.zone_type,
+            primary_service_ip: zone.primary_service_ip,
+            primary_service_port: zone.primary_service_port,
+            second_service_ip: zone.second_service_ip,
+            second_service_port: zone.second_service_port,
+            dataset_zpool_name: zone.dataset_zpool_name,
+            bp_nic_id: zone.nic_id,
+            dns_gz_address: zone.dns_gz_address,
+            dns_gz_address_index: zone.dns_gz_address_index,
+            ntp_ntp_servers: zone.ntp_ntp_servers,
+            ntp_dns_servers: zone.ntp_dns_servers,
+            ntp_domain: zone.ntp_domain,
+            nexus_external_tls: zone.nexus_external_tls,
+            nexus_external_dns_servers: zone.nexus_external_dns_servers,
+            snat_ip: zone.snat_ip,
+            snat_first_port: zone.snat_first_port,
+            snat_last_port: zone.snat_last_port,
+        })
+    }
+
+    pub fn into_omicron_zone_config(
+        self,
+        nic_row: Option<BpOmicronZoneNic>,
+    ) -> Result<nexus_types::inventory::OmicronZoneConfig, anyhow::Error> {
+        let zone = OmicronZone {
+            sled_id: self.sled_id,
+            id: self.id,
+            underlay_address: self.underlay_address,
+            zone_type: self.zone_type,
+            primary_service_ip: self.primary_service_ip,
+            primary_service_port: self.primary_service_port,
+            second_service_ip: self.second_service_ip,
+            second_service_port: self.second_service_port,
+            dataset_zpool_name: self.dataset_zpool_name,
+            nic_id: self.bp_nic_id,
+            dns_gz_address: self.dns_gz_address,
+            dns_gz_address_index: self.dns_gz_address_index,
+            ntp_ntp_servers: self.ntp_ntp_servers,
+            ntp_dns_servers: self.ntp_dns_servers,
+            ntp_domain: self.ntp_domain,
+            nexus_external_tls: self.nexus_external_tls,
+            nexus_external_dns_servers: self.nexus_external_dns_servers,
+            snat_ip: self.snat_ip,
+            snat_first_port: self.snat_first_port,
+            snat_last_port: self.snat_last_port,
+        };
+        zone.into_omicron_zone_config(nic_row.map(OmicronZoneNic::from))
+    }
+}
+
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = bp_omicron_zone_nic)]
+pub struct BpOmicronZoneNic {
+    blueprint_id: Uuid,
+    pub id: Uuid,
+    name: Name,
+    ip: IpNetwork,
+    mac: MacAddr,
+    subnet: IpNetwork,
+    vni: SqlU32,
+    is_primary: bool,
+    slot: SqlU8,
+}
+
+impl From<BpOmicronZoneNic> for OmicronZoneNic {
+    fn from(value: BpOmicronZoneNic) -> Self {
+        OmicronZoneNic {
+            id: value.id,
+            name: value.name,
+            ip: value.ip,
+            mac: value.mac,
+            subnet: value.subnet,
+            vni: value.vni,
+            is_primary: value.is_primary,
+            slot: value.slot,
+        }
+    }
+}
+
+impl BpOmicronZoneNic {
+    pub fn new(
+        blueprint_id: Uuid,
+        zone: &nexus_types::inventory::OmicronZoneConfig,
+    ) -> Result<Option<Self>, anyhow::Error> {
+        let zone_nic = OmicronZoneNic::new(zone)?;
+        Ok(zone_nic.map(|nic| Self {
+            blueprint_id,
+            id: nic.id,
+            name: nic.name,
+            ip: nic.ip,
+            mac: nic.mac,
+            subnet: nic.subnet,
+            vni: nic.vni,
+            is_primary: nic.is_primary,
+            slot: nic.slot,
+        }))
+    }
+
+    pub fn into_network_interface_for_zone(
+        self,
+        zone_id: Uuid,
+    ) -> Result<nexus_types::inventory::NetworkInterface, anyhow::Error> {
+        let zone_nic = OmicronZoneNic::from(self);
+        zone_nic.into_network_interface_for_zone(zone_id)
+    }
+}
+
+/// Nexus wants to think in terms of "zones in service", but since most zones of
+/// most blueprints are in service, we store the zones NOT in service in the
+/// database. We handle that inversion internally in the db-queries layer.
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = bp_omicron_zones_not_in_service)]
+pub struct BpOmicronZoneNotInService {
+    pub blueprint_id: Uuid,
+    pub bp_omicron_zone_id: Uuid,
+}
diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs
index 17d74be0aa..d8314f97b8 100644
--- a/nexus/db-model/src/inventory.rs
+++ b/nexus/db-model/src/inventory.rs
@@ -4,6 +4,7 @@
 
 //! Types for representing the hardware/software inventory in the database
 
+use crate::omicron_zone_config::{OmicronZone, OmicronZoneNic};
 use crate::schema::{
     hw_baseboard_id, inv_caboose, inv_collection, inv_collection_error,
     inv_omicron_zone, inv_omicron_zone_nic, inv_root_of_trust,
@@ -14,8 +15,7 @@ use crate::{
     impl_enum_type, ipv6, ByteCount, Generation, MacAddr, Name, SqlU16,
     SqlU32, SqlU8,
 };
-use anyhow::{anyhow, ensure};
-use anyhow::{bail, Context};
+use anyhow::anyhow;
 use chrono::DateTime;
 use chrono::Utc;
 use diesel::backend::Backend;
@@ -26,10 +26,8 @@ use diesel::serialize::ToSql;
 use diesel::{serialize, sql_types};
 use ipnetwork::IpNetwork;
 use nexus_types::inventory::{
-    BaseboardId, Caboose, Collection, OmicronZoneType, PowerState, RotPage,
-    RotSlot,
+    BaseboardId, Caboose, Collection, PowerState, RotPage, RotSlot,
 };
-use std::net::SocketAddrV6;
 use uuid::Uuid;
 
 // See [`nexus_types::inventory::PowerState`].
@@ -750,165 +748,29 @@ impl InvOmicronZone { sled_id: Uuid, zone: &nexus_types::inventory::OmicronZoneConfig, ) -> Result { - let id = zone.id; - let underlay_address = ipv6::Ipv6Addr::from(zone.underlay_address); - let mut nic_id = None; - let mut dns_gz_address = None; - let mut dns_gz_address_index = None; - let mut ntp_ntp_servers = None; - let mut ntp_dns_servers = None; - let mut ntp_ntp_domain = None; - let mut nexus_external_tls = None; - let mut nexus_external_dns_servers = None; - let mut snat_ip = None; - let mut snat_first_port = None; - let mut snat_last_port = None; - let mut second_service_ip = None; - let mut second_service_port = None; - - let (zone_type, primary_service_sockaddr_str, dataset) = match &zone - .zone_type - { - OmicronZoneType::BoundaryNtp { - address, - ntp_servers, - dns_servers, - domain, - nic, - snat_cfg, - } => { - ntp_ntp_servers = Some(ntp_servers.clone()); - ntp_dns_servers = Some(dns_servers.clone()); - ntp_ntp_domain = domain.clone(); - snat_ip = Some(IpNetwork::from(snat_cfg.ip)); - snat_first_port = Some(SqlU16::from(snat_cfg.first_port)); - snat_last_port = Some(SqlU16::from(snat_cfg.last_port)); - nic_id = Some(nic.id); - (ZoneType::BoundaryNtp, address, None) - } - OmicronZoneType::Clickhouse { address, dataset } => { - (ZoneType::Clickhouse, address, Some(dataset)) - } - OmicronZoneType::ClickhouseKeeper { address, dataset } => { - (ZoneType::ClickhouseKeeper, address, Some(dataset)) - } - OmicronZoneType::CockroachDb { address, dataset } => { - (ZoneType::CockroachDb, address, Some(dataset)) - } - OmicronZoneType::Crucible { address, dataset } => { - (ZoneType::Crucible, address, Some(dataset)) - } - OmicronZoneType::CruciblePantry { address } => { - (ZoneType::CruciblePantry, address, None) - } - OmicronZoneType::ExternalDns { - dataset, - http_address, - dns_address, - nic, - } => { - nic_id = Some(nic.id); - let sockaddr = dns_address - .parse::() - .with_context(|| { - format!( - "parsing address for external DNS server {:?}", - dns_address - ) - })?; - second_service_ip = Some(sockaddr.ip()); - second_service_port = Some(SqlU16::from(sockaddr.port())); - (ZoneType::ExternalDns, http_address, Some(dataset)) - } - OmicronZoneType::InternalDns { - dataset, - http_address, - dns_address, - gz_address, - gz_address_index, - } => { - dns_gz_address = Some(ipv6::Ipv6Addr::from(gz_address)); - dns_gz_address_index = Some(SqlU32::from(*gz_address_index)); - let sockaddr = dns_address - .parse::() - .with_context(|| { - format!( - "parsing address for internal DNS server {:?}", - dns_address - ) - })?; - second_service_ip = Some(sockaddr.ip()); - second_service_port = Some(SqlU16::from(sockaddr.port())); - (ZoneType::InternalDns, http_address, Some(dataset)) - } - OmicronZoneType::InternalNtp { - address, - ntp_servers, - dns_servers, - domain, - } => { - ntp_ntp_servers = Some(ntp_servers.clone()); - ntp_dns_servers = Some(dns_servers.clone()); - ntp_ntp_domain = domain.clone(); - (ZoneType::InternalNtp, address, None) - } - OmicronZoneType::Nexus { - internal_address, - external_ip, - nic, - external_tls, - external_dns_servers, - } => { - nic_id = Some(nic.id); - nexus_external_tls = Some(*external_tls); - nexus_external_dns_servers = Some(external_dns_servers.clone()); - second_service_ip = Some(*external_ip); - (ZoneType::Nexus, internal_address, None) - } - OmicronZoneType::Oximeter { address } => { - (ZoneType::Oximeter, address, None) - } - }; - - let dataset_zpool_name = - dataset.map(|d| d.pool_name.as_str().to_string()); - let 
primary_service_sockaddr = primary_service_sockaddr_str - .parse::() - .with_context(|| { - format!( - "parsing socket address for primary IP {:?}", - primary_service_sockaddr_str - ) - })?; - let (primary_service_ip, primary_service_port) = ( - ipv6::Ipv6Addr::from(*primary_service_sockaddr.ip()), - SqlU16::from(primary_service_sockaddr.port()), - ); - - Ok(InvOmicronZone { + let zone = OmicronZone::new(sled_id, zone)?; + Ok(Self { inv_collection_id, - sled_id, - id, - underlay_address, - zone_type, - primary_service_ip, - primary_service_port, - second_service_ip: second_service_ip.map(IpNetwork::from), - second_service_port, - dataset_zpool_name, - nic_id, - dns_gz_address, - dns_gz_address_index, - ntp_ntp_servers, - ntp_dns_servers: ntp_dns_servers - .map(|list| list.into_iter().map(IpNetwork::from).collect()), - ntp_domain: ntp_ntp_domain, - nexus_external_tls, - nexus_external_dns_servers: nexus_external_dns_servers - .map(|list| list.into_iter().map(IpNetwork::from).collect()), - snat_ip, - snat_first_port, - snat_last_port, + sled_id: zone.sled_id, + id: zone.id, + underlay_address: zone.underlay_address, + zone_type: zone.zone_type, + primary_service_ip: zone.primary_service_ip, + primary_service_port: zone.primary_service_port, + second_service_ip: zone.second_service_ip, + second_service_port: zone.second_service_port, + dataset_zpool_name: zone.dataset_zpool_name, + nic_id: zone.nic_id, + dns_gz_address: zone.dns_gz_address, + dns_gz_address_index: zone.dns_gz_address_index, + ntp_ntp_servers: zone.ntp_ntp_servers, + ntp_dns_servers: zone.ntp_dns_servers, + ntp_domain: zone.ntp_domain, + nexus_external_tls: zone.nexus_external_tls, + nexus_external_dns_servers: zone.nexus_external_dns_servers, + snat_ip: zone.snat_ip, + snat_first_port: zone.snat_first_port, + snat_last_port: zone.snat_last_port, }) } @@ -916,169 +778,29 @@ impl InvOmicronZone { self, nic_row: Option, ) -> Result { - let address = SocketAddrV6::new( - std::net::Ipv6Addr::from(self.primary_service_ip), - *self.primary_service_port, - 0, - 0, - ) - .to_string(); - - // Assemble a value that we can use to extract the NIC _if necessary_ - // and report an error if it was needed but not found. - // - // Any error here should be impossible. By the time we get here, the - // caller should have provided `nic_row` iff there's a corresponding - // `nic_id` in this row, and the ids should match up. And whoever - // created this row ought to have provided a nic_id iff this type of - // zone needs a NIC. This last issue is not under our control, though, - // so we definitely want to handle that as an operational error. The - // others could arguably be programmer errors (i.e., we could `assert`), - // but it seems excessive to crash here. - // - // Note that we immediately return for any of the caller errors here. - // For the other error, we will return only later, if some code path - // below tries to use `nic` when it's not present. - let nic = match (self.nic_id, nic_row) { - (Some(expected_id), Some(nic_row)) => { - ensure!(expected_id == nic_row.id, "caller provided wrong NIC"); - Ok(nic_row.into_network_interface_for_zone(self.id)?) - } - (None, None) => Err(anyhow!( - "expected zone to have an associated NIC, but it doesn't" - )), - (Some(_), None) => bail!("caller provided no NIC"), - (None, Some(_)) => bail!("caller unexpectedly provided a NIC"), - }; - - // Similarly, assemble a value that we can use to extract the dataset, - // if necessary. We only return this error if code below tries to use - // this value. 
- let dataset = self - .dataset_zpool_name - .map(|zpool_name| -> Result<_, anyhow::Error> { - Ok(nexus_types::inventory::OmicronZoneDataset { - pool_name: zpool_name.parse().map_err(|e| { - anyhow!("parsing zpool name {:?}: {}", zpool_name, e) - })?, - }) - }) - .transpose()? - .ok_or_else(|| anyhow!("expected dataset zpool name, found none")); - - // Do the same for the DNS server address. - let dns_address = - match (self.second_service_ip, self.second_service_port) { - (Some(dns_ip), Some(dns_port)) => { - Ok(std::net::SocketAddr::new(dns_ip.ip(), *dns_port) - .to_string()) - } - _ => Err(anyhow!( - "expected second service IP and port, \ - found one missing" - )), - }; - - // Do the same for NTP zone properties. - let ntp_dns_servers = self - .ntp_dns_servers - .ok_or_else(|| anyhow!("expected list of DNS servers, found null")) - .map(|list| { - list.into_iter().map(|ipnetwork| ipnetwork.ip()).collect() - }); - let ntp_ntp_servers = - self.ntp_ntp_servers.ok_or_else(|| anyhow!("expected ntp_servers")); - - let zone_type = match self.zone_type { - ZoneType::BoundaryNtp => { - let snat_cfg = match ( - self.snat_ip, - self.snat_first_port, - self.snat_last_port, - ) { - (Some(ip), Some(first_port), Some(last_port)) => { - nexus_types::inventory::SourceNatConfig { - ip: ip.ip(), - first_port: *first_port, - last_port: *last_port, - } - } - _ => bail!( - "expected non-NULL snat properties, \ - found at least one NULL" - ), - }; - OmicronZoneType::BoundaryNtp { - address, - dns_servers: ntp_dns_servers?, - domain: self.ntp_domain, - nic: nic?, - ntp_servers: ntp_ntp_servers?, - snat_cfg, - } - } - ZoneType::Clickhouse => { - OmicronZoneType::Clickhouse { address, dataset: dataset? } - } - ZoneType::ClickhouseKeeper => { - OmicronZoneType::ClickhouseKeeper { address, dataset: dataset? } - } - ZoneType::CockroachDb => { - OmicronZoneType::CockroachDb { address, dataset: dataset? } - } - ZoneType::Crucible => { - OmicronZoneType::Crucible { address, dataset: dataset? } - } - ZoneType::CruciblePantry => { - OmicronZoneType::CruciblePantry { address } - } - ZoneType::ExternalDns => OmicronZoneType::ExternalDns { - dataset: dataset?, - dns_address: dns_address?, - http_address: address, - nic: nic?, - }, - ZoneType::InternalDns => OmicronZoneType::InternalDns { - dataset: dataset?, - dns_address: dns_address?, - http_address: address, - gz_address: *self.dns_gz_address.ok_or_else(|| { - anyhow!("expected dns_gz_address, found none") - })?, - gz_address_index: *self.dns_gz_address_index.ok_or_else( - || anyhow!("expected dns_gz_address_index, found none"), - )?, - }, - ZoneType::InternalNtp => OmicronZoneType::InternalNtp { - address, - dns_servers: ntp_dns_servers?, - domain: self.ntp_domain, - ntp_servers: ntp_ntp_servers?, - }, - ZoneType::Nexus => OmicronZoneType::Nexus { - internal_address: address, - nic: nic?, - external_tls: self - .nexus_external_tls - .ok_or_else(|| anyhow!("expected 'external_tls'"))?, - external_ip: self - .second_service_ip - .ok_or_else(|| anyhow!("expected second service IP"))? - .ip(), - external_dns_servers: self - .nexus_external_dns_servers - .ok_or_else(|| anyhow!("expected 'external_dns_servers'"))? 
- .into_iter() - .map(|i| i.ip()) - .collect(), - }, - ZoneType::Oximeter => OmicronZoneType::Oximeter { address }, - }; - Ok(nexus_types::inventory::OmicronZoneConfig { + let zone = OmicronZone { + sled_id: self.sled_id, id: self.id, - underlay_address: std::net::Ipv6Addr::from(self.underlay_address), - zone_type, - }) + underlay_address: self.underlay_address, + zone_type: self.zone_type, + primary_service_ip: self.primary_service_ip, + primary_service_port: self.primary_service_port, + second_service_ip: self.second_service_ip, + second_service_port: self.second_service_port, + dataset_zpool_name: self.dataset_zpool_name, + nic_id: self.nic_id, + dns_gz_address: self.dns_gz_address, + dns_gz_address_index: self.dns_gz_address_index, + ntp_ntp_servers: self.ntp_ntp_servers, + ntp_dns_servers: self.ntp_dns_servers, + ntp_domain: self.ntp_domain, + nexus_external_tls: self.nexus_external_tls, + nexus_external_dns_servers: self.nexus_external_dns_servers, + snat_ip: self.snat_ip, + snat_first_port: self.snat_first_port, + snat_last_port: self.snat_last_port, + }; + zone.into_omicron_zone_config(nic_row.map(OmicronZoneNic::from)) } } @@ -1096,63 +818,45 @@ pub struct InvOmicronZoneNic { slot: SqlU8, } +impl From for OmicronZoneNic { + fn from(value: InvOmicronZoneNic) -> Self { + OmicronZoneNic { + id: value.id, + name: value.name, + ip: value.ip, + mac: value.mac, + subnet: value.subnet, + vni: value.vni, + is_primary: value.is_primary, + slot: value.slot, + } + } +} + impl InvOmicronZoneNic { pub fn new( inv_collection_id: Uuid, zone: &nexus_types::inventory::OmicronZoneConfig, ) -> Result, anyhow::Error> { - match &zone.zone_type { - OmicronZoneType::ExternalDns { nic, .. } - | OmicronZoneType::BoundaryNtp { nic, .. } - | OmicronZoneType::Nexus { nic, .. } => { - // We do not bother storing the NIC's kind and associated id - // because it should be inferrable from the other information - // that we have. Verify that here. 
- ensure!( - matches!( - nic.kind, - nexus_types::inventory::NetworkInterfaceKind::Service( - id - ) if id == zone.id - ), - "expected zone's NIC kind to be \"service\" and the \ - id to match the zone's id ({})", - zone.id - ); - - Ok(Some(InvOmicronZoneNic { - inv_collection_id, - id: nic.id, - name: Name::from(nic.name.clone()), - ip: IpNetwork::from(nic.ip), - mac: MacAddr::from(nic.mac), - subnet: IpNetwork::from(nic.subnet.clone()), - vni: SqlU32::from(u32::from(nic.vni)), - is_primary: nic.primary, - slot: SqlU8::from(nic.slot), - })) - } - _ => Ok(None), - } + let zone_nic = OmicronZoneNic::new(zone)?; + Ok(zone_nic.map(|nic| Self { + inv_collection_id, + id: nic.id, + name: nic.name, + ip: nic.ip, + mac: nic.mac, + subnet: nic.subnet, + vni: nic.vni, + is_primary: nic.is_primary, + slot: nic.slot, + })) } pub fn into_network_interface_for_zone( self, zone_id: Uuid, ) -> Result { - Ok(nexus_types::inventory::NetworkInterface { - id: self.id, - ip: self.ip.ip(), - kind: nexus_types::inventory::NetworkInterfaceKind::Service( - zone_id, - ), - mac: *self.mac, - name: self.name.into(), - primary: self.is_primary, - slot: *self.slot, - vni: omicron_common::api::external::Vni::try_from(*self.vni) - .context("parsing VNI")?, - subnet: self.subnet.into(), - }) + let zone_nic = OmicronZoneNic::from(self); + zone_nic.into_network_interface_for_zone(zone_id) } } diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 5c0a68c253..7fa95822a7 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -52,7 +52,9 @@ mod switch_port; // These actually represent subqueries, not real table. // However, they must be defined in the same crate as our tables // for join-based marker trait generation. +mod deployment; mod ipv4_nat_entry; +mod omicron_zone_config; pub mod queries; mod quota; mod rack; @@ -114,6 +116,7 @@ pub use console_session::*; pub use dataset::*; pub use dataset_kind::*; pub use db_metadata::*; +pub use deployment::*; pub use device_auth::*; pub use digest::*; pub use disk::*; diff --git a/nexus/db-model/src/omicron_zone_config.rs b/nexus/db-model/src/omicron_zone_config.rs new file mode 100644 index 0000000000..f4726ccd92 --- /dev/null +++ b/nexus/db-model/src/omicron_zone_config.rs @@ -0,0 +1,456 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types for sharing nontrivial conversions between various `OmicronZoneConfig` +//! database serializations and the corresponding Nexus/sled-agent type +//! +//! Both inventory and deployment have nearly-identical tables to serialize +//! `OmicronZoneConfigs` that are collected or generated, respectively. We +//! expect those tables to diverge over time (e.g., inventory may start +//! collecting extra metadata like uptime). This module provides conversion +//! helpers for the parts of those tables that are common between the two. 
+ +use std::net::SocketAddrV6; + +use crate::inventory::ZoneType; +use crate::{ipv6, MacAddr, Name, SqlU16, SqlU32, SqlU8}; +use anyhow::{anyhow, bail, ensure, Context}; +use ipnetwork::IpNetwork; +use nexus_types::inventory::OmicronZoneType; +use uuid::Uuid; + +#[derive(Debug)] +pub(crate) struct OmicronZone { + pub(crate) sled_id: Uuid, + pub(crate) id: Uuid, + pub(crate) underlay_address: ipv6::Ipv6Addr, + pub(crate) zone_type: ZoneType, + pub(crate) primary_service_ip: ipv6::Ipv6Addr, + pub(crate) primary_service_port: SqlU16, + pub(crate) second_service_ip: Option, + pub(crate) second_service_port: Option, + pub(crate) dataset_zpool_name: Option, + pub(crate) nic_id: Option, + pub(crate) dns_gz_address: Option, + pub(crate) dns_gz_address_index: Option, + pub(crate) ntp_ntp_servers: Option>, + pub(crate) ntp_dns_servers: Option>, + pub(crate) ntp_domain: Option, + pub(crate) nexus_external_tls: Option, + pub(crate) nexus_external_dns_servers: Option>, + pub(crate) snat_ip: Option, + pub(crate) snat_first_port: Option, + pub(crate) snat_last_port: Option, +} + +impl OmicronZone { + pub(crate) fn new( + sled_id: Uuid, + zone: &nexus_types::inventory::OmicronZoneConfig, + ) -> anyhow::Result { + let id = zone.id; + let underlay_address = ipv6::Ipv6Addr::from(zone.underlay_address); + let mut nic_id = None; + let mut dns_gz_address = None; + let mut dns_gz_address_index = None; + let mut ntp_ntp_servers = None; + let mut ntp_dns_servers = None; + let mut ntp_ntp_domain = None; + let mut nexus_external_tls = None; + let mut nexus_external_dns_servers = None; + let mut snat_ip = None; + let mut snat_first_port = None; + let mut snat_last_port = None; + let mut second_service_ip = None; + let mut second_service_port = None; + + let (zone_type, primary_service_sockaddr_str, dataset) = match &zone + .zone_type + { + OmicronZoneType::BoundaryNtp { + address, + ntp_servers, + dns_servers, + domain, + nic, + snat_cfg, + } => { + ntp_ntp_servers = Some(ntp_servers.clone()); + ntp_dns_servers = Some(dns_servers.clone()); + ntp_ntp_domain = domain.clone(); + snat_ip = Some(IpNetwork::from(snat_cfg.ip)); + snat_first_port = Some(SqlU16::from(snat_cfg.first_port)); + snat_last_port = Some(SqlU16::from(snat_cfg.last_port)); + nic_id = Some(nic.id); + (ZoneType::BoundaryNtp, address, None) + } + OmicronZoneType::Clickhouse { address, dataset } => { + (ZoneType::Clickhouse, address, Some(dataset)) + } + OmicronZoneType::ClickhouseKeeper { address, dataset } => { + (ZoneType::ClickhouseKeeper, address, Some(dataset)) + } + OmicronZoneType::CockroachDb { address, dataset } => { + (ZoneType::CockroachDb, address, Some(dataset)) + } + OmicronZoneType::Crucible { address, dataset } => { + (ZoneType::Crucible, address, Some(dataset)) + } + OmicronZoneType::CruciblePantry { address } => { + (ZoneType::CruciblePantry, address, None) + } + OmicronZoneType::ExternalDns { + dataset, + http_address, + dns_address, + nic, + } => { + nic_id = Some(nic.id); + let sockaddr = dns_address + .parse::() + .with_context(|| { + format!( + "parsing address for external DNS server {:?}", + dns_address + ) + })?; + second_service_ip = Some(sockaddr.ip()); + second_service_port = Some(SqlU16::from(sockaddr.port())); + (ZoneType::ExternalDns, http_address, Some(dataset)) + } + OmicronZoneType::InternalDns { + dataset, + http_address, + dns_address, + gz_address, + gz_address_index, + } => { + dns_gz_address = Some(ipv6::Ipv6Addr::from(gz_address)); + dns_gz_address_index = Some(SqlU32::from(*gz_address_index)); + let sockaddr = 
dns_address + .parse::() + .with_context(|| { + format!( + "parsing address for internal DNS server {:?}", + dns_address + ) + })?; + second_service_ip = Some(sockaddr.ip()); + second_service_port = Some(SqlU16::from(sockaddr.port())); + (ZoneType::InternalDns, http_address, Some(dataset)) + } + OmicronZoneType::InternalNtp { + address, + ntp_servers, + dns_servers, + domain, + } => { + ntp_ntp_servers = Some(ntp_servers.clone()); + ntp_dns_servers = Some(dns_servers.clone()); + ntp_ntp_domain = domain.clone(); + (ZoneType::InternalNtp, address, None) + } + OmicronZoneType::Nexus { + internal_address, + external_ip, + nic, + external_tls, + external_dns_servers, + } => { + nic_id = Some(nic.id); + nexus_external_tls = Some(*external_tls); + nexus_external_dns_servers = Some(external_dns_servers.clone()); + second_service_ip = Some(*external_ip); + (ZoneType::Nexus, internal_address, None) + } + OmicronZoneType::Oximeter { address } => { + (ZoneType::Oximeter, address, None) + } + }; + + let dataset_zpool_name = + dataset.map(|d| d.pool_name.as_str().to_string()); + let primary_service_sockaddr = primary_service_sockaddr_str + .parse::() + .with_context(|| { + format!( + "parsing socket address for primary IP {:?}", + primary_service_sockaddr_str + ) + })?; + let (primary_service_ip, primary_service_port) = ( + ipv6::Ipv6Addr::from(*primary_service_sockaddr.ip()), + SqlU16::from(primary_service_sockaddr.port()), + ); + + Ok(Self { + sled_id, + id, + underlay_address, + zone_type, + primary_service_ip, + primary_service_port, + second_service_ip: second_service_ip.map(IpNetwork::from), + second_service_port, + dataset_zpool_name, + nic_id, + dns_gz_address, + dns_gz_address_index, + ntp_ntp_servers, + ntp_dns_servers: ntp_dns_servers + .map(|list| list.into_iter().map(IpNetwork::from).collect()), + ntp_domain: ntp_ntp_domain, + nexus_external_tls, + nexus_external_dns_servers: nexus_external_dns_servers + .map(|list| list.into_iter().map(IpNetwork::from).collect()), + snat_ip, + snat_first_port, + snat_last_port, + }) + } + + pub(crate) fn into_omicron_zone_config( + self, + nic_row: Option, + ) -> anyhow::Result { + let address = SocketAddrV6::new( + std::net::Ipv6Addr::from(self.primary_service_ip), + *self.primary_service_port, + 0, + 0, + ) + .to_string(); + + // Assemble a value that we can use to extract the NIC _if necessary_ + // and report an error if it was needed but not found. + // + // Any error here should be impossible. By the time we get here, the + // caller should have provided `nic_row` iff there's a corresponding + // `nic_id` in this row, and the ids should match up. And whoever + // created this row ought to have provided a nic_id iff this type of + // zone needs a NIC. This last issue is not under our control, though, + // so we definitely want to handle that as an operational error. The + // others could arguably be programmer errors (i.e., we could `assert`), + // but it seems excessive to crash here. + // + // Note that we immediately return for any of the caller errors here. + // For the other error, we will return only later, if some code path + // below tries to use `nic` when it's not present. + let nic = match (self.nic_id, nic_row) { + (Some(expected_id), Some(nic_row)) => { + ensure!(expected_id == nic_row.id, "caller provided wrong NIC"); + Ok(nic_row.into_network_interface_for_zone(self.id)?) + } + // We don't expect and don't have a NIC. This is reasonable, so we + // don't `bail!` like we do in the next two cases, but we also + // _don't have a NIC_. 
Put an error into `nic`, and then if we land + // in a zone below that expects one, we'll fail then. + (None, None) => Err(anyhow!( + "expected zone to have an associated NIC, but it doesn't" + )), + (Some(_), None) => bail!("caller provided no NIC"), + (None, Some(_)) => bail!("caller unexpectedly provided a NIC"), + }; + + // Similarly, assemble a value that we can use to extract the dataset, + // if necessary. We only return this error if code below tries to use + // this value. + let dataset = self + .dataset_zpool_name + .map(|zpool_name| -> Result<_, anyhow::Error> { + Ok(nexus_types::inventory::OmicronZoneDataset { + pool_name: zpool_name.parse().map_err(|e| { + anyhow!("parsing zpool name {:?}: {}", zpool_name, e) + })?, + }) + }) + .transpose()? + .ok_or_else(|| anyhow!("expected dataset zpool name, found none")); + + // Do the same for the DNS server address. + let dns_address = + match (self.second_service_ip, self.second_service_port) { + (Some(dns_ip), Some(dns_port)) => { + Ok(std::net::SocketAddr::new(dns_ip.ip(), *dns_port) + .to_string()) + } + _ => Err(anyhow!( + "expected second service IP and port, \ + found one missing" + )), + }; + + // Do the same for NTP zone properties. + let ntp_dns_servers = self + .ntp_dns_servers + .ok_or_else(|| anyhow!("expected list of DNS servers, found null")) + .map(|list| { + list.into_iter().map(|ipnetwork| ipnetwork.ip()).collect() + }); + let ntp_ntp_servers = + self.ntp_ntp_servers.ok_or_else(|| anyhow!("expected ntp_servers")); + + let zone_type = match self.zone_type { + ZoneType::BoundaryNtp => { + let snat_cfg = match ( + self.snat_ip, + self.snat_first_port, + self.snat_last_port, + ) { + (Some(ip), Some(first_port), Some(last_port)) => { + nexus_types::inventory::SourceNatConfig { + ip: ip.ip(), + first_port: *first_port, + last_port: *last_port, + } + } + _ => bail!( + "expected non-NULL snat properties, \ + found at least one NULL" + ), + }; + OmicronZoneType::BoundaryNtp { + address, + dns_servers: ntp_dns_servers?, + domain: self.ntp_domain, + nic: nic?, + ntp_servers: ntp_ntp_servers?, + snat_cfg, + } + } + ZoneType::Clickhouse => { + OmicronZoneType::Clickhouse { address, dataset: dataset? } + } + ZoneType::ClickhouseKeeper => { + OmicronZoneType::ClickhouseKeeper { address, dataset: dataset? } + } + ZoneType::CockroachDb => { + OmicronZoneType::CockroachDb { address, dataset: dataset? } + } + ZoneType::Crucible => { + OmicronZoneType::Crucible { address, dataset: dataset? } + } + ZoneType::CruciblePantry => { + OmicronZoneType::CruciblePantry { address } + } + ZoneType::ExternalDns => OmicronZoneType::ExternalDns { + dataset: dataset?, + dns_address: dns_address?, + http_address: address, + nic: nic?, + }, + ZoneType::InternalDns => OmicronZoneType::InternalDns { + dataset: dataset?, + dns_address: dns_address?, + http_address: address, + gz_address: *self.dns_gz_address.ok_or_else(|| { + anyhow!("expected dns_gz_address, found none") + })?, + gz_address_index: *self.dns_gz_address_index.ok_or_else( + || anyhow!("expected dns_gz_address_index, found none"), + )?, + }, + ZoneType::InternalNtp => OmicronZoneType::InternalNtp { + address, + dns_servers: ntp_dns_servers?, + domain: self.ntp_domain, + ntp_servers: ntp_ntp_servers?, + }, + ZoneType::Nexus => OmicronZoneType::Nexus { + internal_address: address, + nic: nic?, + external_tls: self + .nexus_external_tls + .ok_or_else(|| anyhow!("expected 'external_tls'"))?, + external_ip: self + .second_service_ip + .ok_or_else(|| anyhow!("expected second service IP"))? 
+ .ip(), + external_dns_servers: self + .nexus_external_dns_servers + .ok_or_else(|| anyhow!("expected 'external_dns_servers'"))? + .into_iter() + .map(|i| i.ip()) + .collect(), + }, + ZoneType::Oximeter => OmicronZoneType::Oximeter { address }, + }; + Ok(nexus_types::inventory::OmicronZoneConfig { + id: self.id, + underlay_address: std::net::Ipv6Addr::from(self.underlay_address), + zone_type, + }) + } +} + +#[derive(Debug)] +pub(crate) struct OmicronZoneNic { + pub(crate) id: Uuid, + pub(crate) name: Name, + pub(crate) ip: IpNetwork, + pub(crate) mac: MacAddr, + pub(crate) subnet: IpNetwork, + pub(crate) vni: SqlU32, + pub(crate) is_primary: bool, + pub(crate) slot: SqlU8, +} + +impl OmicronZoneNic { + pub(crate) fn new( + zone: &nexus_types::inventory::OmicronZoneConfig, + ) -> anyhow::Result> { + match &zone.zone_type { + OmicronZoneType::ExternalDns { nic, .. } + | OmicronZoneType::BoundaryNtp { nic, .. } + | OmicronZoneType::Nexus { nic, .. } => { + // We do not bother storing the NIC's kind and associated id + // because it should be inferrable from the other information + // that we have. Verify that here. + ensure!( + matches!( + nic.kind, + nexus_types::inventory::NetworkInterfaceKind::Service( + id + ) if id == zone.id + ), + "expected zone's NIC kind to be \"service\" and the \ + id to match the zone's id ({})", + zone.id + ); + + Ok(Some(Self { + id: nic.id, + name: Name::from(nic.name.clone()), + ip: IpNetwork::from(nic.ip), + mac: MacAddr::from(nic.mac), + subnet: IpNetwork::from(nic.subnet.clone()), + vni: SqlU32::from(u32::from(nic.vni)), + is_primary: nic.primary, + slot: SqlU8::from(nic.slot), + })) + } + _ => Ok(None), + } + } + + pub(crate) fn into_network_interface_for_zone( + self, + zone_id: Uuid, + ) -> anyhow::Result { + Ok(nexus_types::inventory::NetworkInterface { + id: self.id, + ip: self.ip.ip(), + kind: nexus_types::inventory::NetworkInterfaceKind::Service( + zone_id, + ), + mac: *self.mac, + name: self.name.into(), + primary: self.is_primary, + slot: *self.slot, + vni: omicron_common::api::external::Vni::try_from(*self.vni) + .context("parsing VNI")?, + subnet: self.subnet.into(), + }) + } +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index eb71a12f04..ddb5ba8e03 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(27, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(28, 0, 0); table! { disk (id) { @@ -1388,6 +1388,89 @@ table! { } } +/* blueprints */ + +table! { + blueprint (id) { + id -> Uuid, + + parent_blueprint_id -> Nullable, + + time_created -> Timestamptz, + creator -> Text, + comment -> Text, + } +} + +table! { + bp_target (version) { + version -> Int8, + + blueprint_id -> Uuid, + + enabled -> Bool, + time_made_target -> Timestamptz, + } +} + +table! { + bp_sled_omicron_zones (blueprint_id, sled_id) { + blueprint_id -> Uuid, + sled_id -> Uuid, + + generation -> Int8, + } +} + +table! 
{
+    bp_omicron_zone (blueprint_id, id) {
+        blueprint_id -> Uuid,
+        sled_id -> Uuid,
+
+        id -> Uuid,
+        underlay_address -> Inet,
+        zone_type -> crate::ZoneTypeEnum,
+
+        primary_service_ip -> Inet,
+        primary_service_port -> Int4,
+        second_service_ip -> Nullable<Inet>,
+        second_service_port -> Nullable<Int4>,
+        dataset_zpool_name -> Nullable<Text>,
+        bp_nic_id -> Nullable<Uuid>,
+        dns_gz_address -> Nullable<Inet>,
+        dns_gz_address_index -> Nullable<Int8>,
+        ntp_ntp_servers -> Nullable<Array<Text>>,
+        ntp_dns_servers -> Nullable<Array<Inet>>,
+        ntp_domain -> Nullable<Text>,
+        nexus_external_tls -> Nullable<Bool>,
+        nexus_external_dns_servers -> Nullable<Array<Inet>>,
+        snat_ip -> Nullable<Inet>,
+        snat_first_port -> Nullable<Int4>,
+        snat_last_port -> Nullable<Int4>,
+    }
+}
+
+table! {
+    bp_omicron_zone_nic (blueprint_id, id) {
+        blueprint_id -> Uuid,
+        id -> Uuid,
+        name -> Text,
+        ip -> Inet,
+        mac -> Int8,
+        subnet -> Inet,
+        vni -> Int8,
+        is_primary -> Bool,
+        slot -> Int2,
+    }
+}
+
+table! {
+    bp_omicron_zones_not_in_service (blueprint_id, bp_omicron_zone_id) {
+        blueprint_id -> Uuid,
+        bp_omicron_zone_id -> Uuid,
+    }
+}
+
 table! {
     bootstore_keys (key, generation) {
         key -> Text,
diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml
index 3240c54f3f..9cdcc88e6a 100644
--- a/nexus/db-queries/Cargo.toml
+++ b/nexus/db-queries/Cargo.toml
@@ -64,8 +64,10 @@ camino-tempfile.workspace = true
 expectorate.workspace = true
 hyper-rustls.workspace = true
 gateway-client.workspace = true
+illumos-utils.workspace = true
 internal-dns.workspace = true
 itertools.workspace = true
+nexus-deployment.workspace = true
 nexus-inventory.workspace = true
 nexus-test-utils.workspace = true
 omicron-sled-agent.workspace = true
diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs
new file mode 100644
index 0000000000..72adb1d3df
--- /dev/null
+++ b/nexus/db-queries/src/db/datastore/deployment.rs
@@ -0,0 +1,1583 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use super::DataStore;
+use crate::authz;
+use crate::authz::ApiResource;
+use crate::context::OpContext;
+use crate::db;
+use crate::db::error::public_error_from_diesel;
+use crate::db::error::ErrorHandler;
+use crate::db::pagination::paginated;
+use crate::db::pagination::Paginator;
+use crate::db::DbConnection;
+use crate::db::TransactionError;
+use anyhow::Context;
+use async_bb8_diesel::AsyncConnection;
+use async_bb8_diesel::AsyncRunQueryDsl;
+use chrono::DateTime;
+use chrono::Utc;
+use diesel::expression::SelectableHelper;
+use diesel::pg::Pg;
+use diesel::query_builder::AstPass;
+use diesel::query_builder::QueryFragment;
+use diesel::query_builder::QueryId;
+use diesel::result::DatabaseErrorKind;
+use diesel::result::Error as DieselError;
+use diesel::sql_types;
+use diesel::Column;
+use diesel::ExpressionMethods;
+use diesel::OptionalExtension;
+use diesel::QueryDsl;
+use diesel::RunQueryDsl;
+use nexus_db_model::Blueprint as DbBlueprint;
+use nexus_db_model::BpOmicronZone;
+use nexus_db_model::BpOmicronZoneNic;
+use nexus_db_model::BpOmicronZoneNotInService;
+use nexus_db_model::BpSledOmicronZones;
+use nexus_db_model::BpTarget;
+use nexus_types::deployment::Blueprint;
+use nexus_types::deployment::BlueprintMetadata;
+use nexus_types::deployment::BlueprintTarget;
+use nexus_types::deployment::OmicronZonesConfig;
+use omicron_common::api::external::DataPageParams;
+use omicron_common::api::external::Error;
+use omicron_common::api::external::ListResultVec;
+use omicron_common::api::external::LookupType;
+use omicron_common::api::external::ResourceType;
+use omicron_common::bail_unless;
+use std::collections::BTreeMap;
+use std::collections::BTreeSet;
+use std::num::NonZeroU32;
+use uuid::Uuid;
+
+/// "limit" used in SQL queries that paginate through all sleds, omicron
+/// zones, etc.
+///
+/// While we always load an entire blueprint in one operation, we use a
+/// [`Paginator`] to guard against single queries returning an unchecked number
+/// of rows.
+// unsafe: `new_unchecked` is only unsound if the argument is 0.
+const SQL_BATCH_SIZE: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) };
+
+impl DataStore {
+    /// List blueprints
+    pub async fn blueprints_list(
+        &self,
+        opctx: &OpContext,
+        pagparams: &DataPageParams<'_, Uuid>,
+    ) -> ListResultVec<BlueprintMetadata> {
+        use db::schema::blueprint;
+
+        opctx
+            .authorize(authz::Action::ListChildren, &authz::BLUEPRINT_CONFIG)
+            .await?;
+
+        let blueprints = paginated(blueprint::table, blueprint::id, pagparams)
+            .select(DbBlueprint::as_select())
+            .get_results_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(blueprints.into_iter().map(BlueprintMetadata::from).collect())
+    }
+
+    /// Store a complete blueprint into the database
+    pub async fn blueprint_insert(
+        &self,
+        opctx: &OpContext,
+        blueprint: &Blueprint,
+    ) -> Result<(), Error> {
+        opctx
+            .authorize(authz::Action::Modify, &authz::BLUEPRINT_CONFIG)
+            .await?;
+
+        // In the database, the blueprint is represented essentially as a tree
+        // rooted at a `blueprint` row.  Other nodes in the tree point
+        // back at the `blueprint` via `blueprint_id`.
+        //
+        // It's helpful to assemble some values before entering the transaction
+        // so that we can produce the `Error` type that we want here.
+        let row_blueprint = DbBlueprint::from(blueprint);
+        let blueprint_id = row_blueprint.id;
+        let sled_omicron_zones = blueprint
+            .omicron_zones
+            .iter()
+            .map(|(sled_id, zones_config)| {
+                BpSledOmicronZones::new(blueprint_id, *sled_id, zones_config)
+            })
+            .collect::<Vec<_>>();
+        let omicron_zones = blueprint
+            .omicron_zones
+            .iter()
+            .flat_map(|(sled_id, zones_config)| {
+                zones_config.zones.iter().map(|zone| {
+                    BpOmicronZone::new(blueprint_id, *sled_id, zone)
+                        .map_err(|e| Error::internal_error(&format!("{:#}", e)))
+                })
+            })
+            .collect::<Result<Vec<_>, Error>>()?;
+        let omicron_zone_nics = blueprint
+            .omicron_zones
+            .values()
+            .flat_map(|zones_config| {
+                zones_config.zones.iter().filter_map(|zone| {
+                    BpOmicronZoneNic::new(blueprint_id, zone)
+                        .with_context(|| format!("zone {:?}", zone.id))
+                        .map_err(|e| Error::internal_error(&format!("{:#}", e)))
+                        .transpose()
+                })
+            })
+            .collect::<Result<Vec<_>, _>>()?;
+
+        // `Blueprint` stores a set of zones in service, but in the database we
+        // store the set of zones NOT in service (which we expect to be much
+        // smaller, often empty). Build that inverted set here.
+        let omicron_zones_not_in_service = {
+            let mut zones_not_in_service = Vec::new();
+            for zone in &omicron_zones {
+                if !blueprint.zones_in_service.contains(&zone.id) {
+                    zones_not_in_service.push(BpOmicronZoneNotInService {
+                        blueprint_id,
+                        bp_omicron_zone_id: zone.id,
+                    });
+                }
+            }
+            zones_not_in_service
+        };
+
+        // This implementation inserts all records associated with the
+        // blueprint in one transaction.  This is required: we don't want
+        // any planner or executor to see a half-inserted blueprint, nor do we
+        // want to leave a partial blueprint around if we crash.  However, it
+        // does mean this is likely to be a big transaction and if that becomes
+        // a problem we could break this up as long as we address those
+        // problems.
+        //
+        // The SQL here is written so that it doesn't have to be an
+        // *interactive* transaction.  That is, it should in principle be
+        // possible to generate all this SQL up front and send it as one big
+        // batch rather than making a bunch of round-trips to the database.
+        // We'd do that if we had an interface for doing that with bound
+        // parameters, etc.  See oxidecomputer/omicron#973.
+        let pool = self.pool_connection_authorized(opctx).await?;
+        pool.transaction_async(|conn| async move {
+            // Insert the row for the blueprint.
+            {
+                use db::schema::blueprint::dsl;
+                let _: usize = diesel::insert_into(dsl::blueprint)
+                    .values(row_blueprint)
+                    .execute_async(&conn)
+                    .await?;
+            }
+
+            // Insert all the Omicron zones for this blueprint.
+            {
+                use db::schema::bp_sled_omicron_zones::dsl as sled_zones;
+                let _ = diesel::insert_into(sled_zones::bp_sled_omicron_zones)
+                    .values(sled_omicron_zones)
+                    .execute_async(&conn)
+                    .await?;
+            }
+
+            {
+                use db::schema::bp_omicron_zone::dsl as omicron_zone;
+                let _ = diesel::insert_into(omicron_zone::bp_omicron_zone)
+                    .values(omicron_zones)
+                    .execute_async(&conn)
+                    .await?;
+            }
+
+            {
+                use db::schema::bp_omicron_zone_nic::dsl as omicron_zone_nic;
+                let _ =
+                    diesel::insert_into(omicron_zone_nic::bp_omicron_zone_nic)
+                        .values(omicron_zone_nics)
+                        .execute_async(&conn)
+                        .await?;
+            }
+
+            {
+                use db::schema::bp_omicron_zones_not_in_service::dsl;
+                let _ =
+                    diesel::insert_into(dsl::bp_omicron_zones_not_in_service)
+                        .values(omicron_zones_not_in_service)
+                        .execute_async(&conn)
+                        .await?;
+            }
+
+            Ok(())
+        })
+        .await
+        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        info!(
+            &opctx.log,
+            "inserted blueprint";
+            "blueprint_id" => %blueprint.id,
+        );
+
+        Ok(())
+    }
+
+    /// Read a complete blueprint from the database
+    pub async fn blueprint_read(
+        &self,
+        opctx: &OpContext,
+        authz_blueprint: &authz::Blueprint,
+    ) -> Result<Blueprint, Error> {
+        opctx.authorize(authz::Action::Read, authz_blueprint).await?;
+        let conn = self.pool_connection_authorized(opctx).await?;
+        let blueprint_id = authz_blueprint.id();
+
+        // Read the metadata from the primary blueprint row, and ensure that it
+        // exists.
+        let (parent_blueprint_id, time_created, creator, comment) = {
+            use db::schema::blueprint::dsl;
+
+            let Some(blueprint) = dsl::blueprint
+                .filter(dsl::id.eq(blueprint_id))
+                .select(DbBlueprint::as_select())
+                .get_result_async(&*conn)
+                .await
+                .optional()
+                .map_err(|e| {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                })?
+            else {
+                return Err(authz_blueprint.not_found());
+            };
+
+            (
+                blueprint.parent_blueprint_id,
+                blueprint.time_created,
+                blueprint.creator,
+                blueprint.comment,
+            )
+        };
+
+        // Read this blueprint's `bp_sled_omicron_zones` rows, which describe
+        // the `OmicronZonesConfig` generation number for each sled that is a
+        // part of this blueprint.  Construct the BTreeMap we ultimately need,
+        // but all the `zones` vecs will be empty until our next query below.
+        let mut omicron_zones: BTreeMap<Uuid, OmicronZonesConfig> = {
+            use db::schema::bp_sled_omicron_zones::dsl;
+
+            let mut omicron_zones = BTreeMap::new();
+            let mut paginator = Paginator::new(SQL_BATCH_SIZE);
+            while let Some(p) = paginator.next() {
+                let batch = paginated(
+                    dsl::bp_sled_omicron_zones,
+                    dsl::sled_id,
+                    &p.current_pagparams(),
+                )
+                .filter(dsl::blueprint_id.eq(blueprint_id))
+                .select(BpSledOmicronZones::as_select())
+                .load_async(&*conn)
+                .await
+                .map_err(|e| {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                })?;
+
+                paginator = p.found_batch(&batch, &|s| s.sled_id);
+
+                for s in batch {
+                    let old = omicron_zones.insert(
+                        s.sled_id,
+                        OmicronZonesConfig {
+                            generation: *s.generation,
+                            zones: Vec::new(),
+                        },
+                    );
+                    bail_unless!(
+                        old.is_none(),
+                        "found duplicate sled ID in bp_sled_omicron_zones: {}",
+                        s.sled_id
+                    );
+                }
+            }
+
+            omicron_zones
+        };
+
+        // Assemble a mutable map of all the NICs found, by NIC id.  As we
+        // match these up with the corresponding zone below, we'll remove items
+        // from this set.  That way we can tell if the same NIC was used twice
+        // or not used at all.
+ let mut omicron_zone_nics = { + use db::schema::bp_omicron_zone_nic::dsl; + + let mut omicron_zone_nics = BTreeMap::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::bp_omicron_zone_nic, + dsl::id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select(BpOmicronZoneNic::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|n| n.id); + + for n in batch { + let nic_id = n.id; + let old = omicron_zone_nics.insert(nic_id, n); + bail_unless!( + old.is_none(), + "found duplicate NIC ID in bp_omicron_zone_nic: {}", + nic_id, + ); + } + } + + omicron_zone_nics + }; + + // Load the list of not-in-service zones. Similar to NICs, we'll use a + // mutable set of zone IDs so we can tell if a zone we expected to be + // inactive wasn't present in the blueprint at all. + let mut omicron_zones_not_in_service = { + use db::schema::bp_omicron_zones_not_in_service::dsl; + + let mut omicron_zones_not_in_service = BTreeSet::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::bp_omicron_zones_not_in_service, + dsl::bp_omicron_zone_id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select(BpOmicronZoneNotInService::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|z| z.bp_omicron_zone_id); + + for z in batch { + let inserted = omicron_zones_not_in_service + .insert(z.bp_omicron_zone_id); + bail_unless!( + inserted, + "found duplicate zone ID in \ + bp_omicron_zones_not_in_service: {}", + z.bp_omicron_zone_id, + ); + } + } + + omicron_zones_not_in_service + }; + + // Create the in-memory list of zones _in_ service, which we'll + // calculate below as we load zones. (Any zone that isn't present in + // `omicron_zones_not_in_service` is considered in service.) + let mut zones_in_service = BTreeSet::new(); + + // Load all the zones for each sled. + { + use db::schema::bp_omicron_zone::dsl; + + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + // `paginated` implicitly orders by our `id`, which is also + // handy for testing: the zones are always consistently ordered + let batch = paginated( + dsl::bp_omicron_zone, + dsl::id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select(BpOmicronZone::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|z| z.id); + + for z in batch { + let nic_row = z + .bp_nic_id + .map(|id| { + // This error means that we found a row in + // bp_omicron_zone that references a NIC by id but + // there's no corresponding row in + // bp_omicron_zone_nic with that id. This should be + // impossible and reflects either a bug or database + // corruption. + omicron_zone_nics.remove(&id).ok_or_else(|| { + Error::internal_error(&format!( + "zone {:?}: expected to find NIC {:?}, \ + but didn't", + z.id, z.bp_nic_id + )) + }) + }) + .transpose()?; + let sled_zones = + omicron_zones.get_mut(&z.sled_id).ok_or_else(|| { + // This error means that we found a row in + // bp_omicron_zone with no associated record in + // bp_sled_omicron_zones. 
This should be
+                        // impossible and reflects either a bug or database
+                        // corruption.
+                        Error::internal_error(&format!(
+                            "zone {:?}: unknown sled: {:?}",
+                            z.id, z.sled_id
+                        ))
+                    })?;
+                let zone_id = z.id;
+                let zone = z
+                    .into_omicron_zone_config(nic_row)
+                    .with_context(|| {
+                        format!("zone {:?}: parse from database", zone_id)
+                    })
+                    .map_err(|e| {
+                        Error::internal_error(&format!(
+                            "{:#}",
+                            e.to_string()
+                        ))
+                    })?;
+                sled_zones.zones.push(zone);
+
+                // If we can remove `zone_id` from
+                // `omicron_zones_not_in_service`, then the zone is not in
+                // service.  Otherwise, add it to the list of in-service
+                // zones.
+                if !omicron_zones_not_in_service.remove(&zone_id) {
+                    zones_in_service.insert(zone_id);
+                }
+            }
+        }
+    }
+
+        bail_unless!(
+            omicron_zone_nics.is_empty(),
+            "found extra Omicron zone NICs: {:?}",
+            omicron_zone_nics.keys()
+        );
+        bail_unless!(
+            omicron_zones_not_in_service.is_empty(),
+            "found extra Omicron zones not in service: {:?}",
+            omicron_zones_not_in_service,
+        );
+
+        Ok(Blueprint {
+            id: blueprint_id,
+            omicron_zones,
+            zones_in_service,
+            parent_blueprint_id,
+            time_created,
+            creator,
+            comment,
+        })
+    }
+
+    /// Delete a blueprint from the database
+    pub async fn blueprint_delete(
+        &self,
+        opctx: &OpContext,
+        authz_blueprint: &authz::Blueprint,
+    ) -> Result<(), Error> {
+        opctx.authorize(authz::Action::Delete, authz_blueprint).await?;
+        let blueprint_id = authz_blueprint.id();
+
+        // As with inserting a whole blueprint, we remove it in one big
+        // transaction.  Similar considerations apply.  We could
+        // break it up if these transactions become too big.  But we'd need a
+        // way to stop other clients from discovering a blueprint after we
+        // start removing it and we'd also need to make sure we didn't leak a
+        // blueprint if we crash while deleting it.
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        let (
+            nblueprints,
+            nsled_agent_zones,
+            nzones,
+            nnics,
+            nzones_not_in_service,
+        ) = conn
+            .transaction_async(|conn| async move {
+                // Ensure that the blueprint we're about to delete is not the
+                // current target.
+                let current_target =
+                    self.blueprint_current_target_only(&conn).await?;
+                if let Some(current_target) = current_target {
+                    if current_target.target_id == blueprint_id {
+                        return Err(TransactionError::CustomError(
+                            Error::conflict(format!(
+                                "blueprint {blueprint_id} is the \
+                                 current target and cannot be deleted",
+                            )),
+                        ));
+                    }
+                }
+
+                // Remove the record describing the blueprint itself.
+                let nblueprints = {
+                    use db::schema::blueprint::dsl;
+                    diesel::delete(
+                        dsl::blueprint.filter(dsl::id.eq(blueprint_id)),
+                    )
+                    .execute_async(&conn)
+                    .await?
+                };
+
+                // Bail out if this blueprint didn't exist; there won't be
+                // references to it in any of the remaining tables either, since
+                // deletion always goes through this transaction.
+                if nblueprints == 0 {
+                    return Err(TransactionError::CustomError(
+                        authz_blueprint.not_found(),
+                    ));
+                }
+
+                // Remove rows associated with Omicron zones
+                let nsled_agent_zones = {
+                    use db::schema::bp_sled_omicron_zones::dsl;
+                    diesel::delete(
+                        dsl::bp_sled_omicron_zones
+                            .filter(dsl::blueprint_id.eq(blueprint_id)),
+                    )
+                    .execute_async(&conn)
+                    .await?
+                };
+
+                let nzones = {
+                    use db::schema::bp_omicron_zone::dsl;
+                    diesel::delete(
+                        dsl::bp_omicron_zone
+                            .filter(dsl::blueprint_id.eq(blueprint_id)),
+                    )
+                    .execute_async(&conn)
+                    .await?
+                };
+
+                let nnics = {
+                    use db::schema::bp_omicron_zone_nic::dsl;
+                    diesel::delete(
+                        dsl::bp_omicron_zone_nic
+                            .filter(dsl::blueprint_id.eq(blueprint_id)),
+                    )
+                    .execute_async(&conn)
+                    .await?
+                };
+
+                let nzones_not_in_service = {
+                    use db::schema::bp_omicron_zones_not_in_service::dsl;
+                    diesel::delete(
+                        dsl::bp_omicron_zones_not_in_service
+                            .filter(dsl::blueprint_id.eq(blueprint_id)),
+                    )
+                    .execute_async(&conn)
+                    .await?
+                };
+
+                Ok((
+                    nblueprints,
+                    nsled_agent_zones,
+                    nzones,
+                    nnics,
+                    nzones_not_in_service,
+                ))
+            })
+            .await
+            .map_err(|error| match error {
+                TransactionError::CustomError(e) => e,
+                TransactionError::Database(e) => {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                }
+            })?;
+
+        info!(&opctx.log, "removed blueprint";
+            "blueprint_id" => blueprint_id.to_string(),
+            "nblueprints" => nblueprints,
+            "nsled_agent_zones" => nsled_agent_zones,
+            "nzones" => nzones,
+            "nnics" => nnics,
+            "nzones_not_in_service" => nzones_not_in_service,
+        );
+
+        Ok(())
+    }
+
+    /// Set the current target blueprint
+    ///
+    /// In order to become the target blueprint, `target`'s parent blueprint
+    /// must be the current target
+    pub async fn blueprint_target_set_current(
+        &self,
+        opctx: &OpContext,
+        target: BlueprintTarget,
+    ) -> Result<(), Error> {
+        opctx
+            .authorize(authz::Action::Modify, &authz::BLUEPRINT_CONFIG)
+            .await?;
+
+        let query = InsertTargetQuery {
+            target_id: target.target_id,
+            enabled: target.enabled,
+            time_made_target: target.time_made_target,
+        };
+
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        query
+            .execute_async(&*conn)
+            .await
+            .map_err(|e| Error::from(query.decode_error(e)))?;
+
+        Ok(())
+    }
+
+    /// Get the current target blueprint, if one exists
+    ///
+    /// Returns both the metadata about the target and the full blueprint
+    /// contents. If you only need the target metadata, use
+    /// `blueprint_target_get_current` instead.
+    pub async fn blueprint_target_get_current_full(
+        &self,
+        opctx: &OpContext,
+    ) -> Result<Option<(BlueprintTarget, Blueprint)>, Error> {
+        opctx.authorize(authz::Action::Read, &authz::BLUEPRINT_CONFIG).await?;
+
+        let conn = self.pool_connection_authorized(opctx).await?;
+        let Some(target) = self.blueprint_current_target_only(&conn).await?
+        else {
+            return Ok(None);
+        };
+
+        // The blueprint for the current target cannot be deleted while it is
+        // the current target, but it's possible someone else (a) made a new
+        // blueprint the target and (b) deleted the blueprint pointed to by our
+        // `target` between the above query and the below query. In such a case,
+        // this query will fail with an "unknown blueprint ID" error. This
+        // should be rare in practice.
+        let authz_blueprint = authz_blueprint_from_id(target.target_id);
+        let blueprint = self.blueprint_read(opctx, &authz_blueprint).await?;
+
+        Ok(Some((target, blueprint)))
+    }
+
+    /// Get the current target blueprint, if one exists
+    pub async fn blueprint_target_get_current(
+        &self,
+        opctx: &OpContext,
+    ) -> Result<Option<BlueprintTarget>, Error> {
+        opctx.authorize(authz::Action::Read, &authz::BLUEPRINT_CONFIG).await?;
+        let conn = self.pool_connection_authorized(opctx).await?;
+        self.blueprint_current_target_only(&conn).await
+    }
+
+    // Helper to fetch the current blueprint target (without fetching the entire
+    // blueprint for that target).
+    //
+    // Caller is responsible for checking authz for this operation.
+    async fn blueprint_current_target_only(
+        &self,
+        conn: &async_bb8_diesel::Connection<DbConnection>,
+    ) -> Result<Option<BlueprintTarget>, Error> {
+        use db::schema::bp_target::dsl;
+
+        let current_target = dsl::bp_target
+            .order_by(dsl::version.desc())
+            .first_async::<BpTarget>(conn)
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(current_target.map(BlueprintTarget::from))
+    }
+}
+
+// Helper to create an `authz::Blueprint` for a specific blueprint ID
+fn authz_blueprint_from_id(blueprint_id: Uuid) -> authz::Blueprint {
+    authz::Blueprint::new(
+        authz::FLEET,
+        blueprint_id,
+        LookupType::ById(blueprint_id),
+    )
+}
+
+/// Errors related to inserting a target blueprint
+#[derive(Debug)]
+enum InsertTargetError {
+    /// The requested target blueprint ID does not exist in the blueprint table.
+    NoSuchBlueprint(Uuid),
+    /// The requested target blueprint's parent does not match the current
+    /// target.
+    ParentNotTarget(Uuid),
+    /// Any other error
+    Other(DieselError),
+}
+
+impl From<InsertTargetError> for Error {
+    fn from(value: InsertTargetError) -> Self {
+        match value {
+            InsertTargetError::NoSuchBlueprint(id) => {
+                Error::not_found_by_id(ResourceType::Blueprint, &id)
+            }
+            InsertTargetError::ParentNotTarget(id) => {
+                Error::invalid_request(format!(
+                    "Blueprint {id}'s parent blueprint is not the current \
+                     target blueprint"
+                ))
+            }
+            InsertTargetError::Other(e) => {
+                public_error_from_diesel(e, ErrorHandler::Server)
+            }
+        }
+    }
+}
+
+/// Query to insert a new current target blueprint.
+///
+/// The `bp_target` table's primary key is the `version` field, and we enforce
+/// the following invariants:
+///
+/// * The first "current target" blueprint is assigned version 1.
+/// * In order to be inserted as the first current target blueprint, a
+///   blueprint must have a parent_blueprint_id of NULL.
+/// * After the first, any subsequent blueprint can only be assigned as the
+///   current target if its parent_blueprint_id is the current target blueprint.
+/// * When inserting a new child blueprint as the current target, it is assigned
+///   a version of 1 + its parent's version.
+///
+/// The result of this is a linear history of blueprints, where each target is a
+/// direct child of the previous current target. Enforcing the above has some
+/// subtleties (particularly around handling the "first blueprint with no
+/// parent" case). These are expanded on below through inline comments on the
+/// query we generate:
+///
+/// ```sql
+/// WITH
+///   -- Subquery to fetch the current target (i.e., the row with the max
+///   -- version in `bp_target`).
+///   current_target AS (
+///     SELECT
+///       "version" AS version,
+///       "blueprint_id" AS blueprint_id
+///     FROM "bp_target"
+///     ORDER BY "version" DESC
+///     LIMIT 1
+///   ),
+///
+///   -- Error checking subquery: This uses similar tricks as elsewhere in
+///   -- this crate to `CAST(... AS UUID)` with non-UUID values that result
+///   -- in runtime errors in specific cases, allowing us to give accurate
+///   -- error messages.
+///   --
+///   -- These checks are not required for correct behavior by the insert
+///   -- below. If we removed them, the insert would insert 0 rows if
+///   -- these checks would have failed. But they make it easier to report
+///   -- specific problems to our caller.
+///   --
+///   -- The specific cases we check here are noted below.
+///   check_validity AS MATERIALIZED (
+///     SELECT CAST(IF(
+///       -- Return `no-such-blueprint` if the ID we're being told to
+///       -- set as the target doesn't exist in the blueprint table.
+///       (SELECT "id" FROM "blueprint" WHERE "id" = <new_target_id>) IS NULL,
+///       'no-such-blueprint',
+///       IF(
+///         -- Check for whether our new target's parent matches our current
+///         -- target. There are two cases here: The first is the common case
+///         -- (i.e., the new target has a parent: does it match the current
+///         -- target ID?). The second is the bootstrapping check: if we're
+///         -- trying to insert a new target that does not have a parent,
+///         -- we should not have a current target at all.
+///         --
+///         -- If either of these cases fails, we return `parent-not-target`.
+///         (
+///           SELECT "parent_blueprint_id" FROM "blueprint", current_target
+///           WHERE
+///             "id" = <new_target_id>
+///             AND current_target.blueprint_id = "parent_blueprint_id"
+///         ) IS NOT NULL
+///         OR
+///         (
+///           SELECT 1 FROM "blueprint"
+///           WHERE
+///             "id" = <new_target_id>
+///             AND "parent_blueprint_id" IS NULL
+///             AND NOT EXISTS (SELECT version FROM current_target)
+///         ) = 1,
+///         <new_target_id>,
+///         'parent-not-target'
+///       )
+///     ) AS UUID)
+///   ),
+///
+///   -- Determine the new version number to use: either 1 if this is the
+///   -- first blueprint being made the current target, or 1 higher than
+///   -- the previous target's version.
+///   --
+///   -- The final clauses of each of these WHERE clauses repeat the
+///   -- checks performed above in `check_validity`, and will cause this
+///   -- subquery to return no rows if we should not allow the new
+///   -- target to be set.
+///   new_target AS (
+///     SELECT 1 AS new_version FROM "blueprint"
+///     WHERE
+///       "id" = <new_target_id>
+///       AND "parent_blueprint_id" IS NULL
+///       AND NOT EXISTS (SELECT version FROM current_target)
+///     UNION
+///     SELECT current_target.version + 1 FROM current_target, "blueprint"
+///     WHERE
+///       "id" = <new_target_id>
+///       AND "parent_blueprint_id" IS NOT NULL
+///       AND "parent_blueprint_id" = current_target.blueprint_id
+///   )
+///
+///   -- Perform the actual insertion.
+///   INSERT INTO "bp_target"(
+///     "version","blueprint_id","enabled","time_made_target"
+///   )
+///   SELECT
+///     new_target.new_version,
+///     <new_target_id>,
+///     <new_enabled>,
+///     <new_time_made_target>
+///   FROM new_target
+/// ```
+#[derive(Debug, Clone, Copy)]
+struct InsertTargetQuery {
+    target_id: Uuid,
+    enabled: bool,
+    time_made_target: DateTime<Utc>,
+}
+
+// Uncastable sentinel used to detect an attempt to make a blueprint the
+// target when it does not exist in the blueprint table.
+const NO_SUCH_BLUEPRINT_SENTINEL: &str = "no-such-blueprint";
+
+// Uncastable sentinel used to detect an attempt to make a blueprint the
+// target when its parent_blueprint_id is not the current target.
+const PARENT_NOT_TARGET_SENTINEL: &str = "parent-not-target";
+
+// Error messages generated from the above sentinel values.
+const NO_SUCH_BLUEPRINT_ERROR_MESSAGE: &str =
+    "could not parse \"no-such-blueprint\" as type uuid: \
+     uuid: incorrect UUID length: no-such-blueprint";
+const PARENT_NOT_TARGET_ERROR_MESSAGE: &str =
+    "could not parse \"parent-not-target\" as type uuid: \
+     uuid: incorrect UUID length: parent-not-target";
+
+impl InsertTargetQuery {
+    fn decode_error(&self, err: DieselError) -> InsertTargetError {
+        match err {
+            DieselError::DatabaseError(DatabaseErrorKind::Unknown, info)
+                if info.message() == NO_SUCH_BLUEPRINT_ERROR_MESSAGE =>
+            {
+                InsertTargetError::NoSuchBlueprint(self.target_id)
+            }
+            DieselError::DatabaseError(DatabaseErrorKind::Unknown, info)
+                if info.message() == PARENT_NOT_TARGET_ERROR_MESSAGE =>
+            {
+                InsertTargetError::ParentNotTarget(self.target_id)
+            }
+            other => InsertTargetError::Other(other),
+        }
+    }
+}
+
+impl QueryId for InsertTargetQuery {
+    type QueryId = ();
+    const HAS_STATIC_QUERY_ID: bool = false;
+}
+
+impl QueryFragment<Pg> for InsertTargetQuery {
+    fn walk_ast<'a>(
+        &'a self,
+        mut out: AstPass<'_, 'a, Pg>,
+    ) -> diesel::QueryResult<()> {
+        use crate::db::schema::blueprint::dsl as bp_dsl;
+        use crate::db::schema::bp_target::dsl;
+
+        type FromClause<T> =
+            diesel::internal::table_macro::StaticQueryFragmentInstance<T>;
+        type BpTargetFromClause = FromClause<db::schema::bp_target::table>;
+        type BlueprintFromClause = FromClause<db::schema::blueprint::table>;
+        const BP_TARGET_FROM_CLAUSE: BpTargetFromClause =
+            BpTargetFromClause::new();
+        const BLUEPRINT_FROM_CLAUSE: BlueprintFromClause =
+            BlueprintFromClause::new();
+
+        out.push_sql("WITH ");
+
+        out.push_sql("current_target AS (SELECT ");
+        out.push_identifier(dsl::version::NAME)?;
+        out.push_sql(" AS version,");
+        out.push_identifier(dsl::blueprint_id::NAME)?;
+        out.push_sql(" AS blueprint_id FROM ");
+        BP_TARGET_FROM_CLAUSE.walk_ast(out.reborrow())?;
+        out.push_sql(" ORDER BY ");
+        out.push_identifier(dsl::version::NAME)?;
+        out.push_sql(" DESC LIMIT 1),");
+
+        out.push_sql(
+            "check_validity AS MATERIALIZED ( \
+             SELECT \
+               CAST( \
+                 IF( \
+                   (SELECT ",
+        );
+        out.push_identifier(bp_dsl::id::NAME)?;
+        out.push_sql(" FROM ");
+        BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?;
+        out.push_sql(" WHERE ");
+        out.push_identifier(bp_dsl::id::NAME)?;
+        out.push_sql(" = ");
+        out.push_bind_param::<sql_types::Uuid, Uuid>(&self.target_id)?;
+        out.push_sql(") IS NULL, ");
+        out.push_bind_param::<sql_types::Text, &'static str>(
+            &NO_SUCH_BLUEPRINT_SENTINEL,
+        )?;
+        out.push_sql(
+            ", \
+             IF( \
+               (SELECT ",
+        );
+        out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?;
+        out.push_sql(" FROM ");
+        BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?;
+        out.push_sql(", current_target WHERE ");
+        out.push_identifier(bp_dsl::id::NAME)?;
+        out.push_sql(" = ");
+        out.push_bind_param::<sql_types::Uuid, Uuid>(&self.target_id)?;
+        out.push_sql(" AND current_target.blueprint_id = ");
+        out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?;
+        out.push_sql(
+            " ) IS NOT NULL \
+             OR \
+             (SELECT 1 FROM ",
+        );
+        BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?;
+        out.push_sql(" WHERE ");
+        out.push_identifier(bp_dsl::id::NAME)?;
+        out.push_sql(" = ");
+        out.push_bind_param::<sql_types::Uuid, Uuid>(&self.target_id)?;
+        out.push_sql(" AND ");
+        out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?;
+        out.push_sql(
+            " IS NULL \
+             AND NOT EXISTS ( \
+               SELECT version FROM current_target) \
+             ) = 1, ",
+        );
+        out.push_bind_param::<sql_types::Uuid, Uuid>(&self.target_id)?;
+        out.push_sql(", ");
+        out.push_bind_param::<sql_types::Text, &'static str>(
+            &PARENT_NOT_TARGET_SENTINEL,
+        )?;
+        out.push_sql(
+            " ) \
+             ) \
+             AS UUID) \
+             ), ",
+        );
+
+        out.push_sql("new_target AS (SELECT 1 AS new_version FROM ");
+        BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?;
+        out.push_sql(" WHERE ");
+        out.push_identifier(bp_dsl::id::NAME)?;
+        out.push_sql(" = ");
+        out.push_bind_param::<sql_types::Uuid, Uuid>(&self.target_id)?;
+        out.push_sql(" AND ");
+        out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?;
+        out.push_sql(
+            " IS NULL \
+             AND NOT EXISTS \
+             (SELECT version FROM current_target) \
+             UNION \
+             SELECT current_target.version + 1 FROM \
+             current_target, ",
+        );
+        BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?;
+        out.push_sql(" WHERE ");
+        out.push_identifier(bp_dsl::id::NAME)?;
+        out.push_sql(" = ");
+        out.push_bind_param::<sql_types::Uuid, Uuid>(&self.target_id)?;
+        out.push_sql(" AND ");
+        out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?;
+        out.push_sql(" IS NOT NULL AND ");
+        out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?;
+        out.push_sql(" = current_target.blueprint_id) ");
+
+        out.push_sql("INSERT INTO ");
+        BP_TARGET_FROM_CLAUSE.walk_ast(out.reborrow())?;
+        out.push_sql("(");
+        out.push_identifier(dsl::version::NAME)?;
+        out.push_sql(",");
+        out.push_identifier(dsl::blueprint_id::NAME)?;
+        out.push_sql(",");
+        out.push_identifier(dsl::enabled::NAME)?;
+        out.push_sql(",");
+        out.push_identifier(dsl::time_made_target::NAME)?;
+        out.push_sql(") SELECT new_target.new_version, ");
+        out.push_bind_param::<sql_types::Uuid, Uuid>(&self.target_id)?;
+        out.push_sql(",");
+        out.push_bind_param::<sql_types::Bool, bool>(&self.enabled)?;
+        out.push_sql(",");
+        out.push_bind_param::<sql_types::Timestamptz, DateTime<Utc>>(
+            &self.time_made_target,
+        )?;
+        out.push_sql(" FROM new_target");
+
+        Ok(())
+    }
+}
+
+impl RunQueryDsl<DbConnection> for InsertTargetQuery {}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::db::datastore::datastore_test;
+    use nexus_deployment::blueprint_builder::BlueprintBuilder;
+    use nexus_deployment::blueprint_builder::Ensure;
+    use nexus_inventory::now_db_precision;
+    use nexus_test_utils::db::test_setup_database;
+    use nexus_types::deployment::Policy;
+    use nexus_types::deployment::SledResources;
+    use nexus_types::inventory::Collection;
+    use omicron_common::address::Ipv6Subnet;
+    use omicron_test_utils::dev;
+    use rand::thread_rng;
+    use rand::Rng;
+    use std::mem;
+    use std::net::Ipv6Addr;
+
+    static EMPTY_POLICY: Policy = Policy { sleds: BTreeMap::new() };
+
+    // This is a not-super-future-maintainer-friendly helper to check that all
+    // the subtables related to blueprints have been pruned of a specific
+    // blueprint ID. If additional blueprint tables are added in the future,
+    // this function will silently ignore them unless they're manually added.
+    async fn ensure_blueprint_fully_deleted(
+        datastore: &DataStore,
+        blueprint_id: Uuid,
+    ) {
+        let conn = datastore.pool_connection_for_tests().await.unwrap();
+
+        macro_rules! query_count {
+            ($table:ident, $blueprint_id_col:ident) => {{
+                use db::schema::$table::dsl;
+                let result = dsl::$table
+                    .filter(dsl::$blueprint_id_col.eq(blueprint_id))
+                    .count()
+                    .get_result_async(&*conn)
+                    .await;
+                (stringify!($table), result)
+            }};
+        }
+
+        for (table_name, result) in [
+            query_count!(blueprint, id),
+            query_count!(bp_sled_omicron_zones, blueprint_id),
+            query_count!(bp_omicron_zone, blueprint_id),
+            query_count!(bp_omicron_zone_nic, blueprint_id),
+            query_count!(bp_omicron_zones_not_in_service, blueprint_id),
+        ] {
+            let count: i64 = result.unwrap();
+            assert_eq!(
+                count, 0,
+                "nonzero row count for blueprint \
+                 {blueprint_id} in table {table_name}"
+            );
+        }
+    }
+
+    // Create a fake set of `SledResources`, either with a subnet matching
+    // `ip` or with an arbitrary one.
+    fn fake_sled_resources(ip: Option<Ipv6Addr>) -> SledResources {
+        use illumos_utils::zpool::ZpoolName;
+        let zpools = (0..4)
+            .map(|_| {
+                let name = ZpoolName::new_external(Uuid::new_v4()).to_string();
+                name.parse().unwrap()
+            })
+            .collect();
+        let ip = ip.unwrap_or_else(|| thread_rng().gen::<u128>().into());
+        SledResources { zpools, subnet: Ipv6Subnet::new(ip) }
+    }
+
+    // Create a `Policy` that contains all the sleds found in `collection`
+    fn policy_from_collection(collection: &Collection) -> Policy {
+        Policy {
+            sleds: collection
+                .sled_agents
+                .iter()
+                .map(|(sled_id, agent)| {
+                    // `Collection` doesn't currently hold zpool names, so
+                    // we'll construct fake resources for each sled.
+                    (
+                        *sled_id,
+                        fake_sled_resources(Some(
+                            *agent.sled_agent_address.ip(),
+                        )),
+                    )
+                })
+                .collect(),
+        }
+    }
+
+    fn representative() -> (Collection, Policy, Blueprint) {
+        // We'll start with a representative collection...
+        let mut collection =
+            nexus_inventory::examples::representative().builder.build();
+
+        // ...and then mutate it such that the omicron zones it reports match
+        // the sled agent IDs it reports. Steal the sled agent info and drop the
+        // fake sled-agent IDs:
+        let mut empty_map = BTreeMap::new();
+        mem::swap(&mut empty_map, &mut collection.sled_agents);
+        let mut sled_agents = empty_map.into_values().collect::<Vec<_>>();
+
+        // Now reinsert them with IDs pulled from the omicron zones. This
+        // assumes we have more fake sled agents than omicron zones, which is
+        // currently true for the representative collection.
+        for &sled_id in collection.omicron_zones.keys() {
+            let some_sled_agent = sled_agents.pop().expect(
+                "fewer representative sled agents than \
+                 representative omicron zones sleds",
+            );
+            collection.sled_agents.insert(sled_id, some_sled_agent);
+        }
+
+        let policy = policy_from_collection(&collection);
+        let blueprint = BlueprintBuilder::build_initial_from_collection(
+            &collection,
+            &policy,
+            "test",
+        )
+        .unwrap();
+
+        (collection, policy, blueprint)
+    }
+
+    async fn blueprint_list_all_ids(
+        opctx: &OpContext,
+        datastore: &DataStore,
+    ) -> Vec<Uuid> {
+        datastore
+            .blueprints_list(opctx, &DataPageParams::max_page())
+            .await
+            .unwrap()
+            .into_iter()
+            .map(|bp| bp.id)
+            .collect()
+    }
+
+    #[tokio::test]
+    async fn test_empty_blueprint() {
+        // Setup
+        let logctx = dev::test_setup_log("inventory_insert");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+
+        // Create an empty collection and a blueprint from it
+        let collection =
+            nexus_inventory::CollectionBuilder::new("test").build();
+        let blueprint1 = BlueprintBuilder::build_initial_from_collection(
+            &collection,
+            &EMPTY_POLICY,
+            "test",
+        )
+        .unwrap();
+        let authz_blueprint = authz_blueprint_from_id(blueprint1.id);
+
+        // Trying to read it from the database should fail with the relevant
+        // "not found" error.
+        let err = datastore
+            .blueprint_read(&opctx, &authz_blueprint)
+            .await
+            .unwrap_err();
+        assert_eq!(err, authz_blueprint.not_found());
+
+        // Write it to the database and read it back.
+        datastore
+            .blueprint_insert(&opctx, &blueprint1)
+            .await
+            .expect("failed to insert blueprint");
+        let blueprint_read = datastore
+            .blueprint_read(&opctx, &authz_blueprint)
+            .await
+            .expect("failed to read collection back");
+        assert_eq!(blueprint1, blueprint_read);
+        assert_eq!(
+            blueprint_list_all_ids(&opctx, &datastore).await,
+            [blueprint1.id]
+        );
+
+        // There ought to be no sleds or zones in service, and no parent
+        // blueprint.
+ assert_eq!(blueprint1.omicron_zones.len(), 0); + assert_eq!(blueprint1.zones_in_service.len(), 0); + assert_eq!(blueprint1.parent_blueprint_id, None); + + // Trying to insert the same blueprint again should fail. + let err = + datastore.blueprint_insert(&opctx, &blueprint1).await.unwrap_err(); + assert!(err.to_string().contains("duplicate key")); + + // Delete the blueprint and ensure it's really gone. + datastore.blueprint_delete(&opctx, &authz_blueprint).await.unwrap(); + ensure_blueprint_fully_deleted(&datastore, blueprint1.id).await; + assert_eq!(blueprint_list_all_ids(&opctx, &datastore).await, []); + + // Clean up. + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_representative_blueprint() { + // Setup + let logctx = dev::test_setup_log("inventory_insert"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a cohesive representative collection/policy/blueprint + let (collection, mut policy, blueprint1) = representative(); + let authz_blueprint1 = authz_blueprint_from_id(blueprint1.id); + + // Write it to the database and read it back. + datastore + .blueprint_insert(&opctx, &blueprint1) + .await + .expect("failed to insert blueprint"); + let blueprint_read = datastore + .blueprint_read(&opctx, &authz_blueprint1) + .await + .expect("failed to read collection back"); + assert_eq!(blueprint1, blueprint_read); + assert_eq!( + blueprint_list_all_ids(&opctx, &datastore).await, + [blueprint1.id] + ); + + // Check the number of blueprint elements against our collection. + assert_eq!(blueprint1.omicron_zones.len(), policy.sleds.len()); + assert_eq!( + blueprint1.omicron_zones.len(), + collection.omicron_zones.len() + ); + assert_eq!( + blueprint1.all_omicron_zones().count(), + collection.all_omicron_zones().count() + ); + // All zones should be in service. + assert_eq!( + blueprint1.zones_in_service.len(), + blueprint1.all_omicron_zones().count() + ); + assert_eq!(blueprint1.parent_blueprint_id, None); + + // Set blueprint1 as the current target, and ensure that we cannot + // delete it (as the current target cannot be deleted). + let bp1_target = BlueprintTarget { + target_id: blueprint1.id, + enabled: true, + time_made_target: now_db_precision(), + }; + datastore + .blueprint_target_set_current(&opctx, bp1_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp1_target, blueprint1.clone())) + ); + let err = datastore + .blueprint_delete(&opctx, &authz_blueprint1) + .await + .unwrap_err(); + assert!( + err.to_string().contains(&format!( + "blueprint {} is the current target and cannot be deleted", + blueprint1.id + )), + "unexpected error: {err}" + ); + + // Add a new sled to `policy`. + let new_sled_id = Uuid::new_v4(); + policy.sleds.insert(new_sled_id, fake_sled_resources(None)); + let new_sled_zpools = &policy.sleds.get(&new_sled_id).unwrap().zpools; + + // Create a builder for a child blueprint. + let mut builder = + BlueprintBuilder::new_based_on(&blueprint1, &policy, "test"); + + // Add zones to our new sled. 
+ assert_eq!( + builder.sled_ensure_zone_ntp(new_sled_id).unwrap(), + Ensure::Added + ); + for zpool_name in new_sled_zpools { + assert_eq!( + builder + .sled_ensure_zone_crucible(new_sled_id, zpool_name.clone()) + .unwrap(), + Ensure::Added + ); + } + let num_new_sled_zones = 1 + new_sled_zpools.len(); + + let blueprint2 = builder.build(); + let authz_blueprint2 = authz_blueprint_from_id(blueprint2.id); + + // Check that we added the new sled and its zones. + assert_eq!( + blueprint1.omicron_zones.len() + 1, + blueprint2.omicron_zones.len() + ); + assert_eq!( + blueprint1.all_omicron_zones().count() + num_new_sled_zones, + blueprint2.all_omicron_zones().count() + ); + + // All zones should be in service. + assert_eq!( + blueprint2.zones_in_service.len(), + blueprint2.all_omicron_zones().count() + ); + assert_eq!(blueprint2.parent_blueprint_id, Some(blueprint1.id)); + + // Check that we can write it to the DB and read it back. + datastore + .blueprint_insert(&opctx, &blueprint2) + .await + .expect("failed to insert blueprint"); + let blueprint_read = datastore + .blueprint_read(&opctx, &authz_blueprint2) + .await + .expect("failed to read collection back"); + println!("diff: {}", blueprint2.diff(&blueprint_read)); + assert_eq!(blueprint2, blueprint_read); + { + let mut expected_ids = [blueprint1.id, blueprint2.id]; + expected_ids.sort(); + assert_eq!( + blueprint_list_all_ids(&opctx, &datastore).await, + expected_ids + ); + } + + // Set blueprint2 as the current target and ensure that means we can not + // delete it. + let bp2_target = BlueprintTarget { + target_id: blueprint2.id, + enabled: true, + time_made_target: now_db_precision(), + }; + datastore + .blueprint_target_set_current(&opctx, bp2_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp2_target, blueprint2.clone())) + ); + let err = datastore + .blueprint_delete(&opctx, &authz_blueprint2) + .await + .unwrap_err(); + assert!( + err.to_string().contains(&format!( + "blueprint {} is the current target and cannot be deleted", + blueprint2.id + )), + "unexpected error: {err}" + ); + + // Now that blueprint2 is the target, we should be able to delete + // blueprint1. + datastore.blueprint_delete(&opctx, &authz_blueprint1).await.unwrap(); + ensure_blueprint_fully_deleted(&datastore, blueprint1.id).await; + assert_eq!( + blueprint_list_all_ids(&opctx, &datastore).await, + [blueprint2.id] + ); + + // Clean up. + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_set_target() { + // Setup + let logctx = dev::test_setup_log("inventory_insert"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Trying to insert a target that doesn't reference a blueprint should + // fail with a relevant error message. + let nonexistent_blueprint_id = Uuid::new_v4(); + let err = datastore + .blueprint_target_set_current( + &opctx, + BlueprintTarget { + target_id: nonexistent_blueprint_id, + enabled: true, + time_made_target: now_db_precision(), + }, + ) + .await + .unwrap_err(); + assert_eq!( + err, + Error::from(InsertTargetError::NoSuchBlueprint( + nonexistent_blueprint_id + )) + ); + + // There should be no current target still. 
+ assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + None + ); + + // Create three blueprints: + // * `blueprint1` has no parent + // * `blueprint2` and `blueprint3` both have `blueprint1` as parent + let collection = + nexus_inventory::CollectionBuilder::new("test").build(); + let blueprint1 = BlueprintBuilder::build_initial_from_collection( + &collection, + &EMPTY_POLICY, + "test1", + ) + .unwrap(); + let blueprint2 = + BlueprintBuilder::new_based_on(&blueprint1, &EMPTY_POLICY, "test2") + .build(); + let blueprint3 = + BlueprintBuilder::new_based_on(&blueprint1, &EMPTY_POLICY, "test3") + .build(); + assert_eq!(blueprint1.parent_blueprint_id, None); + assert_eq!(blueprint2.parent_blueprint_id, Some(blueprint1.id)); + assert_eq!(blueprint3.parent_blueprint_id, Some(blueprint1.id)); + + // Insert all three into the blueprint table. + datastore.blueprint_insert(&opctx, &blueprint1).await.unwrap(); + datastore.blueprint_insert(&opctx, &blueprint2).await.unwrap(); + datastore.blueprint_insert(&opctx, &blueprint3).await.unwrap(); + + let bp1_target = BlueprintTarget { + target_id: blueprint1.id, + enabled: true, + time_made_target: now_db_precision(), + }; + let bp2_target = BlueprintTarget { + target_id: blueprint2.id, + enabled: true, + time_made_target: now_db_precision(), + }; + let bp3_target = BlueprintTarget { + target_id: blueprint3.id, + enabled: true, + time_made_target: now_db_precision(), + }; + + // Attempting to make blueprint2 the current target should fail because + // it has a non-NULL parent_blueprint_id, but there is no current target + // (i.e., only a blueprint with no parent can be made the current + // target). + let err = datastore + .blueprint_target_set_current(&opctx, bp2_target) + .await + .unwrap_err(); + assert_eq!( + err, + Error::from(InsertTargetError::ParentNotTarget(blueprint2.id)) + ); + + // There should be no current target still. + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + None + ); + + // We should be able to insert blueprint1, which has no parent (matching + // the currently-empty `bp_target` table's lack of a target). + datastore + .blueprint_target_set_current(&opctx, bp1_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp1_target, blueprint1.clone())) + ); + + // Now that blueprint1 is the current target, we should be able to + // insert blueprint2 or blueprint3. WLOG, pick blueprint3. + datastore + .blueprint_target_set_current(&opctx, bp3_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp3_target, blueprint3.clone())) + ); + + // Now that blueprint3 is the target, trying to insert blueprint1 or + // blueprint2 should fail, because neither of their parents (NULL and + // blueprint1, respectively) match the current target. + let err = datastore + .blueprint_target_set_current(&opctx, bp1_target) + .await + .unwrap_err(); + assert_eq!( + err, + Error::from(InsertTargetError::ParentNotTarget(blueprint1.id)) + ); + let err = datastore + .blueprint_target_set_current(&opctx, bp2_target) + .await + .unwrap_err(); + assert_eq!( + err, + Error::from(InsertTargetError::ParentNotTarget(blueprint2.id)) + ); + + // Create a child of blueprint3, and ensure when we set it as the target + // with enabled=false, that status is serialized. 
+ let blueprint4 = + BlueprintBuilder::new_based_on(&blueprint3, &EMPTY_POLICY, "test3") + .build(); + assert_eq!(blueprint4.parent_blueprint_id, Some(blueprint3.id)); + datastore.blueprint_insert(&opctx, &blueprint4).await.unwrap(); + let bp4_target = BlueprintTarget { + target_id: blueprint4.id, + enabled: false, + time_made_target: now_db_precision(), + }; + datastore + .blueprint_target_set_current(&opctx, bp4_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp4_target, blueprint4)) + ); + + // Clean up. + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 78a7aeda87..96832b25bf 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -54,6 +54,7 @@ mod certificate; mod console_session; mod dataset; mod db_metadata; +mod deployment; mod device_auth; mod disk; mod dns; diff --git a/nexus/deployment/Cargo.toml b/nexus/deployment/Cargo.toml index b166f947bf..115dec98a5 100644 --- a/nexus/deployment/Cargo.toml +++ b/nexus/deployment/Cargo.toml @@ -9,6 +9,7 @@ chrono.workspace = true internal-dns.workspace = true ipnet.workspace = true ipnetwork.workspace = true +nexus-inventory.workspace = true nexus-types.workspace = true omicron-common.workspace = true slog.workspace = true @@ -18,6 +19,5 @@ uuid.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] -nexus-inventory.workspace = true omicron-test-utils.workspace = true sled-agent-client.workspace = true diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs index 689e2d8e2c..ac2fe70e6b 100644 --- a/nexus/deployment/src/blueprint_builder.rs +++ b/nexus/deployment/src/blueprint_builder.rs @@ -9,6 +9,7 @@ use anyhow::anyhow; use internal_dns::config::Host; use internal_dns::config::ZoneVariant; use ipnet::IpAdd; +use nexus_inventory::now_db_precision; use nexus_types::deployment::Blueprint; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneDataset; @@ -94,7 +95,7 @@ impl<'a> BlueprintBuilder<'a> { .sleds .keys() .map(|sled_id| { - let zones = collection + let mut zones = collection .omicron_zones .get(sled_id) .map(|z| z.zones.clone()) @@ -118,6 +119,11 @@ impl<'a> BlueprintBuilder<'a> { sled_id )) })?; + + // This is not strictly necessary. But for testing, it's + // helpful for things to be in sorted order. + zones.zones.sort_by_key(|zone| zone.id); + Ok((*sled_id, zones)) }) .collect::>()?; @@ -125,10 +131,10 @@ impl<'a> BlueprintBuilder<'a> { collection.all_omicron_zones().map(|z| z.id).collect(); Ok(Blueprint { id: Uuid::new_v4(), - omicron_zones: omicron_zones, + omicron_zones, zones_in_service, parent_blueprint_id: None, - time_created: chrono::Utc::now(), + time_created: now_db_precision(), creator: creator.to_owned(), comment: format!("from collection {}", collection.id), }) @@ -162,7 +168,7 @@ impl<'a> BlueprintBuilder<'a> { .map(|sled_id| { // Start with self.omicron_zones, which contains entries for any // sled whose zones config is changing in this blueprint. - let zones = self + let mut zones = self .omicron_zones .remove(sled_id) // If it's not there, use the config from the parent @@ -180,15 +186,20 @@ impl<'a> BlueprintBuilder<'a> { generation: Generation::new(), zones: vec![], }); + + // This is not strictly necessary. But for testing, it's + // helpful for things to be in sorted order. 
+ zones.zones.sort_by_key(|zone| zone.id); + (*sled_id, zones) }) .collect(); Blueprint { id: Uuid::new_v4(), - omicron_zones: omicron_zones, + omicron_zones, zones_in_service: self.zones_in_service, parent_blueprint_id: Some(self.parent_blueprint.id), - time_created: chrono::Utc::now(), + time_created: now_db_precision(), creator: self.creator, comment: self.comments.join(", "), } diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 62d338c1ee..08a905143c 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -96,7 +96,7 @@ impl CollectionBuilder { pub fn new(collector: &str) -> Self { CollectionBuilder { errors: vec![], - time_started: now(), + time_started: now_db_precision(), collector: collector.to_owned(), baseboards: BTreeSet::new(), cabooses: BTreeSet::new(), @@ -122,7 +122,7 @@ impl CollectionBuilder { id: Uuid::new_v4(), errors: self.errors.into_iter().map(|e| e.to_string()).collect(), time_started: self.time_started, - time_done: now(), + time_done: now_db_precision(), collector: self.collector, baseboards: self.baseboards, cabooses: self.cabooses, @@ -178,7 +178,7 @@ impl CollectionBuilder { // Separate the SP state into the SP-specific state and the RoT state, // if any. - let now = now(); + let now = now_db_precision(); let _ = self.sps.entry(baseboard.clone()).or_insert_with(|| { ServiceProcessor { time_collected: now, @@ -279,7 +279,7 @@ impl CollectionBuilder { if let Some(previous) = by_id.insert( baseboard.clone(), CabooseFound { - time_collected: now(), + time_collected: now_db_precision(), source: source.to_owned(), caboose: sw_caboose.clone(), }, @@ -348,7 +348,7 @@ impl CollectionBuilder { if let Some(previous) = by_id.insert( baseboard.clone(), RotPageFound { - time_collected: now(), + time_collected: now_db_precision(), source: source.to_owned(), page: sw_rot_page.clone(), }, @@ -456,7 +456,7 @@ impl CollectionBuilder { usable_hardware_threads: inventory.usable_hardware_threads, usable_physical_ram: inventory.usable_physical_ram, reservoir_size: inventory.reservoir_size, - time_collected: now(), + time_collected: now_db_precision(), sled_id, }; @@ -491,7 +491,7 @@ impl CollectionBuilder { self.omicron_zones.insert( sled_id, OmicronZonesFound { - time_collected: now(), + time_collected: now_db_precision(), source: source.to_string(), sled_id, zones, @@ -507,7 +507,7 @@ impl CollectionBuilder { /// This exists because the database doesn't store nanosecond-precision, so if /// we store nanosecond-precision timestamps, then DateTime conversion is lossy /// when round-tripping through the database. That's rather inconvenient. -fn now() -> DateTime { +pub fn now_db_precision() -> DateTime { let ts = Utc::now(); let nanosecs = ts.timestamp_subsec_nanos(); let micros = ts.timestamp_subsec_micros(); @@ -517,7 +517,7 @@ fn now() -> DateTime { #[cfg(test)] mod test { - use super::now; + use super::now_db_precision; use super::CollectionBuilder; use crate::examples::representative; use crate::examples::sp_state; @@ -541,10 +541,10 @@ mod test { // Verify the contents of an empty collection. #[test] fn test_empty() { - let time_before = now(); + let time_before = now_db_precision(); let builder = CollectionBuilder::new("test_empty"); let collection = builder.build(); - let time_after = now(); + let time_after = now_db_precision(); assert!(collection.errors.is_empty()); assert!(time_before <= collection.time_started); @@ -577,7 +577,7 @@ mod test { // a useful quick check. 
#[test] fn test_basic() { - let time_before = now(); + let time_before = now_db_precision(); let Representative { builder, sleds: [sled1_bb, sled2_bb, sled3_bb, sled4_bb], @@ -587,7 +587,7 @@ mod test { [sled_agent_id_basic, sled_agent_id_extra, sled_agent_id_pc, sled_agent_id_unknown], } = representative(); let collection = builder.build(); - let time_after = now(); + let time_after = now_db_precision(); println!("{:#?}", collection); assert!(time_before <= collection.time_started); assert!(collection.time_started <= collection.time_done); diff --git a/nexus/inventory/src/lib.rs b/nexus/inventory/src/lib.rs index f11af8fede..6dee7bb7ec 100644 --- a/nexus/inventory/src/lib.rs +++ b/nexus/inventory/src/lib.rs @@ -27,6 +27,8 @@ pub use builder::CollectionBuilder; pub use builder::CollectorBug; pub use builder::InventoryError; +pub use builder::now_db_precision; + pub use collector::Collector; pub use sled_agent_enumerator::SledAgentEnumerator; diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 9439cdc6d5..b9718a0367 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -5,13 +5,12 @@ //! Configuration of the deployment system use nexus_db_queries::authz; -use nexus_db_queries::authz::Action; -use nexus_db_queries::authz::ApiResource; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::pagination::Paginator; use nexus_deployment::blueprint_builder::BlueprintBuilder; use nexus_deployment::planner::Planner; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; use nexus_types::deployment::Policy; @@ -27,7 +26,6 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; -use omicron_common::api::external::ResourceType; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -47,28 +45,6 @@ const SQL_BATCH_SIZE: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) }; const SQL_LIMIT_INVENTORY: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) }; -/// Temporary in-memory store of blueprints -/// -/// Blueprints eventually need to be stored in the database. That will obviate -/// the need for this structure. -pub struct Blueprints { - all_blueprints: BTreeMap, - target: BlueprintTarget, -} - -impl Blueprints { - pub fn new() -> Blueprints { - Blueprints { - all_blueprints: BTreeMap::new(), - target: BlueprintTarget { - target_id: None, - enabled: false, - time_set: chrono::Utc::now(), - }, - } - } -} - /// Common structure for collecting information that the planner needs struct PlanningContext { policy: Policy, @@ -76,30 +52,14 @@ struct PlanningContext { } impl super::Nexus { - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. 
pub async fn blueprint_list( &self, opctx: &OpContext, pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - opctx.authorize(Action::ListChildren, &authz::BLUEPRINT_CONFIG).await?; - Ok(self - .blueprints - .lock() - .unwrap() - .all_blueprints - .values() - .filter_map(|f| match pagparams.marker { - None => Some(f.clone()), - Some(marker) if f.id > *marker => Some(f.clone()), - _ => None, - }) - .collect()) + ) -> ListResultVec { + self.db_datastore.blueprints_list(opctx, pagparams).await } - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. pub async fn blueprint_view( &self, opctx: &OpContext, @@ -110,18 +70,9 @@ impl super::Nexus { blueprint_id, LookupType::ById(blueprint_id), ); - opctx.authorize(Action::Read, &blueprint).await?; - self.blueprints - .lock() - .unwrap() - .all_blueprints - .get(&blueprint_id) - .cloned() - .ok_or_else(|| blueprint.not_found()) + self.db_datastore.blueprint_read(opctx, &blueprint).await } - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. pub async fn blueprint_delete( &self, opctx: &OpContext, @@ -132,90 +83,35 @@ impl super::Nexus { blueprint_id, LookupType::ById(blueprint_id), ); - opctx.authorize(Action::Delete, &blueprint).await?; - - let mut blueprints = self.blueprints.lock().unwrap(); - if let Some(target_id) = blueprints.target.target_id { - if target_id == blueprint_id { - return Err(Error::conflict(format!( - "blueprint {} is the current target and cannot be deleted", - blueprint_id - ))); - } - } - - if blueprints.all_blueprints.remove(&blueprint_id).is_none() { - return Err(blueprint.not_found()); - } - - Ok(()) + self.db_datastore.blueprint_delete(opctx, &blueprint).await } pub async fn blueprint_target_view( &self, opctx: &OpContext, - ) -> Result { - self.blueprint_target(opctx).await.map(|(target, _)| target) - } - - // This is a stand-in for a datastore function that fetches the current - // target information and the target blueprint's contents. This helper - // exists to combine the authz check with the lookup, which is what the - // datastore function will eventually do. - async fn blueprint_target( - &self, - opctx: &OpContext, - ) -> Result<(BlueprintTarget, Option), Error> { - opctx.authorize(Action::Read, &authz::BLUEPRINT_CONFIG).await?; - let blueprints = self.blueprints.lock().unwrap(); - Ok(( - blueprints.target.clone(), - blueprints.target.target_id.and_then(|target_id| { - blueprints.all_blueprints.get(&target_id).cloned() - }), - )) + ) -> Result, Error> { + self.db_datastore.blueprint_target_get_current(opctx).await } - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. 
pub async fn blueprint_target_set( &self, opctx: &OpContext, params: BlueprintTargetSet, ) -> Result { - opctx.authorize(Action::Modify, &authz::BLUEPRINT_CONFIG).await?; - let new_target_id = params.target_id; - let enabled = params.enabled; - let mut blueprints = self.blueprints.lock().unwrap(); - if let Some(blueprint) = blueprints.all_blueprints.get(&new_target_id) { - if blueprint.parent_blueprint_id != blueprints.target.target_id { - return Err(Error::conflict(&format!( - "blueprint {:?}: parent is {:?}, which is not the current \ - target {:?}", - new_target_id, - blueprint - .parent_blueprint_id - .map(|p| p.to_string()) - .unwrap_or_else(|| String::from("")), - blueprints - .target - .target_id - .map(|p| p.to_string()) - .unwrap_or_else(|| String::from("")), - ))); - } - blueprints.target = BlueprintTarget { - target_id: Some(new_target_id), - enabled, - time_set: chrono::Utc::now(), - }; + let new_target = BlueprintTarget { + target_id: params.target_id, + enabled: params.enabled, + time_made_target: chrono::Utc::now(), + }; + + self.db_datastore + .blueprint_target_set_current(opctx, new_target) + .await?; + + // When we add a background task executing the target blueprint, + // this is the point where we'd signal it to update its target. - // When we add a background task executing the target blueprint, - // this is the point where we'd signal it to update its target. - Ok(blueprints.target.clone()) - } else { - Err(Error::not_found_by_id(ResourceType::Blueprint, &new_target_id)) - } + Ok(new_target) } async fn blueprint_planning_context( @@ -286,20 +182,12 @@ impl super::Nexus { Ok(PlanningContext { creator, policy: Policy { sleds } }) } - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. async fn blueprint_add( &self, opctx: &OpContext, - blueprint: Blueprint, + blueprint: &Blueprint, ) -> Result<(), Error> { - opctx.authorize(Action::Modify, &authz::BLUEPRINT_CONFIG).await?; - let mut blueprints = self.blueprints.lock().unwrap(); - assert!(blueprints - .all_blueprints - .insert(blueprint.id, blueprint) - .is_none()); - Ok(()) + self.db_datastore.blueprint_insert(opctx, blueprint).await } pub async fn blueprint_generate_from_collection( @@ -329,7 +217,7 @@ impl super::Nexus { )) })?; - self.blueprint_add(&opctx, blueprint.clone()).await?; + self.blueprint_add(&opctx, &blueprint).await?; Ok(blueprint) } @@ -337,8 +225,9 @@ impl super::Nexus { &self, opctx: &OpContext, ) -> CreateResult { - let (_, maybe_parent) = self.blueprint_target(opctx).await?; - let Some(parent_blueprint) = maybe_parent else { + let maybe_target = + self.db_datastore.blueprint_target_get_current_full(opctx).await?; + let Some((_, parent_blueprint)) = maybe_target else { return Err(Error::conflict( "cannot regenerate blueprint without existing target", )); @@ -358,7 +247,7 @@ impl super::Nexus { )) })?; - self.blueprint_add(&opctx, blueprint.clone()).await?; + self.blueprint_add(&opctx, &blueprint).await?; Ok(blueprint) } } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index d6ad7c98ea..bf8522452a 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -183,10 +183,6 @@ pub struct Nexus { /// Default Crucible region allocation strategy default_region_allocation_strategy: RegionAllocationStrategy, - - /// information about blueprints (deployment configurations) - // This will go away once these are stored in the database. 
- blueprints: std::sync::Mutex, } impl Nexus { @@ -419,7 +415,6 @@ impl Nexus { .pkg .default_region_allocation_strategy .clone(), - blueprints: std::sync::Mutex::new(deployment::Blueprints::new()), }; // TODO-cleanup all the extra Arcs here seems wrong diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 58038cb37a..0122d9b439 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -26,6 +26,8 @@ use dropshot::TypedBody; use hyper::Body; use nexus_db_model::Ipv4NatEntryView; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintMetadata; +use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; use nexus_types::internal_api::params::SwitchPutRequest; use nexus_types::internal_api::params::SwitchPutResponse; @@ -45,7 +47,6 @@ use oximeter::types::ProducerResults; use oximeter_producer::{collect, ProducerIdPathParams}; use schemars::JsonSchema; use serde::Deserialize; -use serde::Serialize; use std::collections::BTreeMap; use std::sync::Arc; use uuid::Uuid; @@ -620,7 +621,7 @@ async fn ipv4_nat_changeset( async fn blueprint_list( rqctx: RequestContext>, query_params: Query, -) -> Result>, HttpError> { +) -> Result>, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.nexus; @@ -631,7 +632,7 @@ async fn blueprint_list( Ok(HttpResponseOk(ScanById::results_page( &query, blueprints, - &|_, blueprint: &Blueprint| blueprint.id, + &|_, blueprint: &BlueprintMetadata| blueprint.id, )?)) }; @@ -680,35 +681,6 @@ async fn blueprint_delete( // Managing the current target blueprint -/// Describes what blueprint, if any, the system is currently working toward -#[derive(Debug, Serialize, JsonSchema)] -pub struct BlueprintTarget { - /// id of the blueprint that the system is trying to make real - pub target_id: Uuid, - /// policy: should the system actively work towards this blueprint - /// - /// This should generally be left enabled. 
- pub enabled: bool, - /// when this blueprint was made the target - pub time_set: chrono::DateTime, -} - -impl TryFrom for BlueprintTarget { - type Error = Error; - - fn try_from( - value: nexus_types::deployment::BlueprintTarget, - ) -> Result { - Ok(BlueprintTarget { - target_id: value.target_id.ok_or_else(|| { - Error::conflict("no target blueprint has been configured") - })?, - enabled: value.enabled, - time_set: value.time_set, - }) - } -} - /// Fetches the current target blueprint, if any #[endpoint { method = GET, @@ -721,8 +693,11 @@ async fn blueprint_target_view( let handler = async { let opctx = crate::context::op_context_for_internal_api(&rqctx).await; let nexus = &apictx.nexus; - let target = nexus.blueprint_target_view(&opctx).await?; - Ok(HttpResponseOk(BlueprintTarget::try_from(target)?)) + let target = + nexus.blueprint_target_view(&opctx).await?.ok_or_else(|| { + Error::conflict("no target blueprint has been configured") + })?; + Ok(HttpResponseOk(target)) }; apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await } @@ -741,11 +716,8 @@ async fn blueprint_target_set( let opctx = crate::context::op_context_for_internal_api(&rqctx).await; let nexus = &apictx.nexus; let target = target.into_inner(); - let result = nexus.blueprint_target_set(&opctx, target).await?; - Ok(HttpResponseOk( - BlueprintTarget::try_from(result) - .map_err(|e| Error::conflict(e.to_string()))?, - )) + let target = nexus.blueprint_target_set(&opctx, target).await?; + Ok(HttpResponseOk(target)) }; apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await } diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 95404a2c17..3b4c3b3142 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -16,6 +16,7 @@ pub use crate::inventory::OmicronZoneConfig; pub use crate::inventory::OmicronZoneDataset; pub use crate::inventory::OmicronZoneType; pub use crate::inventory::OmicronZonesConfig; +pub use crate::inventory::SourceNatConfig; pub use crate::inventory::ZpoolName; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; @@ -184,13 +185,39 @@ impl Blueprint { } } -/// Describes which blueprint the system is currently trying to make real -// This is analogous to the db model type until we have that. -#[derive(Debug, Clone)] +/// Describe high-level metadata about a blueprint +// These fields are a subset of [`Blueprint`], and include only the data we can +// quickly fetch from the main blueprint table (e.g., when listing all +// blueprints). +#[derive(Debug, Clone, Eq, PartialEq, JsonSchema, Serialize)] +pub struct BlueprintMetadata { + /// unique identifier for this blueprint + pub id: Uuid, + + /// which blueprint this blueprint is based on + pub parent_blueprint_id: Option, + + /// when this blueprint was generated (for debugging) + pub time_created: chrono::DateTime, + /// identity of the component that generated the blueprint (for debugging) + /// This would generally be the Uuid of a Nexus instance. 
+ pub creator: String, + /// human-readable string describing why this blueprint was created + /// (for debugging) + pub comment: String, +} + +/// Describes what blueprint, if any, the system is currently working toward +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, JsonSchema)] pub struct BlueprintTarget { - pub target_id: Option, + /// id of the blueprint that the system is trying to make real + pub target_id: Uuid, + /// policy: should the system actively work towards this blueprint + /// + /// This should generally be left enabled. pub enabled: bool, - pub time_set: chrono::DateTime, + /// when this blueprint was made the target + pub time_made_target: chrono::DateTime, } /// Specifies what blueprint, if any, the system should be working toward diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 8b0807d52c..bc26736b37 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -164,7 +164,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/BlueprintResultsPage" + "$ref": "#/components/schemas/BlueprintMetadataResultsPage" } } } @@ -2132,7 +2132,43 @@ "zones_in_service" ] }, - "BlueprintResultsPage": { + "BlueprintMetadata": { + "description": "Describe high-level metadata about a blueprint", + "type": "object", + "properties": { + "comment": { + "description": "human-readable string describing why this blueprint was created (for debugging)", + "type": "string" + }, + "creator": { + "description": "identity of the component that generated the blueprint (for debugging) This would generally be the Uuid of a Nexus instance.", + "type": "string" + }, + "id": { + "description": "unique identifier for this blueprint", + "type": "string", + "format": "uuid" + }, + "parent_blueprint_id": { + "nullable": true, + "description": "which blueprint this blueprint is based on", + "type": "string", + "format": "uuid" + }, + "time_created": { + "description": "when this blueprint was generated (for debugging)", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "comment", + "creator", + "id", + "time_created" + ] + }, + "BlueprintMetadataResultsPage": { "description": "A single page of results", "type": "object", "properties": { @@ -2140,7 +2176,7 @@ "description": "list of items on this page of results", "type": "array", "items": { - "$ref": "#/components/schemas/Blueprint" + "$ref": "#/components/schemas/BlueprintMetadata" } }, "next_page": { @@ -2166,7 +2202,7 @@ "type": "string", "format": "uuid" }, - "time_set": { + "time_made_target": { "description": "when this blueprint was made the target", "type": "string", "format": "date-time" @@ -2175,7 +2211,7 @@ "required": [ "enabled", "target_id", - "time_set" + "time_made_target" ] }, "BlueprintTargetSet": { diff --git a/schema/crdb/28.0.0/up1.sql b/schema/crdb/28.0.0/up1.sql new file mode 100644 index 0000000000..fda4e3ed5c --- /dev/null +++ b/schema/crdb/28.0.0/up1.sql @@ -0,0 +1,7 @@ +CREATE TABLE IF NOT EXISTS omicron.public.blueprint ( + id UUID PRIMARY KEY, + parent_blueprint_id UUID, + time_created TIMESTAMPTZ NOT NULL, + creator TEXT NOT NULL, + comment TEXT NOT NULL +); diff --git a/schema/crdb/28.0.0/up2.sql b/schema/crdb/28.0.0/up2.sql new file mode 100644 index 0000000000..a51c1a31fa --- /dev/null +++ b/schema/crdb/28.0.0/up2.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_sled_omicron_zones ( + blueprint_id UUID NOT NULL, + sled_id UUID NOT NULL, + generation INT8 NOT NULL, + PRIMARY KEY (blueprint_id, sled_id) +); 
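For a concrete sense of how these migrations get exercised, here is a rough usage sketch (not part of the patch; it assumes an `opctx`, `datastore`, `collection`, and `policy` like those set up in the tests above) of driving the new datastore API against these tables: the first, parentless blueprint becomes target version 1, and a child blueprint promoted afterward becomes version 2.

```rust
// Illustrative only: a linear target history, as the schema above intends.
let bp1 = BlueprintBuilder::build_initial_from_collection(
    &collection, &policy, "example",
)
.unwrap();
datastore.blueprint_insert(&opctx, &bp1).await.unwrap();
datastore
    .blueprint_target_set_current(
        &opctx,
        BlueprintTarget {
            target_id: bp1.id,
            enabled: true,
            time_made_target: now_db_precision(),
        },
    )
    .await
    .unwrap(); // becomes version 1 in bp_target

let bp2 = BlueprintBuilder::new_based_on(&bp1, &policy, "example").build();
datastore.blueprint_insert(&opctx, &bp2).await.unwrap();
datastore
    .blueprint_target_set_current(
        &opctx,
        BlueprintTarget {
            target_id: bp2.id,
            enabled: true,
            time_made_target: now_db_precision(),
        },
    )
    .await
    .unwrap(); // becomes version 2; bp1 may now be deleted
```

Attempting to promote `bp2` before `bp1` had been made the target would fail with `InsertTargetError::ParentNotTarget`, per the invariants enforced by `InsertTargetQuery`.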
diff --git a/schema/crdb/28.0.0/up3.sql b/schema/crdb/28.0.0/up3.sql new file mode 100644 index 0000000000..55e09ca719 --- /dev/null +++ b/schema/crdb/28.0.0/up3.sql @@ -0,0 +1,31 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone ( + blueprint_id UUID NOT NULL, + sled_id UUID NOT NULL, + id UUID NOT NULL, + underlay_address INET NOT NULL, + zone_type omicron.public.zone_type NOT NULL, + primary_service_ip INET NOT NULL, + primary_service_port INT4 + CHECK (primary_service_port BETWEEN 0 AND 65535) + NOT NULL, + second_service_ip INET, + second_service_port INT4 + CHECK (second_service_port IS NULL + OR second_service_port BETWEEN 0 AND 65535), + dataset_zpool_name TEXT, + bp_nic_id UUID, + dns_gz_address INET, + dns_gz_address_index INT8, + ntp_ntp_servers TEXT[], + ntp_dns_servers INET[], + ntp_domain TEXT, + nexus_external_tls BOOLEAN, + nexus_external_dns_servers INET ARRAY, + snat_ip INET, + snat_first_port INT4 + CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), + snat_last_port INT4 + CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + + PRIMARY KEY (blueprint_id, id) +); diff --git a/schema/crdb/28.0.0/up4.sql b/schema/crdb/28.0.0/up4.sql new file mode 100644 index 0000000000..beff4da802 --- /dev/null +++ b/schema/crdb/28.0.0/up4.sql @@ -0,0 +1,13 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone_nic ( + blueprint_id UUID NOT NULL, + id UUID NOT NULL, + name TEXT NOT NULL, + ip INET NOT NULL, + mac INT8 NOT NULL, + subnet INET NOT NULL, + vni INT8 NOT NULL, + is_primary BOOLEAN NOT NULL, + slot INT2 NOT NULL, + + PRIMARY KEY (blueprint_id, id) +); diff --git a/schema/crdb/28.0.0/up5.sql b/schema/crdb/28.0.0/up5.sql new file mode 100644 index 0000000000..72c34400a3 --- /dev/null +++ b/schema/crdb/28.0.0/up5.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zones_not_in_service ( + blueprint_id UUID NOT NULL, + bp_omicron_zone_id UUID NOT NULL, + + PRIMARY KEY (blueprint_id, bp_omicron_zone_id) +); diff --git a/schema/crdb/28.0.0/up6.sql b/schema/crdb/28.0.0/up6.sql new file mode 100644 index 0000000000..41e69ca3da --- /dev/null +++ b/schema/crdb/28.0.0/up6.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_target ( + version INT8 PRIMARY KEY, + blueprint_id UUID NOT NULL, + enabled BOOL NOT NULL, + time_made_target TIMESTAMPTZ NOT NULL +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index c91bb669a9..86d1340379 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2954,8 +2954,8 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( -- service in them) primary_service_ip INET NOT NULL, primary_service_port INT4 - CHECK (primary_service_port BETWEEN 0 AND 65535) - NOT NULL, + CHECK (primary_service_port BETWEEN 0 AND 65535) + NOT NULL, -- The remaining properties may be NULL for different kinds of zones. The -- specific constraints are not enforced at the database layer, basically @@ -2967,7 +2967,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( second_service_ip INET, second_service_port INT4 CHECK (second_service_port IS NULL - OR second_service_port BETWEEN 0 AND 65535), + OR second_service_port BETWEEN 0 AND 65535), -- Zones may have an associated dataset. They're currently always on a U.2. 
-- The only thing we need to identify it here is the name of the zpool that @@ -2995,9 +2995,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( -- Source NAT configuration (currently used for boundary NTP only) snat_ip INET, snat_first_port INT4 - CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), + CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), snat_last_port INT4 - CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), PRIMARY KEY (inv_collection_id, id) ); @@ -3016,6 +3016,200 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone_nic ( PRIMARY KEY (inv_collection_id, id) ); +/* + * System-level blueprints + * + * See RFD 457 and 459 for context. + * + * A blueprint describes a potential system configuration. The primary table is + * the `blueprint` table, which stores only a small amount of metadata about the + * blueprint. The bulk of the information is stored in the `bp_*` tables below, + * each of which references back to `blueprint` by ID. + * + * `bp_target` describes the "target blueprints" of the system. Insertion must + * follow a strict set of rules: + * + * * The first target blueprint must have version=1, and must have no parent + * blueprint. + * * The Nth target blueprint must have version=N, and its parent blueprint must + * be the blueprint that was the target at version=N-1. + * + * The result is that the current target blueprint can always be found by + * looking at the maximally-versioned row in `bp_target`, and there is a linear + * history from that blueprint all the way back to the version=1 blueprint. We + * will eventually prune old blueprint targets, so it will not always be + * possible to view the entire history. + * + * `bp_sled_omicron_zones`, `bp_omicron_zone`, and `bp_omicron_zone_nic` are + * nearly identical to their `inv_*` counterparts, and record the + * `OmicronZonesConfig` for each sled. + * + * `bp_omicron_zones_not_in_service` stores a list of Omicron zones (present in + * `bp_omicron_zone`) that are NOT in service; e.g., should not appear in + * internal DNS. Nexus's in-memory `Blueprint` representation stores the set of + * zones that ARE in service. We invert that logic at this layer because we + * expect most blueprints to have a relatively large number of omicron zones, + * almost all of which will be in service. This is a minor and perhaps + * unnecessary optimization at the database layer, but it's also relatively + * simple and hidden by the relevant read and insert queries in + * `nexus-db-queries`. + */ + +-- list of all blueprints +CREATE TABLE IF NOT EXISTS omicron.public.blueprint ( + id UUID PRIMARY KEY, + + -- This is effectively a foreign key back to this table; however, it is + -- allowed to be NULL: the initial blueprint has no parent. Additionally, + -- it may be non-NULL but no longer reference a row in this table: once a + -- child blueprint has been created from a parent, it's possible for the + -- parent to be deleted. We do not NULL out this field on such a deletion, + -- so we can always see that there had been a particular parent even if it's + -- now gone. + parent_blueprint_id UUID, + + -- These fields are for debugging only. 
+ time_created TIMESTAMPTZ NOT NULL, + creator TEXT NOT NULL, + comment TEXT NOT NULL +); + +-- table describing both the current and historical target blueprints of the +-- system +CREATE TABLE IF NOT EXISTS omicron.public.bp_target ( + -- Monotonically increasing version for all bp_targets + version INT8 PRIMARY KEY, + + -- Effectively a foreign key into the `blueprint` table, but may reference a + -- blueprint that has been deleted (if this target is no longer the current + -- target: the current target must not be deleted). + blueprint_id UUID NOT NULL, + + -- Is this blueprint enabled? + -- + -- Currently, we have no code that acts on this value; however, it exists as + -- an escape hatch once we have automated blueprint planning and execution. + -- An operator can set the current blueprint to disabled, which should stop + -- planning and execution (presumably until a support case can address + -- whatever issue the update system is causing). + enabled BOOL NOT NULL, + + -- Timestamp for when this blueprint was made the current target + time_made_target TIMESTAMPTZ NOT NULL +); + +-- see inv_sled_omicron_zones, which is identical except it references a +-- collection whereas this table references a blueprint +CREATE TABLE IF NOT EXISTS omicron.public.bp_sled_omicron_zones ( + -- foreign key into `blueprint` table + blueprint_id UUID NOT NULL, + + sled_id UUID NOT NULL, + generation INT8 NOT NULL, + PRIMARY KEY (blueprint_id, sled_id) +); + +-- description of omicron zones specified in a blueprint +-- +-- This is currently identical to `inv_omicron_zone`, except that the foreign +-- keys reference other blueprint tables instead of inventory tables. We expect +-- them to diverge over time as either inventory or blueprints (or +-- both) grow context-specific properties. +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone ( + -- foreign key into the `blueprint` table + blueprint_id UUID NOT NULL, + + -- unique id for this sled (should be a foreign key into the `sled` table, though + -- it's conceivable a blueprint could refer to a sled that no longer exists, + -- particularly if the blueprint is older than the current target) + sled_id UUID NOT NULL, + + -- unique id for this zone + id UUID NOT NULL, + underlay_address INET NOT NULL, + zone_type omicron.public.zone_type NOT NULL, + + -- SocketAddr of the "primary" service for this zone + -- (what this describes varies by zone type, but all zones have at least one + -- service in them) + primary_service_ip INET NOT NULL, + primary_service_port INT4 + CHECK (primary_service_port BETWEEN 0 AND 65535) + NOT NULL, + + -- The remaining properties may be NULL for different kinds of zones. The + -- specific constraints are not enforced at the database layer, basically + -- because it's really complicated to do that and it's not obvious that it's + -- worthwhile. + + -- Some zones have a second service. Like the primary one, the meaning of + -- this is zone-type-dependent. + second_service_ip INET, + second_service_port INT4 + CHECK (second_service_port IS NULL + OR second_service_port BETWEEN 0 AND 65535), + + -- Zones may have an associated dataset. They're currently always on a U.2. + -- The only thing we need to identify it here is the name of the zpool that + -- it's on.
+ dataset_zpool_name TEXT, + + -- Zones with external IPs have an associated NIC and sockaddr for listening + -- (first is a foreign key into `bp_omicron_zone_nic`) + bp_nic_id UUID, + + -- Properties for internal DNS servers + -- address attached to this zone from outside the sled's subnet + dns_gz_address INET, + dns_gz_address_index INT8, + + -- Properties common to both kinds of NTP zones + ntp_ntp_servers TEXT[], + ntp_dns_servers INET[], + ntp_domain TEXT, + + -- Properties specific to Nexus zones + nexus_external_tls BOOLEAN, + nexus_external_dns_servers INET ARRAY, + + -- Source NAT configuration (currently used for boundary NTP only) + snat_ip INET, + snat_first_port INT4 + CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), + snat_last_port INT4 + CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + + PRIMARY KEY (blueprint_id, id) +); + +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone_nic ( + blueprint_id UUID NOT NULL, + id UUID NOT NULL, + name TEXT NOT NULL, + ip INET NOT NULL, + mac INT8 NOT NULL, + subnet INET NOT NULL, + vni INT8 NOT NULL, + is_primary BOOLEAN NOT NULL, + slot INT2 NOT NULL, + + PRIMARY KEY (blueprint_id, id) +); + +-- list of omicron zones that are considered NOT in-service for a blueprint +-- +-- In Rust code, we generally want to deal with "zones in service", which means +-- they should appear in DNS. However, almost all zones in almost all blueprints +-- will be in service, so we can induce considerably less database work by +-- storing the zones _not_ in service. Our DB wrapper layer handles this +-- inversion, so the rest of our Rust code can ignore it. +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zones_not_in_service ( + blueprint_id UUID NOT NULL, + bp_omicron_zone_id UUID NOT NULL, + + PRIMARY KEY (blueprint_id, bp_omicron_zone_id) +); + /*******************************************************************/ /* @@ -3196,7 +3390,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '27.0.0', NULL) + ( TRUE, NOW(), NOW(), '28.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 5215d850768f8a79160220bfd6441959a8a04064 Mon Sep 17 00:00:00 2001 From: Levon Tarver <11586085+internet-diglett@users.noreply.github.com> Date: Fri, 26 Jan 2024 17:48:08 -0600 Subject: [PATCH 51/91] background task for service zone nat (#4857) Currently the logic for configuring NAT for service zones is deeply nested and crosses sled-agent http API boundaries. The cleanest way to deliver eventual consistency for service zone nat entries was to pull the zone information from inventory and use that to generate nat entries to reconcile against the `ipv4_nat_entry` table. This covers us in the following scenarios: ### RSS: * User provides configuration to RSS * RSS process ultimately creates a sled plan and service plan * Application of service plan by sled-agents creates zones * zone create makes direct calls to dendrite to configure NAT (it is the only way it can be done at this time) * eventually the Nexus zones are launched and handoff to Nexus is complete * inventory task is run, recording zone locations to db * service zone nat background task reads inventory from db and uses the data to generate records for `ipv4_nat_entry` table, then triggers dendrite sync. 
* sync is ultimately a noop because nat entries already exist in dendrite (dendrite operations are idempotent) ### Cold boot: * sled-agents create switch zones if they are managing a scrimlet, and subsequently create zones written to their ledgers. This may result in direct calls to dendrite. * Once nexus is back up, inventory will resume being collected * service zone nat background task will read inventory from db to reconcile entries in `ipv4_nat_entry` table and then trigger dendrite sync. * If nat is out of date on dendrite, it will be updated on trigger. ### Dendrite crash * If dendrite crashes and restarts, it will immediately contact Nexus for re-sync (pre-existing logic from earlier NAT RPW work) * service zone and instance nat entries are now present in rpw table, so all nat entries will be restored ### Migration / Relocation of service zone * New zone gets created on a sled in the rack. Direct call to dendrite will be made (it uses the same logic as pre-nexus to create zone). * Inventory task will record new location of service zone * Service zone nat background task will use inventory to update table, adding and removing the necessary nat entries and triggering a dendrite update Considerations --- Because this relies on data from the inventory task which runs on a periodic timer (600s), and because this task also runs on a periodic timer (30s), there may be some latency for picking up changes. A few potential avenues for improvement: * Plumb additional logic into service zone nat configuration that enables direct updates to the `ipv4_nat_entry` table once nexus is online. Of note, this would further bifurcate the logic of pre-nexus and post-nexus state management. At this moment, it seems that this is the most painful approach. An argument can be made that we ultimately should be lifting the nat configuration logic _out_ of the service zone creation instead. * Decrease the timer for the inventory task. This is the simplest change, however this would result in more frequent collection, increasing overhead. I do not know _how much_ this would increase overhead. Maybe it is negligible. * Plumb in the ability to trigger the inventory collection task for interesting control plane events. This would allow us to keep the _relatively_ infrequent timing intervals but allow us to refresh on-demand when needed. 
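To make the reconcile step above concrete, here is a minimal, self-contained sketch of the keep/delete/add split it performs. This is illustrative only: `NatValues` is a hypothetical stand-in for the real `Ipv4NatValues` model type, and the authoritative logic is `ipv4_nat_sync_service_zones` in this patch.

----
// Illustrative sketch: split the NAT entries derived from inventory
// (`desired`) against the active rows in `ipv4_nat_entry` (`active`)
// into entries to insert and entries to soft-delete.
#[derive(Clone, Debug, PartialEq)]
struct NatValues {
    external_address: std::net::Ipv4Addr,
    first_port: u16,
    last_port: u16,
}

fn reconcile(
    desired: &[NatValues],
    active: &[NatValues],
) -> (Vec<NatValues>, Vec<NatValues>) {
    // active rows no longer present in the desired set get soft-deleted
    let delete: Vec<NatValues> =
        active.iter().filter(|e| !desired.contains(e)).cloned().collect();
    // desired entries with no matching active row get inserted
    let add: Vec<NatValues> =
        desired.iter().filter(|e| !active.contains(e)).cloned().collect();
    (add, delete)
}

fn main() {
    let desired = vec![NatValues {
        external_address: std::net::Ipv4Addr::new(10, 0, 0, 100),
        first_port: 0,
        last_port: 999,
    }];
    // Starting from an empty table: one insert, no deletes.
    let (add, delete) = reconcile(&desired, &[]);
    assert_eq!(add.len(), 1);
    assert!(delete.is_empty());
}
----

Entries present in both sets are left untouched, which keeps the common case (an unchanged set of service zones) close to a no-op.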
Related --- Closes #4650 Extracted from #4822 --- common/src/address.rs | 6 + common/src/nexus_config.rs | 16 + dev-tools/omdb/tests/env.out | 12 + dev-tools/omdb/tests/successes.out | 11 + docs/how-to-run.adoc | 102 +++-- nexus/db-model/src/ipv4_nat_entry.rs | 2 +- nexus/db-model/src/ipv4net.rs | 1 + nexus/db-model/src/ipv6net.rs | 1 + nexus/db-model/src/macaddr.rs | 1 + nexus/db-model/src/schema.rs | 2 +- nexus/db-model/src/vni.rs | 10 +- .../src/db/datastore/ipv4_nat_entry.rs | 210 ++++++++++ nexus/examples/config.toml | 1 + nexus/src/app/background/init.rs | 29 +- nexus/src/app/background/mod.rs | 1 + .../app/background/sync_service_zone_nat.rs | 362 ++++++++++++++++++ nexus/tests/config.test.toml | 1 + schema/crdb/29.0.0/up1.sql | 14 + schema/crdb/dbinit.sql | 17 +- smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + 21 files changed, 770 insertions(+), 31 deletions(-) create mode 100644 nexus/src/app/background/sync_service_zone_nat.rs create mode 100644 schema/crdb/29.0.0/up1.sql diff --git a/common/src/address.rs b/common/src/address.rs index 0c8df33868..65a6604daf 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -18,6 +18,12 @@ pub const AZ_PREFIX: u8 = 48; pub const RACK_PREFIX: u8 = 56; pub const SLED_PREFIX: u8 = 64; +/// maximum possible value for a tcp or udp port +pub const MAX_PORT: u16 = u16::MAX; + +/// minimum possible value for a tcp or udp port +pub const MIN_PORT: u16 = u16::MIN; + /// The amount of redundancy for internal DNS servers. /// /// Must be less than or equal to MAX_DNS_REDUNDANCY. diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index be4b05ffdf..dedd091d81 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -334,6 +334,8 @@ pub struct BackgroundTaskConfig { pub inventory: InventoryConfig, /// configuration for phantom disks task pub phantom_disks: PhantomDiskConfig, + /// configuration for service zone nat sync task + pub sync_service_zone_nat: SyncServiceZoneNatConfig, } #[serde_as] @@ -376,6 +378,14 @@ pub struct NatCleanupConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct SyncServiceZoneNatConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + #[serde_as] #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct InventoryConfig { @@ -517,6 +527,7 @@ mod test { }; use crate::address::{Ipv6Subnet, RACK_PREFIX}; use crate::api::internal::shared::SwitchLocation; + use crate::nexus_config::SyncServiceZoneNatConfig; use camino::{Utf8Path, Utf8PathBuf}; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; @@ -665,6 +676,7 @@ mod test { inventory.nkeep = 11 inventory.disable = false phantom_disks.period_secs = 30 + sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] type = "random" seed = 0 @@ -769,6 +781,9 @@ mod test { phantom_disks: PhantomDiskConfig { period_secs: Duration::from_secs(30), }, + sync_service_zone_nat: SyncServiceZoneNatConfig { + period_secs: Duration::from_secs(30) + } }, default_region_allocation_strategy: crate::nexus_config::RegionAllocationStrategy::Random { @@ -827,6 +842,7 @@ mod test { inventory.nkeep = 3 inventory.disable = false phantom_disks.period_secs = 30 + sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] type = "random" "##, diff --git a/dev-tools/omdb/tests/env.out 
b/dev-tools/omdb/tests/env.out index c08f592852..8cca1b063a 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -70,6 +70,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "service_zone_nat_tracker" + ensures service zone nat records are recorded in NAT RPW table + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT @@ -139,6 +143,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "service_zone_nat_tracker" + ensures service zone nat records are recorded in NAT RPW table + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. @@ -195,6 +203,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "service_zone_nat_tracker" + ensures service zone nat records are recorded in NAT RPW table + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 65520ab59c..f291bbb6a0 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -264,6 +264,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "service_zone_nat_tracker" + ensures service zone nat records are recorded in NAT RPW table + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ @@ -369,6 +373,13 @@ task: "phantom_disks" number of phantom disks deleted: 0 number of phantom disk delete errors: 0 +task: "service_zone_nat_tracker" + configured period: every 30s + currently executing: no + last completed activation: iter 2, triggered by an explicit signal + started at (s ago) and ran for ms + last completion reported error: inventory collection is None + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index f6d780ad72..c1f78a0521 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -498,41 +498,93 @@ Follow the instructions to set up the https://github.com/oxidecomputer/oxide.rs[ oxide auth login --host http://192.168.1.21 ---- +=== Configure quotas for your silo + +Setting resource quotas is required before you can begin uploading images, provisioning instances, etc. +In this example we'll update the recovery silo so we can provision instances directly from it: + +[source, console] +---- +$ oxide api /v1/system/silos/recovery/quotas --method PUT --input - <>). +Here we will first create an ip pool for the recovery silo: [source,console] ----- -$ oxide ip-pool range add --pool default --first 192.168.1.31 --last 192.168.1.40 -success -IpPoolRange { - id: 4a61e65a-d96d-4c56-9cfd-dc1e44d9e99b, - ip_pool_id: 1b1289a7-cefe-4a7e-a8c9-d93330846301, - range: V4( - Ipv4Range { - first: 192.168.1.31, - last: 192.168.1.40, - }, - ), - time_created: 2023-08-02T16:31:43.679785Z, +--- +$ oxide api /v1/system/ip-pools --method POST --input - < CreateResult { + use db::schema::ipv4_nat_entry::dsl; + + let vni = nexus_db_model::Vni(Vni::SERVICES_VNI); + + // find all active nat entries with the services vni + let result: Vec = dsl::ipv4_nat_entry + .filter(dsl::vni.eq(vni)) + .filter(dsl::version_removed.is_null()) + .select(Ipv4NatEntry::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + // determine what to keep and what to delete + let mut keep: Vec<_> = vec![]; + let mut delete: Vec<_> = vec![]; + + for db_entry in result.iter() { + let values = Ipv4NatValues { + external_address: db_entry.external_address, + first_port: db_entry.first_port, + last_port: db_entry.last_port, + sled_address: db_entry.sled_address, + vni: db_entry.vni, + mac: db_entry.mac, + }; + + if nat_entries.contains(&values) { + keep.push(values); + } else { + delete.push(db_entry) + } + } + + // delete entries that are not present in requested entries + for entry in delete { + if let Err(e) = self.ipv4_nat_delete(opctx, entry).await { + error!( + opctx.log, + "failed to delete service zone nat entry"; + "error" => ?e, + "entry" => ?entry, + ); + } + } + + // optimization: only attempt to add what is missing + let add = nat_entries.iter().filter(|entry| !keep.contains(entry)); + + let mut count = 0; + + // insert nat_entries + for entry in add { + if let Err(e) = + self.ensure_ipv4_nat_entry(opctx, entry.clone()).await + { + error!( + opctx.log, + "failed to ensure service zone nat entry"; + "error" => ?e, + "entry" => ?entry, + ); + continue; + } + count += 1; + } + + Ok(count) + } + pub async fn ipv4_nat_delete( &self, opctx: &OpContext, @@ -592,4 +678,128 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + // Test our ability to reconcile a set of service zone nat entries + #[tokio::test] + async fn ipv4_nat_sync_service_zones() { + let logctx = dev::test_setup_log("ipv4_nat_sync_service_zones"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // We should not have any NAT entries at this moment + let initial_state = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert!(initial_state.is_empty()); + assert_eq!( + datastore.ipv4_nat_current_version(&opctx).await.unwrap(), + 0 + ); + + // create two nat entries: + // 1. an entry should be deleted during the next sync + // 2. an entry that should be kept during the next sync + + let external_address = external::Ipv4Net( + ipnetwork::Ipv4Network::try_from("10.0.0.100").unwrap(), + ); + + let sled_address = external::Ipv6Net( + ipnetwork::Ipv6Network::try_from("fd00:1122:3344:104::1").unwrap(), + ); + + // Add a nat entry. + let nat1 = Ipv4NatValues { + external_address: external_address.into(), + first_port: 0.into(), + last_port: 999.into(), + sled_address: sled_address.into(), + vni: Vni(external::Vni::SERVICES_VNI), + mac: MacAddr( + external::MacAddr::from_str("A8:40:25:F5:EB:2A").unwrap(), + ), + }; + + let nat2 = Ipv4NatValues { + first_port: 1000.into(), + last_port: 1999.into(), + ..nat1 + }; + + datastore.ensure_ipv4_nat_entry(&opctx, nat1.clone()).await.unwrap(); + datastore.ensure_ipv4_nat_entry(&opctx, nat2.clone()).await.unwrap(); + + let db_entries = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert_eq!(db_entries.len(), 2); + + // sync two nat entries: + // 1. a nat entry that already exists + // 2. a nat entry that does not already exist + + let nat3 = Ipv4NatValues { + first_port: 2000.into(), + last_port: 2999.into(), + ..nat2 + }; + + datastore + .ipv4_nat_sync_service_zones(&opctx, &[nat2.clone(), nat3.clone()]) + .await + .unwrap(); + + // we should have three nat entries in the db + // 1. the old one that was deleted during the last sync + // 2. 
the old one that "survived" the last sync + // 3. a new one that was added during the last sync + let db_entries = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert_eq!(db_entries.len(), 3); + + // nat2 and nat3 should not be soft deleted + for request in [nat2.clone(), nat3.clone()] { + assert!(db_entries.iter().any(|entry| { + entry.first_port == request.first_port + && entry.last_port == request.last_port + && entry.time_deleted.is_none() + })); + } + + // nat1 should be soft deleted + assert!(db_entries.iter().any(|entry| { + entry.first_port == nat1.first_port + && entry.last_port == nat1.last_port + && entry.time_deleted.is_some() + && entry.version_removed.is_some() + })); + + // add nat1 back + // this simulates a zone leaving and then returning, i.e. when a sled gets restarted + datastore + .ipv4_nat_sync_service_zones( + &opctx, + &[nat1.clone(), nat2.clone(), nat3.clone()], + ) + .await + .unwrap(); + + // we should have four nat entries in the db + let db_entries = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert_eq!(db_entries.len(), 4); + + // there should be an active entry for nat1 again + assert!(db_entries.iter().any(|entry| { + entry.first_port == nat1.first_port + && entry.last_port == nat1.last_port + && entry.time_deleted.is_none() + && entry.version_removed.is_none() + })); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index f13ea721b8..dcab2d9da1 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -105,6 +105,7 @@ inventory.nkeep = 5 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. 
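As an aside on the `sync_service_zone_nat.period_secs = 30` knob above: a minimal, self-contained sketch of how such a field deserializes into a `std::time::Duration`, mirroring the shape of the `SyncServiceZoneNatConfig` struct added in this patch. The `toml` dependency and the `DurationSeconds<u64>` parameterization are assumptions made for the sake of a runnable example.

----
use serde::Deserialize;
use serde_with::{serde_as, DurationSeconds};
use std::time::Duration;

// Trimmed-down stand-in for `SyncServiceZoneNatConfig`.
#[serde_as]
#[derive(Debug, Deserialize, PartialEq)]
struct SyncServiceZoneNatConfig {
    // `DurationSeconds` converts the plain integer in the config file
    // into a `Duration` at deserialization time.
    #[serde_as(as = "DurationSeconds<u64>")]
    period_secs: Duration,
}

fn main() {
    let cfg: SyncServiceZoneNatConfig =
        toml::from_str("period_secs = 30").unwrap();
    assert_eq!(cfg.period_secs, Duration::from_secs(30));
}
----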
diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index d30d2162c4..49ac6d93e2 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -12,6 +12,7 @@ use super::external_endpoints; use super::inventory_collection; use super::nat_cleanup; use super::phantom_disks; +use super::sync_service_zone_nat::ServiceZoneNatTracker; use nexus_db_model::DnsGroup; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; @@ -56,6 +57,9 @@ pub struct BackgroundTasks { /// task handle for the task that detects phantom disks pub task_phantom_disks: common::TaskHandle, + + /// task handle for the service zone nat tracker + pub task_service_zone_nat_tracker: common::TaskHandle, } impl BackgroundTasks { @@ -106,6 +110,9 @@ impl BackgroundTasks { (task, watcher_channel) }; + let dpd_clients: Vec<_> = + dpd_clients.values().map(|client| client.clone()).collect(); + let nat_cleanup = { driver.register( "nat_v4_garbage_collector".to_string(), @@ -116,7 +123,7 @@ impl BackgroundTasks { config.nat_cleanup.period_secs, Box::new(nat_cleanup::Ipv4NatGarbageCollector::new( datastore.clone(), - dpd_clients.values().map(|client| client.clone()).collect(), + dpd_clients.clone(), )), opctx.child(BTreeMap::new()), vec![], @@ -149,7 +156,8 @@ impl BackgroundTasks { // Background task: phantom disk detection let task_phantom_disks = { - let detector = phantom_disks::PhantomDiskDetector::new(datastore); + let detector = + phantom_disks::PhantomDiskDetector::new(datastore.clone()); let task = driver.register( String::from("phantom_disks"), @@ -163,6 +171,22 @@ impl BackgroundTasks { task }; + let task_service_zone_nat_tracker = { + driver.register( + "service_zone_nat_tracker".to_string(), + String::from( + "ensures service zone nat records are recorded in NAT RPW table", + ), + config.sync_service_zone_nat.period_secs, + Box::new(ServiceZoneNatTracker::new( + datastore.clone(), + dpd_clients.clone(), + )), + opctx.child(BTreeMap::new()), + vec![], + ) + }; + BackgroundTasks { driver, task_internal_dns_config, @@ -174,6 +198,7 @@ impl BackgroundTasks { nat_cleanup, task_inventory_collection, task_phantom_disks, + task_service_zone_nat_tracker, } } diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 70b20224d4..166fc2654b 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -14,6 +14,7 @@ mod inventory_collection; mod nat_cleanup; mod phantom_disks; mod status; +mod sync_service_zone_nat; pub use common::Driver; pub use common::TaskHandle; diff --git a/nexus/src/app/background/sync_service_zone_nat.rs b/nexus/src/app/background/sync_service_zone_nat.rs new file mode 100644 index 0000000000..8e75f97d7a --- /dev/null +++ b/nexus/src/app/background/sync_service_zone_nat.rs @@ -0,0 +1,362 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for detecting changes to service zone locations and +//! 
updating the NAT RPW table accordingly + +use super::common::BackgroundTask; +use anyhow::Context; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::Ipv4NatValues; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::lookup::LookupPath; +use nexus_db_queries::db::DataStore; +use omicron_common::address::{MAX_PORT, MIN_PORT}; +use omicron_common::api::external; +use serde_json::json; +use sled_agent_client::types::OmicronZoneType; +use std::net::{IpAddr, SocketAddr}; +use std::num::NonZeroU32; +use std::sync::Arc; + +// Minimum number of boundary NTP zones that should be present in a valid +// set of service zone nat configurations. +const MIN_NTP_COUNT: usize = 1; + +// Minimum number of nexus zones that should be present in a valid +// set of service zone nat configurations. +const MIN_NEXUS_COUNT: usize = 1; + +// Minimum number of external DNS zones that should be present in a valid +// set of service zone nat configurations. +const MIN_EXTERNAL_DNS_COUNT: usize = 1; + +/// Background task that ensures service zones have nat entries +/// persisted in the NAT RPW table +pub struct ServiceZoneNatTracker { + datastore: Arc<DataStore>, + dpd_clients: Vec<Arc<dpd_client::Client>>, +} + +impl ServiceZoneNatTracker { + pub fn new( + datastore: Arc<DataStore>, + dpd_clients: Vec<Arc<dpd_client::Client>>, + ) -> Self { + Self { datastore, dpd_clients } + } +} + +impl BackgroundTask for ServiceZoneNatTracker { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async { + let log = &opctx.log; + + // check inventory + let inventory = match self + .datastore + .inventory_get_latest_collection( + opctx, + NonZeroU32::new(u32::MAX).unwrap(), + ) + .await + { + Ok(inventory) => inventory, + Err(e) => { + error!( + &log, + "failed to collect inventory"; + "error" => format!("{:#}", e) + ); + return json!({ + "error": + format!( + "failed to collect inventory: \ + {:#}", + e + ) + }); + } + }; + + // generate set of Service Zone NAT entries + let collection = match inventory { + Some(c) => c, + // this could happen if we check the inventory table before the + // inventory job has finished running for the first time + None => { + warn!( + &log, + "inventory collection is None"; + ); + return json!({ + "error": "inventory collection is None" + }); + } + }; + + let mut ipv4_nat_values: Vec<Ipv4NatValues> = vec![]; + let mut ntp_count = 0; + let mut nexus_count = 0; + let mut dns_count = 0; + + for (sled_id, zones_found) in collection.omicron_zones { + let (_, sled) = match LookupPath::new(opctx, &self.datastore) + .sled_id(sled_id) + .fetch() + .await + .context("failed to look up sled") + { + Ok(result) => result, + Err(e) => { + error!( + &log, + "failed to lookup sled by id"; + "id" => ?sled_id, + "error" => ?e, + ); + continue; + } + }; + + let sled_address = external::Ipv6Net( + ipnetwork::Ipv6Network::new(*sled.ip, 128).unwrap(), + ); + + let zones_config: sled_agent_client::types::OmicronZonesConfig = + zones_found.zones; + let zones: Vec<sled_agent_client::types::OmicronZoneConfig> = + zones_config.zones; + + for zone in zones { + let zone_type: OmicronZoneType = zone.zone_type; + match zone_type { + OmicronZoneType::BoundaryNtp { + nic, snat_cfg, ..
+ } => { + let external_ip = match snat_cfg.ip { + IpAddr::V4(addr) => addr, + IpAddr::V6(_) => { + error!( + &log, + "ipv6 addresses for service zone nat not implemented"; + ); + continue; + } + }; + + let external_address = + ipnetwork::Ipv4Network::new(external_ip, 32) + .unwrap(); + + let nat_value = Ipv4NatValues { + external_address: nexus_db_model::Ipv4Net( + omicron_common::api::external::Ipv4Net( + external_address, + ), + ), + first_port: snat_cfg.first_port.into(), + last_port: snat_cfg.last_port.into(), + sled_address: sled_address.into(), + vni: nexus_db_model::Vni(nic.vni), + mac: nexus_db_model::MacAddr(nic.mac), + }; + + // Append ipv4 nat entry + ipv4_nat_values.push(nat_value); + ntp_count += 1; + } + OmicronZoneType::Nexus { nic, external_ip, .. } => { + let external_ip = match external_ip { + IpAddr::V4(addr) => addr, + IpAddr::V6(_) => { + error!( + &log, + "ipv6 addresses for service zone nat not implemented"; + ); + continue; + } + }; + + let external_address = + ipnetwork::Ipv4Network::new(external_ip, 32) + .unwrap(); + + let nat_value = Ipv4NatValues { + external_address: nexus_db_model::Ipv4Net( + omicron_common::api::external::Ipv4Net( + external_address, + ), + ), + first_port: MIN_PORT.into(), + last_port: MAX_PORT.into(), + sled_address: sled_address.into(), + vni: nexus_db_model::Vni(nic.vni), + mac: nexus_db_model::MacAddr(nic.mac), + }; + + // Append ipv4 nat entry + ipv4_nat_values.push(nat_value); + nexus_count += 1; + }, + OmicronZoneType::ExternalDns { nic, dns_address, .. } => { + let socket_addr: SocketAddr = match dns_address.parse() { + Ok(value) => value, + Err(e) => { + error!( + &log, + "failed to parse value into socketaddr"; + "value" => dns_address, + "error" => ?e, + ); + continue; + } + }; + let external_ip = match socket_addr { + SocketAddr::V4(v4) => { + *v4.ip() + }, + SocketAddr::V6(_) => { + error!( + &log, + "ipv6 addresses for service zone nat not implemented"; + ); + continue; + }, + }; + + let external_address = + ipnetwork::Ipv4Network::new(external_ip, 32) + .unwrap(); + + let nat_value = Ipv4NatValues { + external_address: nexus_db_model::Ipv4Net( + omicron_common::api::external::Ipv4Net( + external_address, + ), + ), + first_port: MIN_PORT.into(), + last_port: MAX_PORT.into(), + sled_address: sled_address.into(), + vni: nexus_db_model::Vni(nic.vni), + mac: nexus_db_model::MacAddr(nic.mac), + }; + + // Append ipv4 nat entry + ipv4_nat_values.push(nat_value); + dns_count += 1; + }, + // we explicitly list all cases instead of using a wildcard, + // that way if someone adds a new type to OmicronZoneType that + // requires NAT, they must come here to update this logic as + // well + OmicronZoneType::Clickhouse {..} => continue, + OmicronZoneType::ClickhouseKeeper {..} => continue, + OmicronZoneType::CockroachDb {..} => continue, + OmicronZoneType::Crucible {..} => continue, + OmicronZoneType::CruciblePantry {..} => continue, + OmicronZoneType::InternalNtp {..} => continue, + OmicronZoneType::InternalDns {..} => continue, + OmicronZoneType::Oximeter { ..} => continue, + } + } + } + + // if we make it this far, this should not be empty: + // * nexus is running so we should at least have generated a nat value for it + // * nexus requires other service zones that require nat to come up first + if ipv4_nat_values.is_empty() { + error!( + &log, + "nexus is running but no service zone nat values could be generated from inventory"; + ); + return json!({ + "error": "nexus is running but no service zone nat values could be generated from
inventory" + }); + } + + if dns_count < MIN_EXTERNAL_DNS_COUNT { + error!( + &log, + "generated config for fewer than the minimum allowed number of dns zones"; + ); + return json!({ + "error": "generated config for fewer than the minimum allowed number of dns zones" + }); + } + + if ntp_count < MIN_NTP_COUNT { + error!( + &log, + "generated config for fewer than the minimum allowed number of ntp zones"; + ); + return json!({ + "error": "generated config for fewer than the minimum allowed number of ntp zones" + + }); + } + + if nexus_count < MIN_NEXUS_COUNT { + error!( + &log, + "generated config for fewer than the minimum allowed number of nexus zones"; + ); + return json!({ + "error": "generated config for fewer than the minimum allowed number of nexus zones" + + }); + } + + // reconcile service zone nat entries + let result = match self.datastore.ipv4_nat_sync_service_zones(opctx, &ipv4_nat_values).await { + Ok(num) => num, + Err(e) => { + error!( + &log, + "failed to update service zone nat records"; + "error" => format!("{:#}", e) + ); + return json!({ + "error": + format!( + "failed to update service zone nat records: \ + {:#}", + e + ) + }); + }, + }; + + // notify dpd if we've added any new records + if result > 0 { + for client in &self.dpd_clients { + if let Err(e) = client.ipv4_nat_trigger_update().await { + error!( + &log, + "failed to trigger dpd rpw workflow"; + "error" => ?e + ); + }; + } + } + + let rv = serde_json::to_value(&result).unwrap_or_else(|error| { + json!({ + "error": + format!( + "failed to serialize final value: {:#}", + error + ) + }) + }); + + rv + } + .boxed() + } +} diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index a4436234f0..476b8fe6c8 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -99,6 +99,7 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/schema/crdb/29.0.0/up1.sql b/schema/crdb/29.0.0/up1.sql new file mode 100644 index 0000000000..a213380944 --- /dev/null +++ b/schema/crdb/29.0.0/up1.sql @@ -0,0 +1,14 @@ +CREATE INDEX IF NOT EXISTS ipv4_nat_lookup_by_vni ON omicron.public.ipv4_nat_entry ( + vni +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + mac, + version_added, + version_removed, + time_created, + time_deleted +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 86d1340379..6ff92acfa4 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3383,6 +3383,21 @@ CREATE TABLE IF NOT EXISTS omicron.public.db_metadata ( ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS autoneg BOOL NOT NULL DEFAULT false; +CREATE INDEX IF NOT EXISTS ipv4_nat_lookup_by_vni ON omicron.public.ipv4_nat_entry ( + vni +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + mac, + version_added, + version_removed, + time_created, + time_deleted +); + INSERT INTO omicron.public.db_metadata ( singleton, time_created, @@ -3390,7 +3405,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '28.0.0', NULL) + ( TRUE, NOW(), NOW(), '29.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index d330f32ab6..d84bf8d4b0 100644 --- 
a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -47,6 +47,7 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] # by default, allocate across 3 distinct sleds diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index cbd4851613..01206655f0 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -47,6 +47,7 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] # by default, allocate without requirement for distinct sleds. From c91421a5693fb05e5308a051567f866551805f10 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 26 Jan 2024 17:50:56 -0800 Subject: [PATCH 52/91] Update Rust crate sqlparser to 0.43.1 (#4902) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b2815d9a1f..7d7a8422c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8379,9 +8379,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.41.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964" +checksum = "f95c4bae5aba7cd30bd506f7140026ade63cff5afd778af8854026f9606bf5d4" dependencies = [ "log", "sqlparser_derive", diff --git a/Cargo.toml b/Cargo.toml index 47c412b9b5..1192806d15 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -359,7 +359,7 @@ sp-sim = { path = "sp-sim" } sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-rot = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } -sqlparser = { version = "0.41.0", features = [ "visitor" ] } +sqlparser = { version = "0.43.1", features = [ "visitor" ] } static_assertions = "1.1.0" # Please do not change the Steno version to a Git dependency. It makes it # harder than expected to make breaking changes (even if you specify a specific From bd0ac96aa0afc6d89e46b4673c1194cb4dce615c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 27 Jan 2024 05:28:14 +0000 Subject: [PATCH 53/91] Update taiki-e/install-action digest to bee85d7 (#4908) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`1f501f0` -> `bee85d7`](https://togithub.com/taiki-e/install-action/compare/1f501f0...bee85d7) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.
🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index d4a4a4750c..85aa0ab7f4 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@1f501f091c4240a626be17b7496626f8f0cf979a # v2 + uses: taiki-e/install-action@bee85d7ea77c01f7a403c22ac2c802b431b093df # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From c2b90bc664ee957a3956a965b30f57413cde71e9 Mon Sep 17 00:00:00 2001 From: Nils Nieuwejaar Date: Sat, 27 Jan 2024 00:31:58 -0500 Subject: [PATCH 54/91] Update dendrite commit (#4907) Update progenitor to get clippy fix Preserve switch zone logs in CI --- .github/buildomat/jobs/deploy.sh | 1 + Cargo.lock | 8 ++++---- package-manifest.toml | 12 ++++++------ tools/dendrite_openapi_version | 4 ++-- tools/dendrite_stub_checksums | 6 +++--- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index e69cfb0078..5e43ff7f7c 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -5,6 +5,7 @@ #: target = "lab-2.0-opte-0.28" #: output_rules = [ #: "%/var/svc/log/oxide-sled-agent:default.log*", +#: "%/zone/oxz_*/root/var/svc/log/oxide-*.log*", #: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/oxide-*.log*", #: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/system-illumos-*.log*", #: "%/pool/ext/*/crypt/zone/oxz_ntp_*/root/var/log/chrony/*.log*", diff --git a/Cargo.lock b/Cargo.lock index 7d7a8422c2..5309fac767 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6414,7 +6414,7 @@ dependencies = [ [[package]] name = "progenitor" version = "0.5.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" dependencies = [ "progenitor-client", "progenitor-impl", @@ -6425,7 +6425,7 @@ dependencies = [ [[package]] name = "progenitor-client" version = "0.5.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" dependencies = [ "bytes", "futures-core", @@ -6439,7 +6439,7 @@ dependencies = [ [[package]] name = "progenitor-impl" version = "0.5.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" dependencies = [ "getopts", "heck 0.4.1", @@ -6461,7 +6461,7 @@ dependencies = [ [[package]] name = "progenitor-macro" version = "0.5.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" dependencies = [ "openapiv3", "proc-macro2", diff --git 
a/package-manifest.toml b/package-manifest.toml index f574f1ff5d..b08457e46c 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -497,8 +497,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "1c2f91a493c8b3c5fb7b853c570b2901ac3c22a7" -source.sha256 = "052d97370515189465e4e835edb4a2d7e1e0b55ace0230ba18f045a03d975e80" +source.commit = "fd159136c552d8b4ec4d49dd9bae7e38f6a636e6" +source.sha256 = "1e24598ba77dc00682cdf54fc370696ef5aa49ed510ab7f72fcc91d61d679e7b" output.type = "zone" output.intermediate_only = true @@ -522,8 +522,8 @@ only_for_targets.image = "standard" # 2. Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "1c2f91a493c8b3c5fb7b853c570b2901ac3c22a7" -source.sha256 = "3ebc1ee37c4d7a0657a78abbaad2fe81570da88128505bfdc4ea47e3e05c6277" +source.commit = "fd159136c552d8b4ec4d49dd9bae7e38f6a636e6" +source.sha256 = "720df8aff3aaa0f8a86ec606089ebf8b5068d7f3c243bd4c868b96ef72d13485" output.type = "zone" output.intermediate_only = true @@ -540,8 +540,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "1c2f91a493c8b3c5fb7b853c570b2901ac3c22a7" -source.sha256 = "18079b2ce1003facb476e28499f2e31ebe092510ecd6c685fa1a91f1a34f2dda" +source.commit = "fd159136c552d8b4ec4d49dd9bae7e38f6a636e6" +source.sha256 = "5e34a10d9dca6c94f96075140d42b755dee1f5e6a3485fc239b12e12b89a30c5" output.type = "zone" output.intermediate_only = true diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index 6bda68c69d..56bcb2d9ff 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="1c2f91a493c8b3c5fb7b853c570b2901ac3c22a7" -SHA2="07d115bfa8498a8015ca2a8447efeeac32e24aeb25baf3d5e2313216e11293c0" +COMMIT="fd159136c552d8b4ec4d49dd9bae7e38f6a636e6" +SHA2="e8f73a83d5c62f7efce998f821acc80a91b7995c95bd9ec2c228372829310099" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index de183cb496..497ce5c010 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="052d97370515189465e4e835edb4a2d7e1e0b55ace0230ba18f045a03d975e80" -CIDL_SHA256_LINUX_DPD="5c8bc252818897bc552a039f2423eb668d99e19ef54374644412c7aca533f94e" -CIDL_SHA256_LINUX_SWADM="9d549fc3ebaf392961404b50e802ccb5e81e41e779ecc46166d49e5fb44b524f" +CIDL_SHA256_ILLUMOS="1e24598ba77dc00682cdf54fc370696ef5aa49ed510ab7f72fcc91d61d679e7b" +CIDL_SHA256_LINUX_DPD="4fc43b53a048264664ede64805d4d179ec32d50cf9ab1aaa0fa4e17190e511a2" +CIDL_SHA256_LINUX_SWADM="0ab34a2063e68568aa064f7b71825a603d47b3e399f3e7f45976edb5d5283f0f" From 3ee0afd751a4175dc0855c91b5f0dfb7c7c21dc9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 29 Jan 2024 12:11:53 -0500 Subject: [PATCH 55/91] SP versions v1.0.6 (#4916) --- tools/hubris_checksums | 14 +++++++------- tools/hubris_version | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/hubris_checksums b/tools/hubris_checksums index 478d8f192e..8dc282e500 100644 --- a/tools/hubris_checksums +++ b/tools/hubris_checksums @@ -1,7 +1,7 @@ -6567a0775d5f0b7ff09d97f149532a627222971eadd89ea0dac186c9a825846d build-gimlet-c-image-default-v1.0.5.zip -1190b27246d8c8c20837d957266ac9e90e32934841b9acc2990d2762a3b53a16 
build-gimlet-d-image-default-v1.0.5.zip -79e644ffbbd7195ff2699c90ee26f277edac40b385fc5bb8e7821a4611ad7c11 build-gimlet-e-image-default-v1.0.5.zip -bf83e0311e18fc716dd5a315106aa965d278c4f481892fe124bc376b2e23581e build-psc-b-image-default-v1.0.5.zip -0dd1de9c3d3c686e8a05525fbed48c6532b608b34c77214b7fe15a8f54b0f3cb build-psc-c-image-default-v1.0.5.zip -c024d5546288d0d953735b3a0221ee0e218cc27ed1e26eede5c91c9a8137c592 build-sidecar-b-image-default-v1.0.5.zip -de79320022718be94c81dc7d44b5229ce0956aff9c1ffa11e8c3ff8961af49bb build-sidecar-c-image-default-v1.0.5.zip +e1b3dc5c7da643b27c0dd5bf8e915d13661446e711bfdeb1d8274eed63fa5843 build-gimlet-c-image-default-v1.0.6.zip +3002444307047429531ef862435a034c64b89a698921bf19794ac97b777a2f95 build-gimlet-d-image-default-v1.0.6.zip +9e783bc92fb1c8a91f4b117241ed4c0ff2818f32f46c5193cdcdbbe02d56af9a build-gimlet-e-image-default-v1.0.6.zip +dece7d39f7fcd2f15dc62d91e94046b1f438a3e0fd2c804efd5f67e12ce0dd58 build-psc-b-image-default-v1.0.6.zip +7e94035b52f1dcb137b477750bf9e215d4fcd07fe95b2cfdbbc0d7fada79eb28 build-psc-c-image-default-v1.0.6.zip +ccf09dc7c9c2a946b89bcfafb391100504880fa395c9079dfb7a3b28635a4abb build-sidecar-b-image-default-v1.0.6.zip +b5d91c212f813dbdba06c1f5b098fd37fe6cb93fe33fd3c58325cb6504dc6d05 build-sidecar-c-image-default-v1.0.6.zip diff --git a/tools/hubris_version b/tools/hubris_version index 37e565d060..f2c1e74f2b 100644 --- a/tools/hubris_version +++ b/tools/hubris_version @@ -1 +1 @@ -TAGS=(gimlet-v1.0.5 psc-v1.0.5 sidecar-v1.0.5) +TAGS=(gimlet-v1.0.6 psc-v1.0.6 sidecar-v1.0.6) From 45df2e6ce738eba9a66e2f885bdc509d8932834b Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 29 Jan 2024 09:47:55 -0800 Subject: [PATCH 56/91] Update crucible and propolis versions (#4912) Crucible changes Remove a superfluous copy during write serialization (#1087) Update to progenitor v0.5.0, pull in required Omicron updates (#1115) Update usdt to v0.5.0 (#1116) Do not panic on reinitialize of a downstairs client. (#1114) Bump (tracing-)opentelemetry(-jaeger) (#1113) Make the Guest -> Upstairs queue fully async (#1086) Switch to per-block ownership (#1107) Handle timeout in the client IO task (#1109) Enforce buffer alignment (#1106) Block size buffers (#1105) New dtrace probes and a counter struct in the Upstairs. (#1104) Implement read decryption offloading (#1089) Remove Arc + Mutex from Buffer (#1094) Comment cleanup and rename of DsState::Repair -> Reconcile (#1102) do not panic the dynamometer for OOB writes (#1101) Allow dsc to start the downstairs in read-only mode. 
(#1098) Use the omicron-zone-package methods for topo sorting (#1099) Package with topological sorting (#1097) Fix clippy lints in dsc (#1095) Propolis changes: PHD: demote artifact store logs to DEBUG, enable DEBUG on CI (#626) PHD: fix missing newlines in serial.log (#622) PHD: fix run_shell_command with multiline commands (#621) PHD: fix `--artifact-directory` not doing anything (#618) Update h2 dependency Update Crucible (and Omicron) dependencies PHD: refactor guest serial console handling (#615) phd: add basic "migration-from-base" tests + machinery (#609) phd: Ensure min disk size fits read-only parents (#611) phd: automatically fetch `crucible-downstairs` from Buildomat (#604) Mitigate behavior from illumos#16183 PHD: add guest adapter for WS2022 (#607) phd: include error cause chain in failure output (#606) add QEMU pvpanic ISA device (#596) Add crucible-mem backend Make crucible opt parsing more terse in standalone Co-authored-by: Alan Hanson --- Cargo.lock | 164 +++++++++++++++++++++++++++--------------- Cargo.toml | 12 ++-- package-manifest.toml | 12 ++-- 3 files changed, 119 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5309fac767..a058462468 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -461,7 +461,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" +source = "git+https://github.com/oxidecomputer/propolis?rev=ff6c4df2e816eee6e7b2b0488777d30ef35ee217#ff6c4df2e816eee6e7b2b0488777d30ef35ee217" dependencies = [ "bhyve_api_sys", "libc", @@ -471,7 +471,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" +source = "git+https://github.com/oxidecomputer/propolis?rev=ff6c4df2e816eee6e7b2b0488777d30ef35ee217#ff6c4df2e816eee6e7b2b0488777d30ef35ee217" dependencies = [ "libc", "strum", @@ -638,7 +638,7 @@ dependencies = [ "ipnetwork", "omicron-common", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "regress", "reqwest", "schemars", @@ -1294,13 +1294,13 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=e71b10d2f9f1fb52818b916bae83ba15a339548d#e71b10d2f9f1fb52818b916bae83ba15a339548d" +source = "git+https://github.com/oxidecomputer/crucible?rev=2d4bc11232d53f177c286383926fa5f8c1b2a938#2d4bc11232d53f177c286383926fa5f8c1b2a938" dependencies = [ "anyhow", "chrono", "crucible-workspace-hack", "percent-encoding", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "reqwest", "schemars", "serde", @@ -1310,13 +1310,13 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=e71b10d2f9f1fb52818b916bae83ba15a339548d#e71b10d2f9f1fb52818b916bae83ba15a339548d" +source = "git+https://github.com/oxidecomputer/crucible?rev=2d4bc11232d53f177c286383926fa5f8c1b2a938#2d4bc11232d53f177c286383926fa5f8c1b2a938" dependencies = [ "anyhow", "chrono", "crucible-workspace-hack", "percent-encoding", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "reqwest", "schemars", "serde", @@ -1327,7 +1327,7 @@ dependencies = [ 
[[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=e71b10d2f9f1fb52818b916bae83ba15a339548d#e71b10d2f9f1fb52818b916bae83ba15a339548d" +source = "git+https://github.com/oxidecomputer/crucible?rev=2d4bc11232d53f177c286383926fa5f8c1b2a938#2d4bc11232d53f177c286383926fa5f8c1b2a938" dependencies = [ "crucible-workspace-hack", "libc", @@ -1530,8 +1530,8 @@ dependencies = [ "omicron-common", "omicron-workspace-hack", "omicron-zone-package", - "progenitor", - "progenitor-client", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "progenitor-client 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "quote", "reqwest", "rustfmt-wrapper", @@ -1852,7 +1852,7 @@ dependencies = [ "chrono", "http 0.2.11", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "reqwest", "schemars", "serde", @@ -1906,8 +1906,8 @@ dependencies = [ "ipnetwork", "omicron-workspace-hack", "omicron-zone-package", - "progenitor", - "progenitor-client", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "progenitor-client 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "quote", "rand 0.8.5", "regress", @@ -2545,7 +2545,7 @@ dependencies = [ "chrono", "gateway-messages", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "rand 0.8.5", "reqwest", "schemars", @@ -3457,7 +3457,7 @@ version = "0.1.0" dependencies = [ "installinator-common", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "regress", "reqwest", "schemars", @@ -3537,7 +3537,7 @@ dependencies = [ "omicron-common", "omicron-test-utils", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "reqwest", "serde", "serde_json", @@ -4042,8 +4042,8 @@ dependencies = [ "omicron-common", "omicron-workspace-hack", "omicron-zone-package", - "progenitor", - "progenitor-client", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "progenitor-client 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "quote", "reqwest", "rustfmt-wrapper", @@ -4212,7 +4212,7 @@ dependencies = [ "omicron-common", "omicron-passwords", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "regress", "reqwest", "schemars", @@ -4780,7 +4780,7 @@ dependencies = [ "omicron-workspace-hack", "once_cell", "parse-display", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "proptest", "rand 0.8.5", "regress", @@ -4880,7 +4880,7 @@ dependencies = [ "signal-hook", "signal-hook-tokio", "slog", - "slog-dtrace 0.3.0", + "slog-dtrace", "slog-error-chain", "sp-sim", "subprocess", @@ -4968,7 +4968,7 @@ dependencies = [ "petgraph", "pq-sys", "pretty_assertions", - "progenitor-client", + "progenitor-client 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "propolis-client", "rand 0.8.5", "rcgen", @@ -4989,7 +4989,7 @@ dependencies = [ "sled-agent-client", "slog", "slog-async", - "slog-dtrace 0.3.0", + "slog-dtrace", "slog-error-chain", "slog-term", "sp-sim", @@ -5183,7 +5183,7 @@ dependencies = [ "sled-storage", "slog", "slog-async", - "slog-dtrace 0.3.0", + "slog-dtrace", 
"slog-term", "smf", "static_assertions", @@ -5558,7 +5558,7 @@ dependencies = [ "http 0.2.11", "hyper 0.14.27", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "rand 0.8.5", "regress", "reqwest", @@ -5614,7 +5614,7 @@ dependencies = [ "futures", "omicron-common", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "reqwest", "serde", "slog", @@ -5651,7 +5651,7 @@ dependencies = [ "serde_json", "slog", "slog-async", - "slog-dtrace 0.3.0", + "slog-dtrace", "slog-term", "strum", "subprocess", @@ -5691,7 +5691,7 @@ dependencies = [ "serde_json", "slog", "slog-async", - "slog-dtrace 0.3.0", + "slog-dtrace", "slog-term", "sqlformat", "sqlparser", @@ -5750,7 +5750,7 @@ dependencies = [ "schemars", "serde", "slog", - "slog-dtrace 0.3.0", + "slog-dtrace", "thiserror", "tokio", "uuid", @@ -6416,9 +6416,20 @@ name = "progenitor" version = "0.5.0" source = "git+https://github.com/oxidecomputer/progenitor?branch=main#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" dependencies = [ - "progenitor-client", - "progenitor-impl", - "progenitor-macro", + "progenitor-client 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "progenitor-impl 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "progenitor-macro 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "serde_json", +] + +[[package]] +name = "progenitor" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" +dependencies = [ + "progenitor-client 0.5.0 (git+https://github.com/oxidecomputer/progenitor)", + "progenitor-impl 0.5.0 (git+https://github.com/oxidecomputer/progenitor)", + "progenitor-macro 0.5.0 (git+https://github.com/oxidecomputer/progenitor)", "serde_json", ] @@ -6436,6 +6447,20 @@ dependencies = [ "serde_urlencoded", ] +[[package]] +name = "progenitor-client" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" +dependencies = [ + "bytes", + "futures-core", + "percent-encoding", + "reqwest", + "serde", + "serde_json", + "serde_urlencoded", +] + [[package]] name = "progenitor-impl" version = "0.5.0" @@ -6458,6 +6483,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "progenitor-impl" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" +dependencies = [ + "getopts", + "heck 0.4.1", + "http 0.2.11", + "indexmap 2.1.0", + "openapiv3", + "proc-macro2", + "quote", + "regex", + "schemars", + "serde", + "serde_json", + "syn 2.0.48", + "thiserror", + "typify", + "unicode-ident", +] + [[package]] name = "progenitor-macro" version = "0.5.0" @@ -6465,7 +6512,24 @@ source = "git+https://github.com/oxidecomputer/progenitor?branch=main#86b60220b8 dependencies = [ "openapiv3", "proc-macro2", - "progenitor-impl", + "progenitor-impl 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "quote", + "schemars", + "serde", + "serde_json", + "serde_tokenstream 0.2.0", + "serde_yaml", + "syn 2.0.48", +] + +[[package]] +name = "progenitor-macro" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" +dependencies = [ + "openapiv3", + "proc-macro2", + "progenitor-impl 0.5.0 (git+https://github.com/oxidecomputer/progenitor)", "quote", "schemars", "serde", @@ -6478,12 
+6542,12 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" +source = "git+https://github.com/oxidecomputer/propolis?rev=ff6c4df2e816eee6e7b2b0488777d30ef35ee217#ff6c4df2e816eee6e7b2b0488777d30ef35ee217" dependencies = [ "async-trait", "base64", "futures", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor)", "rand 0.8.5", "reqwest", "schemars", @@ -6499,7 +6563,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" +source = "git+https://github.com/oxidecomputer/propolis?rev=ff6c4df2e816eee6e7b2b0488777d30ef35ee217#ff6c4df2e816eee6e7b2b0488777d30ef35ee217" dependencies = [ "anyhow", "atty", @@ -6508,7 +6572,7 @@ dependencies = [ "dropshot", "futures", "hyper 0.14.27", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor)", "propolis_types", "rand 0.8.5", "reqwest", @@ -6518,7 +6582,7 @@ dependencies = [ "slog", "slog-async", "slog-bunyan", - "slog-dtrace 0.2.3", + "slog-dtrace", "slog-term", "thiserror", "tokio", @@ -6529,7 +6593,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" +source = "git+https://github.com/oxidecomputer/propolis?rev=ff6c4df2e816eee6e7b2b0488777d30ef35ee217#ff6c4df2e816eee6e7b2b0488777d30ef35ee217" dependencies = [ "schemars", "serde", @@ -7990,7 +8054,7 @@ dependencies = [ "ipnetwork", "omicron-common", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "regress", "reqwest", "schemars", @@ -8081,20 +8145,6 @@ dependencies = [ "time", ] -[[package]] -name = "slog-dtrace" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebb79013d51afb48c5159d62068658fa672772be3aeeadee0d2710fb3903f637" -dependencies = [ - "chrono", - "serde", - "serde_json", - "slog", - "usdt 0.3.5", - "version_check", -] - [[package]] name = "slog-dtrace" version = "0.3.0" @@ -8307,7 +8357,7 @@ dependencies = [ "omicron-workspace-hack", "serde", "slog", - "slog-dtrace 0.3.0", + "slog-dtrace", "sprockets-rot", "thiserror", "tokio", @@ -10240,7 +10290,7 @@ dependencies = [ "sha2", "sled-hardware", "slog", - "slog-dtrace 0.3.0", + "slog-dtrace", "snafu", "subprocess", "tar", @@ -10269,7 +10319,7 @@ dependencies = [ "installinator-common", "ipnetwork", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "regress", "reqwest", "schemars", diff --git a/Cargo.toml b/Cargo.toml index 1192806d15..d0738f9cd3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -178,9 +178,9 @@ cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e71b10d2f9f1fb52818b916bae83ba15a339548d" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e71b10d2f9f1fb52818b916bae83ba15a339548d" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = 
"e71b10d2f9f1fb52818b916bae83ba15a339548d" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "2d4bc11232d53f177c286383926fa5f8c1b2a938" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "2d4bc11232d53f177c286383926fa5f8c1b2a938" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "2d4bc11232d53f177c286383926fa5f8c1b2a938" } curve25519-dalek = "4" datatest-stable = "0.2.3" display-error-chain = "0.2.0" @@ -304,9 +304,9 @@ prettyplease = "0.2.16" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "1e25649e8c2ac274bd04adfe0513dd14a482058c" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "1e25649e8c2ac274bd04adfe0513dd14a482058c" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "1e25649e8c2ac274bd04adfe0513dd14a482058c" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "ff6c4df2e816eee6e7b2b0488777d30ef35ee217" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "ff6c4df2e816eee6e7b2b0488777d30ef35ee217" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "ff6c4df2e816eee6e7b2b0488777d30ef35ee217" } proptest = "1.4.0" quote = "1.0" rand = "0.8.5" diff --git a/package-manifest.toml b/package-manifest.toml index b08457e46c..c34b84eb9d 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -405,10 +405,10 @@ only_for_targets.image = "standard" # 3. Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "e71b10d2f9f1fb52818b916bae83ba15a339548d" +source.commit = "2d4bc11232d53f177c286383926fa5f8c1b2a938" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "030a02551e487f561bcfad47426b953d15c4430d77770765c7fc03afd8d61bd9" +source.sha256 = "88ec93657a644e8f10a32d1d22cc027db901aea81027f49ce7bee58fc4a35755" output.type = "zone" [package.crucible-pantry] @@ -416,10 +416,10 @@ service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "e71b10d2f9f1fb52818b916bae83ba15a339548d" +source.commit = "2d4bc11232d53f177c286383926fa5f8c1b2a938" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "c74e23e7f7995ba3a69a9ec3a31f1db517ec15cd3a9942c2c07621b219b743b2" +source.sha256 = "e2c3ed2d4cd6b5da3d38dd52df6d4a259280be7d45c30a363e9c71b174ecc6f8" output.type = "zone" # Refer to @@ -430,10 +430,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "1e25649e8c2ac274bd04adfe0513dd14a482058c" +source.commit = "ff6c4df2e816eee6e7b2b0488777d30ef35ee217" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "09c124315da3e434c85fe1ddb16459c36d8302e15705ff18fe6bbc7b4876f5f9" +source.sha256 = "aa10aa245a92e657fc074bd588ef6bbddaad2d9c946a8e1b91c02dce7e057561" output.type = "zone" 
[package.mg-ddm-gz] From 17153dbbfd6363b082feecb69b35f0bcf8e6077f Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 29 Jan 2024 11:24:24 -0800 Subject: [PATCH 57/91] Update Rust crate serde_json to 1.0.113 (#4910) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a058462468..78dc9039ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7767,9 +7767,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.111" +version = "1.0.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" +checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" dependencies = [ "itoa", "ryu", diff --git a/Cargo.toml b/Cargo.toml index d0738f9cd3..591cc143ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -332,7 +332,7 @@ semver = { version = "1.0.21", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive" ] } serde_derive = "1.0" serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" } -serde_json = "1.0.111" +serde_json = "1.0.113" serde_path_to_error = "0.1.15" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index cebd4cab36..bf01830630 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -92,7 +92,7 @@ ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.21", features = ["serde"] } serde = { version = "1.0.195", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.113", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -200,7 +200,7 @@ ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.21", features = ["serde"] } serde = { version = "1.0.195", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.113", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } From 28c938f7b4bf6a24a71de65b0a94aefa8511ec5a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 29 Jan 2024 11:24:41 -0800 Subject: [PATCH 58/91] Update Rust crate cargo_toml to 0.19 (#4914) --- Cargo.lock | 4 ++-- dev-tools/xtask/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 78dc9039ab..410daa8a84 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -794,9 +794,9 @@ dependencies = [ [[package]] name = "cargo_toml" -version = "0.18.0" +version = "0.19.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "802b755090e39835a4b0440fb0bbee0df7495a8b337f63db21e616f7821c7e8c" +checksum = "922d6ea3081d68b9e3e09557204bff47f9b5406a4a304dc917e187f8cafd582b" dependencies = [ "serde", "toml 0.8.8", diff --git a/dev-tools/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml index bccb69a1f7..0429fcae79 100644 --- a/dev-tools/xtask/Cargo.toml +++ b/dev-tools/xtask/Cargo.toml @@ -7,6 +7,6 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true camino.workspace = true -cargo_toml = "0.18" +cargo_toml = "0.19" cargo_metadata = "0.18" clap.workspace = true From ab1ba613b5ec4d30b369543e3702dc8c4f735eaf Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 29 Jan 2024 11:24:56 -0800 Subject: [PATCH 59/91] Update Rust crate rcgen to 0.12.1 (#4909) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 410daa8a84..d40d2a5839 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6820,9 +6820,9 @@ dependencies = [ [[package]] name = "rcgen" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d918c80c5a4c7560db726763020bd16db179e4d5b828078842274a443addb5d" +checksum = "48406db8ac1f3cbc7dcdb56ec355343817958a356ff430259bb07baf7607e1e1" dependencies = [ "pem", "ring 0.17.7", diff --git a/Cargo.toml b/Cargo.toml index 591cc143ac..e1e4d40736 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -312,7 +312,7 @@ quote = "1.0" rand = "0.8.5" ratatui = "0.25.0" rayon = "1.8" -rcgen = "0.12.0" +rcgen = "0.12.1" reedline = "0.28.0" ref-cast = "1.0" regex = "1.10.3" From 62547d22e74c4bb134b8d65e373ace793de7529f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 30 Jan 2024 06:19:33 -0500 Subject: [PATCH 60/91] RoT staging/dev and prod/rel v1.0.5 (#4917) --- .github/buildomat/jobs/tuf-repo.sh | 4 ++-- tools/dvt_dock_version | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/buildomat/jobs/tuf-repo.sh b/.github/buildomat/jobs/tuf-repo.sh index 5e7a2d4a91..f8514e2b13 100644 --- a/.github/buildomat/jobs/tuf-repo.sh +++ b/.github/buildomat/jobs/tuf-repo.sh @@ -278,8 +278,8 @@ EOF done } # usage: SERIES ROT_DIR ROT_VERSION BOARDS... 
-add_hubris_artifacts rot-staging-dev staging/dev cert-staging-dev-v1.0.4 "${ALL_BOARDS[@]}" -add_hubris_artifacts rot-prod-rel prod/rel cert-prod-rel-v1.0.4 "${ALL_BOARDS[@]}" +add_hubris_artifacts rot-staging-dev staging/dev cert-staging-dev-v1.0.5 "${ALL_BOARDS[@]}" +add_hubris_artifacts rot-prod-rel prod/rel cert-prod-rel-v1.0.5 "${ALL_BOARDS[@]}" for series in "${SERIES_LIST[@]}"; do /work/tufaceous assemble --no-generate-key /work/manifest-"$series".toml /work/repo-"$series".zip diff --git a/tools/dvt_dock_version b/tools/dvt_dock_version index f7fef543f4..047065135b 100644 --- a/tools/dvt_dock_version +++ b/tools/dvt_dock_version @@ -1 +1 @@ -COMMIT=ad874c11ecd0c45bdc1e4c2ac35c2bcbe472d55f +COMMIT=e384836415e05ae0ba648810ab1c87e9093cdabb From 5ba928fb9b661fb46142be8c63552e9e7d7c6278 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Tue, 30 Jan 2024 15:31:31 -0600 Subject: [PATCH 61/91] Don't allow link/unlink or range add/remove on service IP pool (#4930) Closes #4762 --- nexus/src/app/ip_pool.rs | 37 +++++----- nexus/tests/integration_tests/instances.rs | 1 + nexus/tests/integration_tests/ip_pools.rs | 80 ++++++++++++++++------ 3 files changed, 81 insertions(+), 37 deletions(-) diff --git a/nexus/src/app/ip_pool.rs b/nexus/src/app/ip_pool.rs index d8d36fff4b..87a7d98c91 100644 --- a/nexus/src/app/ip_pool.rs +++ b/nexus/src/app/ip_pool.rs @@ -149,6 +149,11 @@ impl super::Nexus { ) -> CreateResult { let (authz_pool,) = pool_lookup.lookup_for(authz::Action::Modify).await?; + + if self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await? { + return Err(not_found_from_lookup(pool_lookup)); + } + let (authz_silo,) = self .silo_lookup(&opctx, silo_link.silo.clone())? .lookup_for(authz::Action::Modify) @@ -174,6 +179,11 @@ impl super::Nexus { ) -> DeleteResult { let (.., authz_pool) = pool_lookup.lookup_for(authz::Action::Modify).await?; + + if self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await? { + return Err(not_found_from_lookup(pool_lookup)); + } + let (.., authz_silo) = silo_lookup.lookup_for(authz::Action::Modify).await?; @@ -191,6 +201,11 @@ impl super::Nexus { ) -> CreateResult { let (.., authz_pool) = pool_lookup.lookup_for(authz::Action::Modify).await?; + + if self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await? { + return Err(not_found_from_lookup(pool_lookup)); + } + let (.., authz_silo) = silo_lookup.lookup_for(authz::Action::Modify).await?; @@ -220,9 +235,7 @@ impl super::Nexus { let (.., authz_pool, db_pool) = pool_lookup.fetch_for(authz::Action::Delete).await?; - let is_internal = - self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await?; - if is_internal { + if self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await? { return Err(not_found_from_lookup(pool_lookup)); } @@ -238,9 +251,7 @@ impl super::Nexus { let (.., authz_pool) = pool_lookup.lookup_for(authz::Action::Modify).await?; - let is_internal = - self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await?; - if is_internal { + if self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await? { return Err(not_found_from_lookup(pool_lookup)); } @@ -258,9 +269,7 @@ impl super::Nexus { let (.., authz_pool) = pool_lookup.lookup_for(authz::Action::ListChildren).await?; - let is_internal = - self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await?; - if is_internal { + if self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await? 
{ return Err(not_found_from_lookup(pool_lookup)); } @@ -278,11 +287,10 @@ impl super::Nexus { let (.., authz_pool, _db_pool) = pool_lookup.fetch_for(authz::Action::Modify).await?; - let is_internal = - self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await?; - if is_internal { + if self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await? { return Err(not_found_from_lookup(pool_lookup)); } + self.db_datastore.ip_pool_add_range(opctx, &authz_pool, range).await } @@ -295,10 +303,7 @@ impl super::Nexus { let (.., authz_pool, _db_pool) = pool_lookup.fetch_for(authz::Action::Modify).await?; - let is_internal = - self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await?; - - if is_internal { + if self.db_datastore.ip_pool_is_internal(opctx, &authz_pool).await? { return Err(not_found_from_lookup(pool_lookup)); } diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 57b731c692..875aab74f7 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -3692,6 +3692,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( stop_instance(&cptestctx, "pool1-inst").await; stop_instance(&cptestctx, "pool2-inst").await; + // now unlink works object_delete(client, &pool1_silo_url).await; // create instance with pool1, expecting allocation to fail diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs index 77a5cd5c8a..4f88fa787b 100644 --- a/nexus/tests/integration_tests/ip_pools.rs +++ b/nexus/tests/integration_tests/ip_pools.rs @@ -11,6 +11,7 @@ use http::method::Method; use http::StatusCode; use nexus_db_queries::db::datastore::SERVICE_IP_POOL_NAME; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; +use nexus_db_queries::db::fixed_data::silo::INTERNAL_SILO_ID; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; @@ -339,10 +340,8 @@ async fn test_ip_pool_service_no_cud(cptestctx: &ControlPlaneTestContext) { StatusCode::NOT_FOUND, ) .await; - assert_eq!( - error.message, - "not found: ip-pool with name \"oxide-service-pool\"" - ); + let not_found_name = "not found: ip-pool with name \"oxide-service-pool\""; + assert_eq!(error.message, not_found_name); let not_found_id = format!("not found: ip-pool with id \"{}\"", pool.identity.id); @@ -370,23 +369,62 @@ async fn test_ip_pool_service_no_cud(cptestctx: &ControlPlaneTestContext) { .await; assert_eq!(error.message, not_found_id); - // linking not allowed - - // let link_body = params::IpPoolLinkSilo { - // silo: NameOrId::Name(cptestctx.silo_name.clone()), - // is_default: false, - // }; - // let link_url = format!("{}/silos", internal_pool_id_url); - // let error = object_create_error( - // client, - // &link_url, - // &link_body, - // StatusCode::NOT_FOUND, - // ) - // .await; - // assert_eq!(error.message, not_found_id); - - // TODO: link, unlink, add/remove range by name or ID should all fail + let error = object_put_error( + client, + &internal_pool_name_url, + &put_body, + StatusCode::NOT_FOUND, + ) + .await; + assert_eq!(error.message, not_found_name); + + // add range not allowed by name or ID + let range = IpRange::V4( + Ipv4Range::new( + std::net::Ipv4Addr::new(10, 0, 0, 2), + std::net::Ipv4Addr::new(10, 0, 0, 5), + ) + .unwrap(), + ); + let url = format!("{}/ranges/add", internal_pool_id_url); + let error = + object_create_error(client, &url, &range, StatusCode::NOT_FOUND).await; 
+    assert_eq!(error.message, not_found_id);
+
+    let url = format!("{}/ranges/add", internal_pool_name_url);
+    let error =
+        object_create_error(client, &url, &range, StatusCode::NOT_FOUND).await;
+    assert_eq!(error.message, not_found_name);
+
+    // remove range not allowed by name or ID
+    let url = format!("{}/ranges/remove", internal_pool_id_url);
+    let error =
+        object_create_error(client, &url, &range, StatusCode::NOT_FOUND).await;
+    assert_eq!(error.message, not_found_id);
+
+    let url = format!("{}/ranges/remove", internal_pool_name_url);
+    let error =
+        object_create_error(client, &url, &range, StatusCode::NOT_FOUND).await;
+    assert_eq!(error.message, not_found_name);
+
+    // linking not allowed by name or ID
+    let body = params::IpPoolLinkSilo {
+        silo: NameOrId::Name(cptestctx.silo_name.clone()),
+        is_default: false,
+    };
+    let url = format!("{}/silos", internal_pool_id_url);
+    let error =
+        object_create_error(client, &url, &body, StatusCode::NOT_FOUND).await;
+    assert_eq!(error.message, not_found_id);
+
+    // unlink not allowed by name or ID
+    let url = format!("{}/silos/{}", internal_pool_id_url, *INTERNAL_SILO_ID);
+    let error = object_delete_error(client, &url, StatusCode::NOT_FOUND).await;
+    assert_eq!(error.message, not_found_id);
+
+    let url = format!("{}/silos/{}", internal_pool_name_url, *INTERNAL_SILO_ID);
+    let error = object_delete_error(client, &url, StatusCode::NOT_FOUND).await;
+    assert_eq!(error.message, not_found_name);
 }

 #[nexus_test]

From 5439ce8f391e1f2f1679378171c5a3d880f7ee37 Mon Sep 17 00:00:00 2001
From: Aaron Hartwig
Date: Tue, 30 Jan 2024 17:38:07 -0600
Subject: [PATCH 62/91] Add gimlet-f to TUF repo (#4871)

As of
https://github.com/oxidecomputer/hubris/commit/1fcb6ce375a6b10e4f0ba40745da688748deff39
Hubris now has Gimlet F images.
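Because `ALL_BOARDS` is built with bash brace expansion, widening the range
from `gimlet-{c..e}` to `gimlet-{c..f}` expands to
`gimlet-c gimlet-d gimlet-e gimlet-f`, so both `add_hubris_artifacts`
invocations pick up the new board; the matching `build-gimlet-f` checksum
entry is added alongside.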
--- .github/buildomat/jobs/tuf-repo.sh | 2 +- tools/hubris_checksums | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/buildomat/jobs/tuf-repo.sh b/.github/buildomat/jobs/tuf-repo.sh index f8514e2b13..59fc0fd7a7 100644 --- a/.github/buildomat/jobs/tuf-repo.sh +++ b/.github/buildomat/jobs/tuf-repo.sh @@ -71,7 +71,7 @@ set -o errexit set -o pipefail set -o xtrace -ALL_BOARDS=(gimlet-{c..e} psc-{b..c} sidecar-{b..c}) +ALL_BOARDS=(gimlet-{c..f} psc-{b..c} sidecar-{b..c}) TOP=$PWD VERSION=$(< /input/package/work/version.txt) diff --git a/tools/hubris_checksums b/tools/hubris_checksums index 8dc282e500..d451f7a86c 100644 --- a/tools/hubris_checksums +++ b/tools/hubris_checksums @@ -1,6 +1,7 @@ e1b3dc5c7da643b27c0dd5bf8e915d13661446e711bfdeb1d8274eed63fa5843 build-gimlet-c-image-default-v1.0.6.zip 3002444307047429531ef862435a034c64b89a698921bf19794ac97b777a2f95 build-gimlet-d-image-default-v1.0.6.zip 9e783bc92fb1c8a91f4b117241ed4c0ff2818f32f46c5193cdcdbbe02d56af9a build-gimlet-e-image-default-v1.0.6.zip +458c4f02310fe79f27841ce87b2a7c163494f0196890e6420fac17dc4803b51c build-gimlet-f-image-default-v1.0.6.zip dece7d39f7fcd2f15dc62d91e94046b1f438a3e0fd2c804efd5f67e12ce0dd58 build-psc-b-image-default-v1.0.6.zip 7e94035b52f1dcb137b477750bf9e215d4fcd07fe95b2cfdbbc0d7fada79eb28 build-psc-c-image-default-v1.0.6.zip ccf09dc7c9c2a946b89bcfafb391100504880fa395c9079dfb7a3b28635a4abb build-sidecar-b-image-default-v1.0.6.zip From 5780ff6d1baaa9199e2a64a6b91665d636bf2e3e Mon Sep 17 00:00:00 2001 From: Benjamin Leonard Date: Wed, 31 Jan 2024 03:24:24 +0000 Subject: [PATCH 63/91] User specified SSH keys to inject at instance create time (#4764) --- end-to-end-tests/src/instance_launch.rs | 6 +- nexus/db-model/src/schema.rs | 13 +- nexus/db-model/src/ssh_key.rs | 8 + nexus/db-queries/src/db/datastore/instance.rs | 2 + nexus/db-queries/src/db/datastore/ssh_key.rs | 188 ++++++++++++++++ .../db-queries/src/db/queries/external_ip.rs | 1 + .../src/db/queries/network_interface.rs | 1 + nexus/src/app/instance.rs | 64 ++++-- nexus/src/app/mod.rs | 4 + nexus/src/app/sagas/instance_create.rs | 80 ++++++- nexus/src/app/sagas/instance_delete.rs | 1 + nexus/src/app/sagas/instance_migrate.rs | 1 + nexus/src/app/sagas/instance_start.rs | 1 + nexus/src/app/sagas/snapshot_create.rs | 1 + nexus/src/app/silo.rs | 71 ------ nexus/src/app/ssh_key.rs | 98 ++++++++ nexus/src/external_api/http_entrypoints.rs | 45 ++++ nexus/test-utils/src/resource_helpers.rs | 1 + nexus/tests/integration_tests/endpoints.rs | 14 ++ nexus/tests/integration_tests/instances.rs | 213 ++++++++++++++++++ nexus/tests/integration_tests/projects.rs | 1 + nexus/tests/integration_tests/quotas.rs | 1 + nexus/tests/integration_tests/snapshots.rs | 1 + .../integration_tests/subnet_allocation.rs | 1 + nexus/tests/output/nexus_tags.txt | 1 + nexus/types/src/external_api/params.rs | 8 + nexus/types/src/external_api/views.rs | 4 +- openapi/nexus.json | 85 +++++++ schema/crdb/30.0.0/up.sql | 6 + schema/crdb/dbinit.sql | 15 +- 30 files changed, 838 insertions(+), 98 deletions(-) create mode 100644 nexus/src/app/ssh_key.rs create mode 100644 schema/crdb/30.0.0/up.sql diff --git a/end-to-end-tests/src/instance_launch.rs b/end-to-end-tests/src/instance_launch.rs index 2efd66bf91..019bd73b04 100644 --- a/end-to-end-tests/src/instance_launch.rs +++ b/end-to-end-tests/src/instance_launch.rs @@ -26,10 +26,11 @@ async fn instance_launch() -> Result<()> { Arc::new(KeyPair::generate_ed25519().context("key generation failed")?); let public_key_str = 
format!("ssh-ed25519 {}", key.public_key_base64()); eprintln!("create SSH key: {}", public_key_str); + let ssh_key_name = generate_name("key")?; ctx.client .current_user_ssh_key_create() .body(SshKeyCreate { - name: generate_name("key")?, + name: ssh_key_name.clone(), description: String::new(), public_key: public_key_str, }) @@ -72,6 +73,9 @@ async fn instance_launch() -> Result<()> { network_interfaces: InstanceNetworkInterfaceAttachment::Default, external_ips: vec![ExternalIpCreate::Ephemeral { pool: None }], user_data: String::new(), + ssh_keys: Some(vec![oxide_client::types::NameOrId::Name( + ssh_key_name.clone(), + )]), start: true, }) .send() diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index c2959917e1..9259532c52 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(29, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(30, 0, 0); table! { disk (id) { @@ -713,6 +713,13 @@ table! { } } +table! { + instance_ssh_key (instance_id, ssh_key_id) { + instance_id -> Uuid, + ssh_key_id -> Uuid, + } +} + table! { oximeter (id) { id -> Uuid, @@ -1559,3 +1566,7 @@ allow_tables_to_appear_in_same_query!( allow_tables_to_appear_in_same_query!(disk, virtual_provisioning_resource); allow_tables_to_appear_in_same_query!(volume, virtual_provisioning_resource); + +allow_tables_to_appear_in_same_query!(ssh_key, instance_ssh_key, instance); +joinable!(instance_ssh_key -> ssh_key (ssh_key_id)); +joinable!(instance_ssh_key -> instance (instance_id)); diff --git a/nexus/db-model/src/ssh_key.rs b/nexus/db-model/src/ssh_key.rs index 79513ded62..c7e5cfa60f 100644 --- a/nexus/db-model/src/ssh_key.rs +++ b/nexus/db-model/src/ssh_key.rs @@ -2,6 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+use crate::schema::instance_ssh_key;
 use crate::schema::ssh_key;
 use db_macros::Resource;
 use nexus_types::external_api::params;
@@ -47,3 +48,10 @@ impl From<SshKey> for views::SshKey {
     }
 }
+
+#[derive(Clone, Debug, Insertable, Queryable, Selectable)]
+#[diesel(table_name = instance_ssh_key)]
+pub struct InstanceSshKey {
+    pub instance_id: Uuid,
+    pub ssh_key_id: Uuid,
+}
diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs
index c01f40e791..ca7efe32f7 100644
--- a/nexus/db-queries/src/db/datastore/instance.rs
+++ b/nexus/db-queries/src/db/datastore/instance.rs
@@ -465,6 +465,8 @@ impl DataStore {
             }
         })?;

+        self.instance_ssh_keys_delete(opctx, authz_instance.id()).await?;
+
         Ok(())
     }
 }
diff --git a/nexus/db-queries/src/db/datastore/ssh_key.rs b/nexus/db-queries/src/db/datastore/ssh_key.rs
index c925903e12..a5f7427267 100644
--- a/nexus/db-queries/src/db/datastore/ssh_key.rs
+++ b/nexus/db-queries/src/db/datastore/ssh_key.rs
@@ -21,11 +21,199 @@ use diesel::prelude::*;
 use omicron_common::api::external::http_pagination::PaginatedBy;
 use omicron_common::api::external::CreateResult;
 use omicron_common::api::external::DeleteResult;
+use omicron_common::api::external::Error;
 use omicron_common::api::external::ListResultVec;
+use omicron_common::api::external::LookupType;
+use omicron_common::api::external::NameOrId;
 use omicron_common::api::external::ResourceType;
+use omicron_common::api::external::UpdateResult;
 use ref_cast::RefCast;
+use uuid::Uuid;

 impl DataStore {
+    /// Resolves a list of names or IDs to a list of IDs that are validated to
+    /// both exist and be owned by the current user.
+    pub async fn ssh_keys_batch_lookup(
+        &self,
+        opctx: &OpContext,
+        authz_user: &authz::SiloUser,
+        keys: &Vec<NameOrId>,
+    ) -> ListResultVec<Uuid> {
+        opctx.authorize(authz::Action::ListChildren, authz_user).await?;
+
+        let mut names: Vec<Name> = vec![];
+        let mut ids: Vec<Uuid> = vec![];
+
+        for key in keys.iter() {
+            match key {
+                NameOrId::Name(name) => names.push(name.clone().into()),
+                NameOrId::Id(id) => ids.push(*id),
+            }
+        }
+
+        use db::schema::ssh_key::dsl;
+        let result: Vec<(Uuid, Name)> = dsl::ssh_key
+            .filter(dsl::id.eq_any(ids).or(dsl::name.eq_any(names)))
+            .filter(dsl::silo_user_id.eq(authz_user.id()))
+            .filter(dsl::time_deleted.is_null())
+            .select((dsl::id, dsl::name))
+            .get_results_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        // If a key that was present in the input isn't present in the result,
+        // that means it either doesn't exist or isn't owned by the user.
+        // Either way we want to give a specific lookup error for at least the
+        // first missing key. It would be nice to include an aggregate error
+        // with all the missing keys.
+        for key in keys.iter() {
+            match key {
+                NameOrId::Name(name) => {
+                    if !result
+                        .iter()
+                        .any(|(_, n)| n.clone() == name.clone().into())
+                    {
+                        return Err(Error::ObjectNotFound {
+                            type_name: ResourceType::SshKey,
+                            lookup_type: LookupType::ByName(name.to_string()),
+                        });
+                    }
+                }
+                NameOrId::Id(id) => {
+                    if !result.iter().any(|&(i, _)| i == *id) {
+                        return Err(Error::ObjectNotFound {
+                            type_name: ResourceType::SshKey,
+                            lookup_type: LookupType::ById(*id),
+                        });
+                    }
+                }
+            }
+        }
+
+        return Ok(result.iter().map(|&(id, _)| id).collect());
+    }
+
+    /// Given a list of IDs for SSH public keys, fetches the keys that belong to
+    /// the user and aren't deleted. Does not fail if keys are missing.
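+    ///
+    /// A minimal usage sketch (`key_ids` is a hypothetical `Vec<Uuid>`; an
+    /// authorized `opctx` and the caller's `authz_user` are assumed):
+    ///
+    /// ```ignore
+    /// let found: Vec<SshKey> = datastore
+    ///     .ssh_keys_batch_fetch(&opctx, &authz_user, &key_ids)
+    ///     .await?;
+    /// // `found` may be shorter than `key_ids` if some keys were deleted.
+    /// ```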
+    pub async fn ssh_keys_batch_fetch(
+        &self,
+        opctx: &OpContext,
+        authz_user: &authz::SiloUser,
+        keys: &Vec<Uuid>,
+    ) -> ListResultVec<SshKey> {
+        opctx.authorize(authz::Action::ListChildren, authz_user).await?;
+
+        use db::schema::ssh_key::dsl;
+        dsl::ssh_key
+            .filter(dsl::id.eq_any(keys.to_owned()))
+            .filter(dsl::silo_user_id.eq(authz_user.id()))
+            .filter(dsl::time_deleted.is_null())
+            .select(SshKey::as_select())
+            .get_results_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    // Associate a list of SSH public keys with an instance. This happens
+    // during the instance create saga and does not fail if the ssh keys
+    // have been deleted as a race condition.
+    pub async fn ssh_keys_batch_assign(
+        &self,
+        opctx: &OpContext,
+        authz_user: &authz::SiloUser,
+        instance_id: Uuid,
+        keys: &Option<Vec<Uuid>>,
+    ) -> UpdateResult<()> {
+        opctx.authorize(authz::Action::ListChildren, authz_user).await?;
+
+        let instance_ssh_keys: Vec<db::model::InstanceSshKey> = match keys {
+            // If the keys are None, use the fallback behavior of assigning all the user's keys
+            None => {
+                use db::schema::ssh_key::dsl;
+                dsl::ssh_key
+                    .filter(dsl::silo_user_id.eq(authz_user.id()))
+                    .filter(dsl::time_deleted.is_null())
+                    .select(dsl::id)
+                    .get_results_async(
+                        &*self.pool_connection_authorized(opctx).await?,
+                    )
+                    .await
+                    .map_err(|e| {
+                        public_error_from_diesel(e, ErrorHandler::Server)
+                    })?
+                    .iter()
+                    .map(|key| db::model::InstanceSshKey {
+                        instance_id,
+                        ssh_key_id: *key,
+                    })
+                    .collect()
+            }
+            // If the keys are Some and empty, opt out of assigning any ssh keys
+            Some(vec) if vec.is_empty() => return Ok(()),
+            // If the keys are Some and non-empty, assign the given keys
+            Some(vec) => vec
+                .iter()
+                .map(|key| db::model::InstanceSshKey {
+                    instance_id,
+                    ssh_key_id: *key,
+                })
+                .collect(),
+        };
+
+        use db::schema::instance_ssh_key::dsl;
+
+        diesel::insert_into(dsl::instance_ssh_key)
+            .values(instance_ssh_keys)
+            .execute_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(())
+    }
+
+    pub async fn instance_ssh_keys_list(
+        &self,
+        opctx: &OpContext,
+        authz_instance: &authz::Instance,
+        pagparams: &PaginatedBy<'_>,
+    ) -> ListResultVec<SshKey> {
+        use db::schema::instance_ssh_key::dsl as inst_dsl;
+        use db::schema::ssh_key::dsl;
+        match pagparams {
+            PaginatedBy::Id(pagparams) => {
+                paginated(dsl::ssh_key, dsl::id, &pagparams)
+            }
+            PaginatedBy::Name(pagparams) => paginated(
+                dsl::ssh_key,
+                dsl::name,
+                &pagparams.map_name(|n| Name::ref_cast(n)),
+            ),
+        }
+        .inner_join(
+            db::schema::instance_ssh_key::table
+                .on(dsl::id.eq(inst_dsl::ssh_key_id)),
+        )
+        .filter(inst_dsl::instance_id.eq(authz_instance.id()))
+        .filter(dsl::time_deleted.is_null())
+        .select(SshKey::as_select())
+        .get_results_async(&*self.pool_connection_authorized(opctx).await?)
+        .await
+        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+    }
+
+    pub async fn instance_ssh_keys_delete(
+        &self,
+        opctx: &OpContext,
+        instance_id: Uuid,
+    ) -> DeleteResult {
+        use db::schema::instance_ssh_key::dsl;
+        diesel::delete(dsl::instance_ssh_key)
+            .filter(dsl::instance_id.eq(instance_id))
+            .execute_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+        Ok(())
+    }
+
     pub async fn ssh_keys_list(
         &self,
         opctx: &OpContext,
diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs
index 8114b9e363..2bf4efcdc0 100644
--- a/nexus/db-queries/src/db/queries/external_ip.rs
+++ b/nexus/db-queries/src/db/queries/external_ip.rs
@@ -999,6 +999,7 @@ mod tests {
             ncpus: InstanceCpuCount(omicron_common::api::external::InstanceCpuCount(1)).into(),
             memory: ByteCount(omicron_common::api::external::ByteCount::from_gibibytes_u32(1)).into(),
             hostname: "test".into(),
+            ssh_keys: None,
             user_data: vec![],
             network_interfaces: Default::default(),
             external_ips: vec![],
diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs
index 6d00b4bc29..443e7da39b 100644
--- a/nexus/db-queries/src/db/queries/network_interface.rs
+++ b/nexus/db-queries/src/db/queries/network_interface.rs
@@ -1740,6 +1740,7 @@ mod tests {
             memory: ByteCount::from_gibibytes_u32(4),
             hostname: "inst".to_string(),
             user_data: vec![],
+            ssh_keys: Some(Vec::new()),
             network_interfaces: InstanceNetworkInterfaceAttachment::None,
             external_ips: vec![],
             disks: vec![],
diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs
index f924653525..42f8e2d6a0 100644
--- a/nexus/src/app/instance.rs
+++ b/nexus/src/app/instance.rs
@@ -9,6 +9,7 @@ use super::MAX_EPHEMERAL_IPS_PER_INSTANCE;
 use super::MAX_EXTERNAL_IPS_PER_INSTANCE;
 use super::MAX_MEMORY_BYTES_PER_INSTANCE;
 use super::MAX_NICS_PER_INSTANCE;
+use super::MAX_SSH_KEYS_PER_INSTANCE;
 use super::MAX_VCPU_PER_INSTANCE;
 use super::MIN_MEMORY_BYTES_PER_INSTANCE;
 use crate::app::sagas;
@@ -60,8 +61,6 @@ use std::sync::Arc;
 use tokio::io::{AsyncRead, AsyncWrite};
 use uuid::Uuid;

-const MAX_KEYS_PER_INSTANCE: u32 = 8;
-
 type SledAgentClientError =
     sled_agent_client::Error<sled_agent_client::types::Error>;

@@ -323,10 +322,41 @@ impl super::Nexus {
             ));
         }

+        let actor = opctx.authn.actor_required().internal_context(
+            "loading current user's ssh keys for new Instance",
+        )?;
+        let (.., authz_user) = LookupPath::new(opctx, &self.db_datastore)
+            .silo_user_id(actor.actor_id())
+            .lookup_for(authz::Action::ListChildren)
+            .await?;
+
+        let ssh_keys = match &params.ssh_keys {
+            Some(keys) => Some(
+                self.db_datastore
+                    .ssh_keys_batch_lookup(opctx, &authz_user, keys)
+                    .await?
+                    .iter()
+                    .map(|id| NameOrId::Id(*id))
+                    .collect::<Vec<NameOrId>>(),
+            ),
+            None => None,
+        };
+        if let Some(ssh_keys) = &ssh_keys {
+            if ssh_keys.len() > MAX_SSH_KEYS_PER_INSTANCE.try_into().unwrap() {
+                return Err(Error::invalid_request(format!(
+                    "cannot attach more than {} ssh keys to the instance",
+                    MAX_SSH_KEYS_PER_INSTANCE
+                )));
+            }
+        }
+
         let saga_params = sagas::instance_create::Params {
             serialized_authn: authn::saga::Serialized::for_opctx(opctx),
             project_id: authz_project.id(),
-            create_params: params.clone(),
+            create_params: params::InstanceCreate {
+                ssh_keys,
+                ..params.clone()
+            },
             boundary_switches: self
                 .boundary_switches(&self.opctx_alloc)
                 .await?,
@@ -1118,33 +1148,23 @@ impl super::Nexus {
             vec![]
         };

-        // Gather the SSH public keys of the actor make the request so
-        // that they may be injected into the new image via cloud-init.
-        // TODO-security: this should be replaced with a lookup based on
-        // on `SiloUser` role assignments once those are in place.
-        let actor = opctx.authn.actor_required().internal_context(
-            "loading current user's ssh keys for new Instance",
-        )?;
-        let (.., authz_user) = LookupPath::new(opctx, &self.db_datastore)
-            .silo_user_id(actor.actor_id())
-            .lookup_for(authz::Action::ListChildren)
-            .await?;
-        let public_keys = self
+        let ssh_keys = self
             .db_datastore
-            .ssh_keys_list(
+            .instance_ssh_keys_list(
                 opctx,
-                &authz_user,
+                authz_instance,
                 &PaginatedBy::Name(DataPageParams {
                     marker: None,
                     direction: dropshot::PaginationOrder::Ascending,
-                    limit: std::num::NonZeroU32::new(MAX_KEYS_PER_INSTANCE)
+                    limit: std::num::NonZeroU32::new(MAX_SSH_KEYS_PER_INSTANCE)
                         .unwrap(),
                 }),
             )
             .await?
-            .into_iter()
-            .map(|ssh_key| ssh_key.public_key)
-            .collect::<Vec<String>>();
+            .into_iter();
+
+        let ssh_keys: Vec<String> =
+            ssh_keys.map(|ssh_key| ssh_key.public_key).collect();

         // Ask the sled agent to begin the state change. Then update the
         // database to reflect the new intermediate state. If this update is
@@ -1171,7 +1191,7 @@ impl super::Nexus {
             disks: disk_reqs,
             cloud_init_bytes: Some(base64::Engine::encode(
                 &base64::engine::general_purpose::STANDARD,
-                db_instance.generate_cidata(&public_keys)?,
+                db_instance.generate_cidata(&ssh_keys)?,
             )),
         };

diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs
index bf8522452a..8d997902a5 100644
--- a/nexus/src/app/mod.rs
+++ b/nexus/src/app/mod.rs
@@ -60,6 +60,7 @@ mod silo;
 mod sled;
 mod sled_instance;
 mod snapshot;
+mod ssh_key;
 mod switch;
 mod switch_interface;
 mod switch_port;
@@ -100,6 +101,9 @@ pub const MAX_MEMORY_BYTES_PER_INSTANCE: u64 = 256 * (1 << 30); // 256 GiB
 pub const MIN_DISK_SIZE_BYTES: u32 = 1 << 30; // 1 GiB
 pub const MAX_DISK_SIZE_BYTES: u64 = 1023 * (1 << 30); // 1023 GiB

+/// This value is arbitrary
+pub const MAX_SSH_KEYS_PER_INSTANCE: u32 = 100;
+
 /// Manages an Oxide fleet -- the heart of the control plane
 pub struct Nexus {
     /// uuid for this nexus instance.
diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs
index 3aa491d978..72a1cd6070 100644
--- a/nexus/src/app/sagas/instance_create.rs
+++ b/nexus/src/app/sagas/instance_create.rs
@@ -17,11 +17,11 @@ use nexus_db_queries::db::queries::network_interface::InsertError as InsertNicEr
 use nexus_db_queries::{authn, authz, db};
 use nexus_defaults::DEFAULT_PRIMARY_NIC_NAME;
 use nexus_types::external_api::params::InstanceDiskAttachment;
-use omicron_common::api::external::Error;
 use omicron_common::api::external::IdentityMetadataCreateParams;
 use omicron_common::api::external::InstanceState;
 use omicron_common::api::external::Name;
 use omicron_common::api::external::NameOrId;
+use omicron_common::api::external::{Error, InternalContext};
 use omicron_common::api::internal::shared::SwitchLocation;
 use ref_cast::RefCast;
 use serde::Deserialize;
@@ -80,6 +80,10 @@ declare_saga_actions! {
         + sic_create_instance_record
         - sic_delete_instance_record
     }
+    ASSOCIATE_SSH_KEYS -> "output" {
+        + sic_associate_ssh_keys
+        - sic_associate_ssh_keys_undo
+    }
     CREATE_NETWORK_INTERFACE -> "output" {
         + sic_create_network_interface
         - sic_create_network_interface_undo
@@ -130,6 +134,8 @@ impl NexusSaga for SagaInstanceCreate {

         builder.append(create_instance_record_action());

+        builder.append(associate_ssh_keys_action());
+
         // Helper function for appending subsagas to our parent saga.
        fn subsaga_append(
            node_basename: String,
@@ -291,6 +297,77 @@ impl NexusSaga for SagaInstanceCreate {
     }
 }

+async fn sic_associate_ssh_keys(
+    sagactx: NexusActionContext,
+) -> Result<(), ActionError> {
+    let osagactx = sagactx.user_data();
+    let datastore = osagactx.datastore();
+    let saga_params = sagactx.saga_params::<Params>()?;
+
+    let opctx = crate::context::op_context_for_saga_action(
+        &sagactx,
+        &saga_params.serialized_authn,
+    );
+    let instance_id = sagactx.lookup::<Uuid>("instance_id")?;
+
+    // Gather the SSH public keys of the actor making the request so
+    // that they may be injected into the new image via cloud-init.
+    // TODO-security: this should be replaced with a lookup based
+    // on `SiloUser` role assignments once those are in place.
+    let actor = opctx
+        .authn
+        .actor_required()
+        .internal_context("loading current user's ssh keys for new Instance")
+        .map_err(ActionError::action_failed)?;
+
+    let (.., authz_user) = LookupPath::new(&opctx, &datastore)
+        .silo_user_id(actor.actor_id())
+        .lookup_for(authz::Action::ListChildren)
+        .await
+        .map_err(ActionError::action_failed)?;
+
+    datastore
+        .ssh_keys_batch_assign(
+            &opctx,
+            &authz_user,
+            instance_id,
+            &saga_params.create_params.ssh_keys.map(|k| {
+                // Before the instance_create saga is kicked off, all entries
+                // in `ssh_keys` are validated and converted to `Uuid`s.
+                k.iter()
+                    .filter_map(|n| match n {
+                        omicron_common::api::external::NameOrId::Id(id) => {
+                            Some(*id)
+                        }
+                        _ => None,
+                    })
+                    .collect()
+            }),
+        )
+        .await
+        .map_err(ActionError::action_failed)?;
+    Ok(())
+}
+
+async fn sic_associate_ssh_keys_undo(
+    sagactx: NexusActionContext,
+) -> Result<(), anyhow::Error> {
+    let osagactx = sagactx.user_data();
+    let datastore = osagactx.datastore();
+    let saga_params = sagactx.saga_params::<Params>()?;
+
+    let opctx = crate::context::op_context_for_saga_action(
+        &sagactx,
+        &saga_params.serialized_authn,
+    );
+    let instance_id = sagactx.lookup::<Uuid>("instance_id")?;
+    datastore
+        .instance_ssh_keys_delete(&opctx, instance_id)
+        .await
+        .map_err(ActionError::action_failed)?;
+    Ok(())
+}
+
 /// Create a network interface for an instance, using the parameters at index
 /// `nic_index`, returning the UUID for the NIC (or None).
 async fn sic_create_network_interface(
@@ -1027,6 +1104,7 @@ pub mod test {
             memory: ByteCount::from_gibibytes_u32(4),
             hostname: String::from("inst"),
             user_data: vec![],
+            ssh_keys: None,
             network_interfaces:
                 params::InstanceNetworkInterfaceAttachment::Default,
             external_ips: vec![params::ExternalIpCreate::Ephemeral {
diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs
index aaf5dcb033..4717a1e548 100644
--- a/nexus/src/app/sagas/instance_delete.rs
+++ b/nexus/src/app/sagas/instance_delete.rs
@@ -237,6 +237,7 @@ mod test {
             memory: ByteCount::from_gibibytes_u32(4),
             hostname: String::from("inst"),
             user_data: vec![],
+            ssh_keys: Some(Vec::new()),
             network_interfaces:
                 params::InstanceNetworkInterfaceAttachment::Default,
             external_ips: vec![params::ExternalIpCreate::Ephemeral {
diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs
index 1716953f04..feeb172ea2 100644
--- a/nexus/src/app/sagas/instance_migrate.rs
+++ b/nexus/src/app/sagas/instance_migrate.rs
@@ -570,6 +570,7 @@ mod tests {
             memory: ByteCount::from_gibibytes_u32(2),
             hostname: String::from(INSTANCE_NAME),
             user_data: b"#cloud-config".to_vec(),
+            ssh_keys: Some(Vec::new()),
             network_interfaces:
                 params::InstanceNetworkInterfaceAttachment::None,
             external_ips: vec![],
diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs
index 92c927e1ce..157a000e37 100644
--- a/nexus/src/app/sagas/instance_start.rs
+++ b/nexus/src/app/sagas/instance_start.rs
@@ -749,6 +749,7 @@ mod test {
             memory: ByteCount::from_gibibytes_u32(2),
             hostname: String::from(INSTANCE_NAME),
             user_data: b"#cloud-config".to_vec(),
+            ssh_keys: Some(Vec::new()),
             network_interfaces:
                 params::InstanceNetworkInterfaceAttachment::None,
             external_ips: vec![],
diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs
index ed8c8ccebf..764160647f 100644
--- a/nexus/src/app/sagas/snapshot_create.rs
+++ b/nexus/src/app/sagas/snapshot_create.rs
@@ -1944,6 +1944,7 @@ mod test {
             user_data:
                 b"#cloud-config\nsystem_info:\n default_user:\n name: oxide"
                     .to_vec(),
+            ssh_keys: Some(Vec::new()),
             network_interfaces:
                 params::InstanceNetworkInterfaceAttachment::None,
             disks: disks_to_attach,
diff --git a/nexus/src/app/silo.rs b/nexus/src/app/silo.rs
index f5f3fa00e7..8461be015a 100644
--- a/nexus/src/app/silo.rs
+++ b/nexus/src/app/silo.rs
@@ -14,8 +14,6 @@ use nexus_db_queries::db::datastore::Discoverability;
 use nexus_db_queries::db::datastore::DnsVersionUpdateBuilder;
 use nexus_db_queries::db::identity::{Asset, Resource};
 use nexus_db_queries::db::lookup::LookupPath;
-use nexus_db_queries::db::model::Name;
-use nexus_db_queries::db::model::SshKey;
 use nexus_db_queries::db::{self, lookup};
 use nexus_db_queries::{authn, authz};
 use nexus_types::internal_api::params::DnsRecord;
@@ -28,7 +26,6 @@ use omicron_common::api::external::{DataPageParams, ResourceType};
 use omicron_common::api::external::{DeleteResult, NameOrId};
 use omicron_common::api::external::{Error, InternalContext};
 use omicron_common::bail_unless;
-use ref_cast::RefCast;
 use std::net::IpAddr;
 use std::str::FromStr;
 use uuid::Uuid;
@@ -647,74 +644,6 @@ impl super::Nexus {
         }
     }

-    // SSH Keys
-    pub fn ssh_key_lookup<'a>(
-        &'a self,
-        opctx: &'a OpContext,
-        ssh_key_selector: &'a params::SshKeySelector,
-    ) -> LookupResult<lookup::SshKey<'a>> {
-        match ssh_key_selector {
-            params::SshKeySelector {
-                silo_user_id: _,
-                ssh_key: NameOrId::Id(id),
-            } => {
-                let ssh_key =
-                    LookupPath::new(opctx, &self.db_datastore).ssh_key_id(*id);
-                Ok(ssh_key)
-            }
-            params::SshKeySelector {
-                silo_user_id,
-                ssh_key: NameOrId::Name(name),
-            } => {
-                let ssh_key = LookupPath::new(opctx, &self.db_datastore)
-                    .silo_user_id(*silo_user_id)
-                    .ssh_key_name(Name::ref_cast(name));
-                Ok(ssh_key)
-            }
-        }
-    }
-
-    pub(crate) async fn ssh_key_create(
-        &self,
-        opctx: &OpContext,
-        silo_user_id: Uuid,
-        params: params::SshKeyCreate,
-    ) -> CreateResult<SshKey> {
-        let ssh_key = db::model::SshKey::new(silo_user_id, params);
-        let (.., authz_user) = LookupPath::new(opctx, &self.datastore())
-            .silo_user_id(silo_user_id)
-            .lookup_for(authz::Action::CreateChild)
-            .await?;
-        assert_eq!(authz_user.id(), silo_user_id);
-        self.db_datastore.ssh_key_create(opctx, &authz_user, ssh_key).await
-    }
-
-    pub(crate) async fn ssh_keys_list(
-        &self,
-        opctx: &OpContext,
-        silo_user_id: Uuid,
-        page_params: &PaginatedBy<'_>,
-    ) -> ListResultVec<SshKey> {
-        let (.., authz_user) = LookupPath::new(opctx, &self.datastore())
-            .silo_user_id(silo_user_id)
-            .lookup_for(authz::Action::ListChildren)
-            .await?;
-        assert_eq!(authz_user.id(), silo_user_id);
-        self.db_datastore.ssh_keys_list(opctx, &authz_user, page_params).await
-    }
-
-    pub(crate) async fn ssh_key_delete(
-        &self,
-        opctx: &OpContext,
-        silo_user_id: Uuid,
-        ssh_key_lookup: &lookup::SshKey<'_>,
-    ) -> DeleteResult {
-        let (.., authz_silo_user, authz_ssh_key) =
-            ssh_key_lookup.lookup_for(authz::Action::Delete).await?;
-        assert_eq!(authz_silo_user.id(), silo_user_id);
-        self.db_datastore.ssh_key_delete(opctx, &authz_ssh_key).await
-    }
-
     // identity providers

     pub fn saml_identity_provider_lookup<'a>(
diff --git a/nexus/src/app/ssh_key.rs b/nexus/src/app/ssh_key.rs
new file mode 100644
index 0000000000..11a16c286a
--- /dev/null
+++ b/nexus/src/app/ssh_key.rs
@@ -0,0 +1,98 @@
+use crate::external_api::params;
+use nexus_db_queries::authz;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::lookup::LookupPath;
+use nexus_db_queries::db::model::Name;
+use nexus_db_queries::db::model::SshKey;
+use nexus_db_queries::db::{self, lookup};
+use omicron_common::api::external::http_pagination::PaginatedBy;
+use omicron_common::api::external::CreateResult;
+use omicron_common::api::external::DeleteResult;
+use omicron_common::api::external::ListResultVec;
+use omicron_common::api::external::LookupResult;
+use omicron_common::api::external::NameOrId;
+use ref_cast::RefCast;
+use uuid::Uuid;
+
+impl super::Nexus {
+    // SSH Keys
+    pub fn ssh_key_lookup<'a>(
+        &'a self,
+        opctx: &'a OpContext,
+        ssh_key_selector: &'a params::SshKeySelector,
+    ) -> LookupResult<lookup::SshKey<'a>> {
+        match ssh_key_selector {
+            params::SshKeySelector {
+                silo_user_id: _,
+                ssh_key: NameOrId::Id(id),
+            } => {
+                let ssh_key =
+                    LookupPath::new(opctx, &self.db_datastore).ssh_key_id(*id);
+                Ok(ssh_key)
+            }
+            params::SshKeySelector {
+                silo_user_id,
+                ssh_key: NameOrId::Name(name),
+            } => {
+                let ssh_key = LookupPath::new(opctx, &self.db_datastore)
+                    .silo_user_id(*silo_user_id)
+                    .ssh_key_name(Name::ref_cast(name));
+                Ok(ssh_key)
+            }
+        }
+    }
+
+    pub(crate) async fn ssh_key_create(
+        &self,
+        opctx: &OpContext,
+        silo_user_id: Uuid,
+        params: params::SshKeyCreate,
+    ) -> CreateResult<SshKey> {
+        let ssh_key = db::model::SshKey::new(silo_user_id, params);
+        let (.., authz_user) = LookupPath::new(opctx, &self.datastore())
+            .silo_user_id(silo_user_id)
+            .lookup_for(authz::Action::CreateChild)
+            .await?;
+        assert_eq!(authz_user.id(), silo_user_id);
+        self.db_datastore.ssh_key_create(opctx, &authz_user, ssh_key).await
+    }
+
+    pub(crate) async fn ssh_keys_list(
+        &self,
+        opctx: &OpContext,
+        silo_user_id: Uuid,
+        page_params: &PaginatedBy<'_>,
+    ) -> ListResultVec<SshKey> {
+        let (.., authz_user) = LookupPath::new(opctx, &self.datastore())
+            .silo_user_id(silo_user_id)
+            .lookup_for(authz::Action::ListChildren)
+            .await?;
+        assert_eq!(authz_user.id(), silo_user_id);
+        self.db_datastore.ssh_keys_list(opctx, &authz_user, page_params).await
+    }
+
+    pub(crate) async fn instance_ssh_keys_list(
+        &self,
+        opctx: &OpContext,
+        instance_lookup: &lookup::Instance<'_>,
+        page_params: &PaginatedBy<'_>,
+    ) -> ListResultVec<SshKey> {
+        let (.., authz_instance) =
+            instance_lookup.lookup_for(authz::Action::ListChildren).await?;
+        self.db_datastore
+            .instance_ssh_keys_list(opctx, &authz_instance, page_params)
+            .await
+    }
+
+    pub(crate) async fn ssh_key_delete(
+        &self,
+        opctx: &OpContext,
+        silo_user_id: Uuid,
+        ssh_key_lookup: &lookup::SshKey<'_>,
+    ) -> DeleteResult {
+        let (.., authz_silo_user, authz_ssh_key) =
+            ssh_key_lookup.lookup_for(authz::Action::Delete).await?;
+        assert_eq!(authz_silo_user.id(), silo_user_id);
+        self.db_datastore.ssh_key_delete(opctx, &authz_ssh_key).await
+    }
+}
diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs
index 3c3c40d026..14c6f4bd28 100644
--- a/nexus/src/external_api/http_entrypoints.rs
+++ b/nexus/src/external_api/http_entrypoints.rs
@@ -168,6 +168,7 @@ pub(crate) fn external_api() -> NexusApiDescription {
         api.register(instance_disk_detach)?;
         api.register(instance_serial_console)?;
         api.register(instance_serial_console_stream)?;
+        api.register(instance_ssh_public_key_list)?;

         api.register(image_list)?;
         api.register(image_create)?;
@@ -2666,6 +2667,50 @@ async fn instance_serial_console_stream(
     }
 }

+/// List the SSH public keys added to the instance via cloud-init during instance creation
+///
+/// Note that this list is a snapshot in time and will not reflect updates made after
+/// the instance is created.
+#[endpoint {
+    method = GET,
+    path = "/v1/instances/{instance}/ssh-public-keys",
+    tags = ["instances"],
+}]
+async fn instance_ssh_public_key_list(
+    rqctx: RequestContext<Arc<ServerContext>>,
+    path_params: Path<params::InstancePath>,
+    query_params: Query<PaginatedByNameOrId<params::OptionalProjectSelector>>,
+) -> Result<HttpResponseOk<ResultsPage<SshKey>>, HttpError> {
+    let apictx = rqctx.context();
+    let handler = async {
+        let nexus = &apictx.nexus;
+        let path = path_params.into_inner();
+        let query = query_params.into_inner();
+        let pag_params = data_page_params_for(&rqctx, &query)?;
+        let scan_params = ScanByNameOrId::from_query(&query)?;
+        let paginated_by = name_or_id_pagination(&pag_params, scan_params)?;
+        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+        let instance_selector = params::InstanceSelector {
+            project: scan_params.selector.project.clone(),
+            instance: path.instance,
+        };
+        let instance_lookup =
+            nexus.instance_lookup(&opctx, instance_selector)?;
+        let ssh_keys = nexus
+            .instance_ssh_keys_list(&opctx, &instance_lookup, &paginated_by)
+            .await?
+ .into_iter() + .map(|k| k.into()) + .collect(); + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + ssh_keys, + &marker_for_name_or_id, + )?)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + /// List an instance's disks #[endpoint { method = GET, diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index d82a934686..b493986213 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -522,6 +522,7 @@ pub async fn create_instance_with( user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), + ssh_keys: Some(Vec::new()), network_interfaces: nics.clone(), external_ips, disks, diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index c721fe3606..b0d23e5322 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -395,6 +395,12 @@ pub static DEMO_INSTANCE_EPHEMERAL_IP_URL: Lazy = Lazy::new(|| { *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR ) }); +pub static DEMO_INSTANCE_SSH_KEYS_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/ssh-public-keys?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); pub static DEMO_INSTANCE_NICS_URL: Lazy = Lazy::new(|| { format!( "/v1/network-interfaces?project={}&instance={}", @@ -417,6 +423,7 @@ pub static DEMO_INSTANCE_CREATE: Lazy = memory: ByteCount::from_gibibytes_u32(16), hostname: String::from("demo-instance"), user_data: vec![], + ssh_keys: Some(Vec::new()), network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { pool: Some(DEMO_IP_POOL_NAME.clone().into()), @@ -1797,6 +1804,13 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { ], }, + VerifyEndpoint { + url: &DEMO_INSTANCE_SSH_KEYS_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![AllowedMethod::Get] + }, + /* IAM */ VerifyEndpoint { diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 875aab74f7..ac8a9c0fc6 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -36,10 +36,12 @@ use nexus_test_utils::resource_helpers::object_put; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils::start_sled_agent; +use nexus_types::external_api::params::SshKeyCreate; use nexus_types::external_api::shared::IpKind; use nexus_types::external_api::shared::IpRange; use nexus_types::external_api::shared::Ipv4Range; use nexus_types::external_api::shared::SiloIdentityMode; +use nexus_types::external_api::views::SshKey; use nexus_types::external_api::{params, views}; use nexus_types::identity::Resource; use omicron_common::api::external::ByteCount; @@ -247,6 +249,7 @@ async fn test_instances_create_reboot_halt( memory: instance.memory, hostname: instance.hostname.clone(), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], @@ -1219,6 +1222,7 @@ async fn test_instances_create_stopped_start( memory: ByteCount::from_gibibytes_u32(1), hostname: String::from("the_host"), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], @@ -1386,6 +1390,7 @@ async fn 
test_instance_using_image_from_other_project_fails( memory: ByteCount::from_gibibytes_u32(1), hostname: "stolen".into(), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], @@ -1460,6 +1465,7 @@ async fn test_instance_create_saga_removes_instance_database_record( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("inst"), user_data: vec![], + ssh_keys: None, network_interfaces: interface_params.clone(), external_ips: vec![], disks: vec![], @@ -1487,6 +1493,7 @@ async fn test_instance_create_saga_removes_instance_database_record( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("inst2"), user_data: vec![], + ssh_keys: None, network_interfaces: interface_params, external_ips: vec![], disks: vec![], @@ -1575,6 +1582,7 @@ async fn test_instance_with_single_explicit_ip_address( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nic-test"), user_data: vec![], + ssh_keys: None, network_interfaces: interface_params, external_ips: vec![], disks: vec![], @@ -1688,6 +1696,7 @@ async fn test_instance_with_new_custom_network_interfaces( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nic-test"), user_data: vec![], + ssh_keys: None, network_interfaces: interface_params, external_ips: vec![], disks: vec![], @@ -1801,6 +1810,7 @@ async fn test_instance_create_delete_network_interface( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nic-test"), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::None, external_ips: vec![], disks: vec![], @@ -2041,6 +2051,7 @@ async fn test_instance_update_network_interfaces( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nic-test"), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::None, external_ips: vec![], disks: vec![], @@ -2433,6 +2444,7 @@ async fn test_instance_with_multiple_nics_unwinds_completely( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nic-test"), user_data: vec![], + ssh_keys: None, network_interfaces: interface_params, external_ips: vec![], disks: vec![], @@ -2498,6 +2510,7 @@ async fn test_attach_one_disk_to_instance(cptestctx: &ControlPlaneTestContext) { memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nfs"), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![params::InstanceDiskAttachment::Attach( @@ -2557,6 +2570,7 @@ async fn test_instance_create_attach_disks( memory: ByteCount::from_gibibytes_u32(3), hostname: String::from("nfs"), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![ @@ -2653,6 +2667,7 @@ async fn test_instance_create_attach_disks_undo( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nfs"), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![ @@ -2737,6 +2752,7 @@ async fn test_attach_eight_disks_to_instance( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nfs"), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: (0..8) @@ -2817,6 +2833,7 @@ async fn test_cannot_attach_nine_disks_to_instance( memory: 
ByteCount::from_gibibytes_u32(4), hostname: String::from("nfs"), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: (0..9) @@ -2911,6 +2928,7 @@ async fn test_cannot_attach_faulted_disks(cptestctx: &ControlPlaneTestContext) { memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nfs"), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: (0..8) @@ -2994,6 +3012,7 @@ async fn test_disks_detached_when_instance_destroyed( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nfs"), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: (0..8) @@ -3084,6 +3103,7 @@ async fn test_disks_detached_when_instance_destroyed( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nfsv2"), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: (0..8) @@ -3145,6 +3165,7 @@ async fn test_instances_memory_rejected_less_than_min_memory_size( user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![], @@ -3194,6 +3215,7 @@ async fn test_instances_memory_not_divisible_by_min_memory_size( user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![], @@ -3243,6 +3265,7 @@ async fn test_instances_memory_greater_than_max_size( user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![], @@ -3264,6 +3287,188 @@ async fn test_instances_memory_greater_than_max_size( assert!(error.message.contains("memory must be less than")); } +#[nexus_test] +async fn test_instance_create_with_ssh_keys( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let instance_name = "ssh-keys"; + + cptestctx + .sled_agent + .sled_agent + .start_local_mock_propolis_server(&cptestctx.logctx.log) + .await + .unwrap(); + + // Test pre-reqs + DiskTest::new(&cptestctx).await; + create_project_and_pool(&client).await; + + // Add some SSH keys + let key_configs = vec![ + ("key1", "an SSH public key", "ssh-test AAAAAAAA"), + ("key2", "another SSH public key", "ssh-test BBBBBBBB"), + ("key3", "yet another public key", "ssh-test CCCCCCCC"), + ]; + let mut user_keys: Vec = Vec::new(); + for (name, description, public_key) in &key_configs { + let new_key: SshKey = NexusRequest::objects_post( + client, + "/v1/me/ssh-keys", + &SshKeyCreate { + identity: IdentityMetadataCreateParams { + name: name.parse().unwrap(), + description: description.to_string(), + }, + public_key: public_key.to_string(), + }, + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to make POST request") + .parsed_body() + .unwrap(); + assert_eq!(new_key.identity.name.as_str(), *name); + assert_eq!(new_key.identity.description, *description); + assert_eq!(new_key.public_key, *public_key); + user_keys.push(new_key); + } + + // Create an instance + let instance_params = params::InstanceCreate { + identity: 
IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: String::from("probably serving data"), + }, + ncpus: InstanceCpuCount::try_from(2).unwrap(), + memory: ByteCount::from_gibibytes_u32(4), + // By default should transfer all profile keys + ssh_keys: None, + start: false, + hostname: instance_name.to_string(), + user_data: vec![], + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + disks: vec![], + }; + + let builder = + RequestBuilder::new(client, http::Method::POST, &get_instances_url()) + .body(Some(&instance_params)) + .expect_status(Some(http::StatusCode::CREATED)); + + let response = NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected instance creation!"); + + let instance = response.parsed_body::().unwrap(); + + let keys = objects_list_page_authz::( + client, + format!("/v1/instances/{}/ssh-public-keys", instance.identity.id) + .as_str(), + ) + .await + .items; + + assert_eq!(keys[0], user_keys[0]); + assert_eq!(keys[1], user_keys[1]); + assert_eq!(keys[2], user_keys[2]); + + // Test creating an instance with only allow listed keys + + let instance_name = "ssh-keys-2"; + // Create an instance + let instance_params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: String::from("probably serving data"), + }, + ncpus: InstanceCpuCount::try_from(2).unwrap(), + memory: ByteCount::from_gibibytes_u32(4), + // Should only transfer the first key + ssh_keys: Some(vec![user_keys[0].identity.name.clone().into()]), + start: false, + hostname: instance_name.to_string(), + user_data: vec![], + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + disks: vec![], + }; + + let builder = + RequestBuilder::new(client, http::Method::POST, &get_instances_url()) + .body(Some(&instance_params)) + .expect_status(Some(http::StatusCode::CREATED)); + + let response = NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected instance creation!"); + + let instance = response.parsed_body::().unwrap(); + + let keys = objects_list_page_authz::( + client, + format!("/v1/instances/{}/ssh-public-keys", instance.identity.id) + .as_str(), + ) + .await + .items; + + assert_eq!(keys.len(), 1); + assert_eq!(keys[0], user_keys[0]); + + // Test creating an instance with no keys + + let instance_name = "ssh-keys-3"; + // Create an instance + let instance_params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: String::from("probably serving data"), + }, + ncpus: InstanceCpuCount::try_from(2).unwrap(), + memory: ByteCount::from_gibibytes_u32(4), + // Should transfer no keys + ssh_keys: Some(vec![]), + start: false, + hostname: instance_name.to_string(), + user_data: vec![], + network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, + external_ips: vec![], + disks: vec![], + }; + + let builder = + RequestBuilder::new(client, http::Method::POST, &get_instances_url()) + .body(Some(&instance_params)) + .expect_status(Some(http::StatusCode::CREATED)); + + let response = NexusRequest::new(builder) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("Expected instance creation!"); + + let instance = response.parsed_body::().unwrap(); + + let keys = objects_list_page_authz::( + client, + 
format!("/v1/instances/{}/ssh-public-keys", instance.identity.id) + .as_str(), + ) + .await + .items; + + assert_eq!(keys.len(), 0); +} + async fn expect_instance_start_fail_507( client: &ClientTestContext, instance_name: &str, @@ -3353,6 +3558,7 @@ async fn test_cannot_provision_instance_beyond_cpu_capacity( memory: ByteCount::from_gibibytes_u32(1), hostname: config.0.to_string(), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], @@ -3406,6 +3612,7 @@ async fn test_cannot_provision_instance_beyond_cpu_limit( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("test"), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![], @@ -3457,6 +3664,7 @@ async fn test_cannot_provision_instance_beyond_ram_capacity( memory: ByteCount::try_from(config.1).unwrap(), hostname: config.0.to_string(), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], @@ -3711,6 +3919,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( external_ips: vec![params::ExternalIpCreate::Ephemeral { pool: Some("pool1".parse::().unwrap().into()), }], + ssh_keys: None, disks: vec![], start: true, }; @@ -3775,6 +3984,7 @@ async fn test_instance_ephemeral_ip_from_orphan_pool( external_ips: vec![params::ExternalIpCreate::Ephemeral { pool: Some("orphan-pool".parse::().unwrap().into()), }], + ssh_keys: None, disks: vec![], start: true, }; @@ -3835,6 +4045,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error( external_ips: vec![params::ExternalIpCreate::Ephemeral { pool: None, // <--- the only important thing here }], + ssh_keys: None, disks: vec![], start: true, }; @@ -3965,6 +4176,7 @@ async fn test_instance_allow_only_one_ephemeral_ip( user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![ephemeral_create.clone(), ephemeral_create], disks: vec![], @@ -4090,6 +4302,7 @@ async fn test_instance_create_in_silo(cptestctx: &ControlPlaneTestContext) { memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("inst"), user_data: vec![], + ssh_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { pool: Some("default".parse::().unwrap().into()), diff --git a/nexus/tests/integration_tests/projects.rs b/nexus/tests/integration_tests/projects.rs index 60195e5902..dc5b844337 100644 --- a/nexus/tests/integration_tests/projects.rs +++ b/nexus/tests/integration_tests/projects.rs @@ -157,6 +157,7 @@ async fn test_project_deletion_with_instance( memory: ByteCount::from_gibibytes_u32(1), hostname: String::from("the_host"), user_data: b"none".to_vec(), + ssh_keys: Some(Vec::new()), network_interfaces: params::InstanceNetworkInterfaceAttachment::None, external_ips: vec![], diff --git a/nexus/tests/integration_tests/quotas.rs b/nexus/tests/integration_tests/quotas.rs index 858837bb32..c13dda7796 100644 --- a/nexus/tests/integration_tests/quotas.rs +++ b/nexus/tests/integration_tests/quotas.rs @@ -81,6 +81,7 @@ impl ResourceAllocator { hostname: "host".to_string(), user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), + ssh_keys: Some(Vec::new()), network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, 
external_ips: Vec::<params::ExternalIpCreate>::new(), disks: Vec::<params::InstanceDiskAttachment>::new(), diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index 87ec2b3163..c493e725b2 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -123,6 +123,7 @@ async fn test_snapshot_basic(cptestctx: &ControlPlaneTestContext) { user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), + ssh_keys: Some(Vec::new()), network_interfaces: params::InstanceNetworkInterfaceAttachment::None, disks: vec![params::InstanceDiskAttachment::Attach( diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index e36b213f7e..9749086d47 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ -58,6 +58,7 @@ async fn create_instance_expect_failure( memory: ByteCount::from_gibibytes_u32(1), hostname: name.to_string(), user_data: vec![], + ssh_keys: Some(Vec::new()), network_interfaces, external_ips: vec![], disks: vec![], diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 8bd2f34de5..ec993f482d 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -55,6 +55,7 @@ instance_network_interface_view GET /v1/network-interfaces/{interf instance_reboot POST /v1/instances/{instance}/reboot instance_serial_console GET /v1/instances/{instance}/serial-console instance_serial_console_stream GET /v1/instances/{instance}/serial-console/stream +instance_ssh_public_key_list GET /v1/instances/{instance}/ssh-public-keys instance_start POST /v1/instances/{instance}/start instance_stop POST /v1/instances/{instance}/stop instance_view GET /v1/instances/{instance} diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index c32dae4df9..73687017ae 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1036,6 +1036,14 @@ pub struct InstanceCreate { #[serde(default)] pub disks: Vec<InstanceDiskAttachment>, + /// An allowlist of SSH public keys to be transferred to the instance via + /// cloud-init during instance creation. + /// + /// If not provided, all SSH public keys from the user's profile will be sent. + /// If an empty list is provided, no public keys will be transmitted to the + /// instance. + pub ssh_keys: Option<Vec<NameOrId>>, + /// Should this instance be started upon creation; true by default.
#[serde(default = "bool_true")] pub start: bool, diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 45cfe8e267..84648f109f 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -558,7 +558,9 @@ pub struct Role { // SSH KEYS /// View of an SSH Key -#[derive(ObjectIdentity, Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[derive( + ObjectIdentity, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, +)] pub struct SshKey { #[serde(flatten)] pub identity: IdentityMetadata, diff --git a/openapi/nexus.json b/openapi/nexus.json index 59206ed010..8c5af8b14d 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -2252,6 +2252,83 @@ "x-dropshot-websocket": {} } }, + "/v1/instances/{instance}/ssh-public-keys": { + "get": { + "tags": [ + "instances" + ], + "summary": "List the SSH public keys added to the instance via cloud-init during instance creation", + "description": "Note that this list is a snapshot in time and will not reflect updates made after the instance is created.", + "operationId": "instance_ssh_public_key_list", + "parameters": [ + { + "in": "path", + "name": "instance", + "description": "Name or ID of the instance", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/NameOrIdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SshKeyResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + } + }, "/v1/instances/{instance}/start": { "post": { "tags": [ @@ -12353,6 +12430,14 @@ } ] }, + "ssh_keys": { + "nullable": true, + "description": "An allowlist of SSH public keys to be transferred to the instance via cloud-init during instance creation.\n\nIf not provided, all SSH public keys from the user's profile will be sent. 
If an empty list is provided, no public keys will be transmitted to the instance.", + "type": "array", + "items": { + "$ref": "#/components/schemas/NameOrId" + } + }, "start": { "description": "Should this instance be started upon creation; true by default.", "default": true, diff --git a/schema/crdb/30.0.0/up.sql b/schema/crdb/30.0.0/up.sql new file mode 100644 index 0000000000..17c9b6e3dc --- /dev/null +++ b/schema/crdb/30.0.0/up.sql @@ -0,0 +1,6 @@ + +CREATE TABLE IF NOT EXISTS omicron.public.instance_ssh_key ( + instance_id UUID NOT NULL, + ssh_key_id UUID NOT NULL, + PRIMARY KEY (instance_id, ssh_key_id) +); \ No newline at end of file diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 6ff92acfa4..4cb347b260 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -827,6 +827,19 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_ssh_key_by_silo_user ON omicron.public. ) WHERE time_deleted IS NULL; +/** + * Represents the SSH keys copied to an instance at create time by cloud-init. + * Entries are added here when an instance is created (with configured SSH keys) + * and removed when the instance is destroyed. + * + * TODO: Should this have time created / time deleted + */ +CREATE TABLE IF NOT EXISTS omicron.public.instance_ssh_key ( + instance_id UUID NOT NULL, + ssh_key_id UUID NOT NULL, + PRIMARY KEY (instance_id, ssh_key_id) +); + CREATE TABLE IF NOT EXISTS omicron.public.silo_quotas ( silo_id UUID PRIMARY KEY, time_created TIMESTAMPTZ NOT NULL, @@ -3405,7 +3418,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '29.0.0', NULL) + ( TRUE, NOW(), NOW(), '30.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 7d8d93b574a45090db1e38d2f56ec372cf5847da Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 30 Jan 2024 20:25:19 -0800 Subject: [PATCH 64/91] Support Omicron development on NixOS (#4933) I use NixOS on my dev machine, and I'd like to be able to do some development tasks locally (such as `cargo check`, running `rust-analyzer`, et cetera). This branch makes the following changes to better support Omicron development on NixOS: - **Add `flake.nix` for Nix users** (484323c) In order to build Omicron, including running `cargo check` or `rust-analyzer`, a number of non-Rust dependencies must be present. This commit adds a [Nix flake] file which users of the Nix package manager can [use to enter a dev shell][nix-develop] that contains those dependencies, making it easy for Nix and NixOS users to develop Omicron on NixOS. The Nix flake configured here creates a build environment with `clang` as the C compiler toolchain, and ensures that the following libraries are available: - `libclang` (of a version matching the `clang` C toolchain) - `libpq` - `libxml2` - `libxmlsec` - `openssl` - `sqlite` In addition, it provides `pkg-config`, `cmake`, and `rustup`, and ensures that the various environment variables that the Rust bindings for these libraries use to find the libraries are set correctly. This makes it possible to run `cargo check`, `cargo build`, and `rust-analyzer` on NixOS. I've also added to the `.envrc` file so that `direnv` will automatically use the Nix flake's dev environment, if a `nix` binary is present on the user's system and that `nix` binary supports flakes. 
[Nix flake]: https://nixos.wiki/wiki/Flakes [nix-develop]: https://nixos.wiki/wiki/Development_environment_with_nix-shell#nix_develop - **Use `/usr/bin/env bash` rather than `/bin/bash`** (9b438c1) On some operating systems, such as [NixOS][1], there may not be a `bash` executable in `/bin`, but a `bash` executable may be located elsewhere. Using a `#!/bin/bash` shebang line in scripts may not work correctly on these systems. Instead, a `#!/usr/bin/env bash` shebang is [generally considered][2] the preferable way to write portable shell scripts that use Bash. Therefore, this commit changes the shell scripts in `tools/` to use `#!/usr/bin/env bash` shebang lines, rather than `#!/bin/bash`. [1]: https://discourse.nixos.org/t/shebang-locations/28992 [2]: https://www.cyberciti.biz/tips/finding-bash-perl-python-portably-using-env.html --- .envrc | 5 ++ .gitignore | 1 + flake.lock | 85 +++++++++++++++++++ flake.nix | 64 ++++++++++++++ tools/build-global-zone-packages.sh | 2 +- tools/build-host-image.sh | 2 +- .../build-trampoline-global-zone-packages.sh | 2 +- tools/ci_check_opte_ver.sh | 2 +- tools/ci_download_clickhouse | 2 +- tools/ci_download_cockroachdb | 2 +- tools/ci_download_console | 2 +- tools/ci_download_dendrite_openapi | 2 +- tools/ci_download_dendrite_stub | 2 +- tools/ci_download_maghemite_mgd | 2 +- tools/ci_download_maghemite_openapi | 2 +- tools/ci_download_softnpu_machinery | 2 +- tools/ci_download_transceiver_control | 2 +- tools/create_gimlet_virtual_hardware.sh | 2 +- tools/create_scrimlet_virtual_hardware.sh | 2 +- tools/create_self_signed_cert.sh | 2 +- tools/create_virtual_hardware.sh | 2 +- tools/delete-reservoir.sh | 2 +- tools/destroy_gimlet_virtual_hardware.sh | 2 +- tools/destroy_scrimlet_virtual_hardware.sh | 2 +- tools/destroy_virtual_hardware.sh | 2 +- tools/ensure_buildomat_artifact.sh | 2 +- tools/generate-nexus-api.sh | 2 +- tools/generate-sled-agent-api.sh | 2 +- tools/generate-wicketd-api.sh | 2 +- tools/include/force-git-over-https.sh | 2 +- tools/install_builder_prerequisites.sh | 8 +- tools/install_opte.sh | 2 +- tools/install_prerequisites.sh | 2 +- tools/install_runner_prerequisites.sh | 2 +- tools/opte_version_override | 2 +- tools/populate/populate-alpine.sh | 2 +- tools/populate/populate-images.sh | 2 +- tools/reflector/helpers.sh | 2 +- tools/renovate-post-upgrade.sh | 2 +- tools/scrimlet/create-softnpu-zone.sh | 2 +- tools/scrimlet/destroy-softnpu-zone.sh | 2 +- tools/scrimlet/softnpu-init.sh | 2 +- tools/uninstall_opte.sh | 2 +- tools/update_crucible.sh | 2 +- tools/update_dendrite.sh | 2 +- tools/update_helpers.sh | 2 +- tools/update_maghemite.sh | 2 +- tools/update_propolis.sh | 2 +- tools/update_transceiver_control.sh | 2 +- tools/virtual_hardware.sh | 2 +- 50 files changed, 207 insertions(+), 46 deletions(-) create mode 100644 flake.lock create mode 100644 flake.nix diff --git a/.envrc b/.envrc index 16ef5819f2..036459a4a9 100644 --- a/.envrc +++ b/.envrc @@ -5,3 +5,8 @@ PATH_add out/cockroachdb/bin PATH_add out/clickhouse PATH_add out/dendrite-stub/bin PATH_add out/mgd/root/opt/oxide/mgd/bin + +if nix flake show &> /dev/null +then + use flake; +fi \ No newline at end of file diff --git a/.gitignore b/.gitignore index 1d7177320f..fc3cb4133a 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ debug.out rusty-tags.vi *.sw* tags +.direnv \ No newline at end of file diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000000..2c24a13714 --- /dev/null +++ b/flake.lock @@ -0,0 +1,85 @@ +{ + "nodes": { + "flake-utils": { + 
"inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1705309234, + "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1706371002, + "narHash": "sha256-dwuorKimqSYgyu8Cw6ncKhyQjUDOyuXoxDTVmAXq88s=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "c002c6aa977ad22c60398daaa9be52f2203d0006", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs", + "rust-overlay": "rust-overlay" + } + }, + "rust-overlay": { + "inputs": { + "flake-utils": [ + "flake-utils" + ], + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1706634984, + "narHash": "sha256-xn7lGPE8gRGBe3Lt8ESoN/uUHm7IrbiV7siupwjHX1o=", + "owner": "oxalica", + "repo": "rust-overlay", + "rev": "883b84c426107a8ec020e7124f263d7c35a5bb9f", + "type": "github" + }, + "original": { + "owner": "oxalica", + "repo": "rust-overlay", + "type": "github" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000000..65329cbbf7 --- /dev/null +++ b/flake.nix @@ -0,0 +1,64 @@ +{ + description = "Development environment for Omicron"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + rust-overlay = { + url = "github:oxalica/rust-overlay"; + inputs = { + nixpkgs.follows = "nixpkgs"; + flake-utils.follows = "flake-utils"; + }; + }; + }; + + outputs = { self, nixpkgs, flake-utils, rust-overlay }: + flake-utils.lib.eachDefaultSystem + (system: + let + overlays = [ (import rust-overlay) ]; + pkgs = import nixpkgs { + inherit system overlays; + }; + # use the Rust toolchain defined in the `rust-toolchain.toml` file. 
+ rustToolchain = pkgs.pkgsBuildHost.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml; + nativeBuildInputs = with pkgs; [ + rustToolchain + cmake + stdenv + pkg-config + ]; + buildInputs = with pkgs; [ + # libs + openssl + postgresql + xmlsec + sqlite + libclang + libxml2 + ]; + in + with pkgs; + { + devShells.default = mkShell.override + { + # use Clang as the C compiler for all C libraries + stdenv = clangStdenv; + } + { + inherit buildInputs nativeBuildInputs; + + name = "omicron"; + DEP_PQ_LIBDIRS = " ${postgresql.lib}/lib"; + LIBCLANG_PATH = "${libclang.lib}/lib"; + OPENSSL_DIR = "${openssl.dev}"; + OPENSSL_LIB_DIR = "${openssl.out}/lib"; + + # Needed by rustfmt-wrapper, see: + # https://github.com/oxidecomputer/rustfmt-wrapper/blob/main/src/lib.rs + RUSTFMT = "${rustToolchain}/bin/rustfmt"; + }; + } + ); +} diff --git a/tools/build-global-zone-packages.sh b/tools/build-global-zone-packages.sh index b989e6a543..c65b273d04 100755 --- a/tools/build-global-zone-packages.sh +++ b/tools/build-global-zone-packages.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -eux diff --git a/tools/build-host-image.sh b/tools/build-host-image.sh index c194edb603..759f2dd073 100755 --- a/tools/build-host-image.sh +++ b/tools/build-host-image.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -o errexit set -o pipefail diff --git a/tools/build-trampoline-global-zone-packages.sh b/tools/build-trampoline-global-zone-packages.sh index d8df0f8921..ee8e7b3371 100755 --- a/tools/build-trampoline-global-zone-packages.sh +++ b/tools/build-trampoline-global-zone-packages.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -eux diff --git a/tools/ci_check_opte_ver.sh b/tools/ci_check_opte_ver.sh index 7f05ec1f36..5d345134b5 100755 --- a/tools/ci_check_opte_ver.sh +++ b/tools/ci_check_opte_ver.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail source tools/opte_version_override diff --git a/tools/ci_download_clickhouse b/tools/ci_download_clickhouse index 7d634a3237..03a5bff24c 100755 --- a/tools/ci_download_clickhouse +++ b/tools/ci_download_clickhouse @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # ci_download_clickhouse: fetches the appropriate ClickHouse binary tarball diff --git a/tools/ci_download_cockroachdb b/tools/ci_download_cockroachdb index 8b002b4359..ca484c000f 100755 --- a/tools/ci_download_cockroachdb +++ b/tools/ci_download_cockroachdb @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # ci_download_cockroachdb: fetches the appropriate CockroachDB binary tarball diff --git a/tools/ci_download_console b/tools/ci_download_console index a6837af0e9..b27cebe79f 100755 --- a/tools/ci_download_console +++ b/tools/ci_download_console @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # ci_download_console: fetches the appropriate Console assets. diff --git a/tools/ci_download_dendrite_openapi b/tools/ci_download_dendrite_openapi index fe5c91282e..395bc62056 100755 --- a/tools/ci_download_dendrite_openapi +++ b/tools/ci_download_dendrite_openapi @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # ci_download_dendrite_openapi: fetches the appropriate dendrite openapi spec. 
diff --git a/tools/ci_download_dendrite_stub b/tools/ci_download_dendrite_stub index 870a847af0..d1db31c697 100755 --- a/tools/ci_download_dendrite_stub +++ b/tools/ci_download_dendrite_stub @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # ci_download_dendrite_stub: fetches the appropriate Dendrite binary tarball diff --git a/tools/ci_download_maghemite_mgd b/tools/ci_download_maghemite_mgd index eff680d7fd..9890e4505e 100755 --- a/tools/ci_download_maghemite_mgd +++ b/tools/ci_download_maghemite_mgd @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # ci_download_maghemite_mgd: fetches the maghemite mgd binary tarball, unpacks diff --git a/tools/ci_download_maghemite_openapi b/tools/ci_download_maghemite_openapi index db53f68d2c..56ce640a76 100755 --- a/tools/ci_download_maghemite_openapi +++ b/tools/ci_download_maghemite_openapi @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # ci_download_maghemite_openapi: fetches the appropriate maghemite openapi spec. diff --git a/tools/ci_download_softnpu_machinery b/tools/ci_download_softnpu_machinery index e147238673..e1d173ac2d 100755 --- a/tools/ci_download_softnpu_machinery +++ b/tools/ci_download_softnpu_machinery @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # This script fetches the following from CI # diff --git a/tools/ci_download_transceiver_control b/tools/ci_download_transceiver_control index 633e42167b..cdd2528ef3 100755 --- a/tools/ci_download_transceiver_control +++ b/tools/ci_download_transceiver_control @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # ci_download_transceiver_control: fetches the appropriate transceiver-control diff --git a/tools/create_gimlet_virtual_hardware.sh b/tools/create_gimlet_virtual_hardware.sh index 6884e402ff..ad22cc26e7 100755 --- a/tools/create_gimlet_virtual_hardware.sh +++ b/tools/create_gimlet_virtual_hardware.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Make me a Gimlet! # diff --git a/tools/create_scrimlet_virtual_hardware.sh b/tools/create_scrimlet_virtual_hardware.sh index e15bc8aaba..be7785a90d 100755 --- a/tools/create_scrimlet_virtual_hardware.sh +++ b/tools/create_scrimlet_virtual_hardware.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Make me a Scrimlet! # diff --git a/tools/create_self_signed_cert.sh b/tools/create_self_signed_cert.sh index 783c8eaf70..cc127a5c5d 100755 --- a/tools/create_self_signed_cert.sh +++ b/tools/create_self_signed_cert.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Creates a self-signed certificate. # diff --git a/tools/create_virtual_hardware.sh b/tools/create_virtual_hardware.sh index fa35bb24ab..ef01af92bb 100755 --- a/tools/create_virtual_hardware.sh +++ b/tools/create_virtual_hardware.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Make me a Gimlet! # diff --git a/tools/delete-reservoir.sh b/tools/delete-reservoir.sh index 77e814f0c7..b148f0ccf3 100755 --- a/tools/delete-reservoir.sh +++ b/tools/delete-reservoir.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash size=`pfexec /usr/lib/rsrvrctl -q | grep Free | awk '{print $3}'` let x=$size/1024 diff --git a/tools/destroy_gimlet_virtual_hardware.sh b/tools/destroy_gimlet_virtual_hardware.sh index 7c5fada14d..f162231677 100644 --- a/tools/destroy_gimlet_virtual_hardware.sh +++ b/tools/destroy_gimlet_virtual_hardware.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Unmake me a Gimlet! 
# diff --git a/tools/destroy_scrimlet_virtual_hardware.sh b/tools/destroy_scrimlet_virtual_hardware.sh index a593e3fa55..26796bbb49 100755 --- a/tools/destroy_scrimlet_virtual_hardware.sh +++ b/tools/destroy_scrimlet_virtual_hardware.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Unmake me a Scrimlet! # diff --git a/tools/destroy_virtual_hardware.sh b/tools/destroy_virtual_hardware.sh index 46c6f117c4..3e8be95a84 100755 --- a/tools/destroy_virtual_hardware.sh +++ b/tools/destroy_virtual_hardware.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Unmake me a Gimlet! # diff --git a/tools/ensure_buildomat_artifact.sh b/tools/ensure_buildomat_artifact.sh index 35c4473e42..9507435b9e 100755 --- a/tools/ensure_buildomat_artifact.sh +++ b/tools/ensure_buildomat_artifact.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Ensure a buildomat artifact is downloaded and available locally. diff --git a/tools/generate-nexus-api.sh b/tools/generate-nexus-api.sh index 122152f167..a0c7d13165 100755 --- a/tools/generate-nexus-api.sh +++ b/tools/generate-nexus-api.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash ./target/debug/nexus nexus/examples/config.toml -O > openapi/nexus.json ./target/debug/nexus nexus/examples/config.toml -I > openapi/nexus-internal.json diff --git a/tools/generate-sled-agent-api.sh b/tools/generate-sled-agent-api.sh index cc087dcfb8..d6395af447 100755 --- a/tools/generate-sled-agent-api.sh +++ b/tools/generate-sled-agent-api.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash ./target/debug/sled-agent openapi bootstrap > openapi/bootstrap-agent.json ./target/debug/sled-agent openapi sled > openapi/sled-agent.json diff --git a/tools/generate-wicketd-api.sh b/tools/generate-wicketd-api.sh index f1af33aecc..3fbddee5af 100755 --- a/tools/generate-wicketd-api.sh +++ b/tools/generate-wicketd-api.sh @@ -1,3 +1,3 @@ -#!/bin/bash +#!/usr/bin/env bash ./target/debug/wicketd openapi > openapi/wicketd.json diff --git a/tools/include/force-git-over-https.sh b/tools/include/force-git-over-https.sh index cf168b2cbe..3bc7b86798 100644 --- a/tools/include/force-git-over-https.sh +++ b/tools/include/force-git-over-https.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # The token authentication mechanism that affords us access to other private # repositories requires that we use HTTPS URLs for GitHub, rather than SSH. diff --git a/tools/install_builder_prerequisites.sh b/tools/install_builder_prerequisites.sh index 1ce133dff3..0427629960 100755 --- a/tools/install_builder_prerequisites.sh +++ b/tools/install_builder_prerequisites.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -eu @@ -105,6 +105,12 @@ HOST_OS=$(uname -s) function install_packages { if [[ "${HOST_OS}" == "Linux" ]]; then + # If Nix is in use, we don't need to install any packages here, + # as they're provided by the Nix flake. + if nix flake show &> /dev/null; then + return + fi + packages=( 'libpq-dev' 'pkg-config' diff --git a/tools/install_opte.sh b/tools/install_opte.sh index b572c305a7..d56523764d 100755 --- a/tools/install_opte.sh +++ b/tools/install_opte.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Small tool to install OPTE and the xde kernel driver and ONU bits.
diff --git a/tools/install_prerequisites.sh b/tools/install_prerequisites.sh index 0323f44890..a7d3cc5aa5 100755 --- a/tools/install_prerequisites.sh +++ b/tools/install_prerequisites.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -eu diff --git a/tools/install_runner_prerequisites.sh b/tools/install_runner_prerequisites.sh index 2a29e97085..2b86c4c5f4 100755 --- a/tools/install_runner_prerequisites.sh +++ b/tools/install_runner_prerequisites.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -eu diff --git a/tools/opte_version_override b/tools/opte_version_override index 80a6529b24..8d57f7ae9f 100644 --- a/tools/opte_version_override +++ b/tools/opte_version_override @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # only set this if you want to override the version of opte/xde installed by the # install_opte.sh script diff --git a/tools/populate/populate-alpine.sh b/tools/populate/populate-alpine.sh index 6dd6ced08a..63a3400ee8 100755 --- a/tools/populate/populate-alpine.sh +++ b/tools/populate/populate-alpine.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Simple script to install the alpine image included with propolis. if ! oxide api /v1/images > /dev/null; then diff --git a/tools/populate/populate-images.sh b/tools/populate/populate-images.sh index 818ac1d13a..6580341aae 100755 --- a/tools/populate/populate-images.sh +++ b/tools/populate/populate-images.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Populate an Oxide host running Omicron with images from server catacomb. # # Note that the default tunnel IP of `fd00:...` will only be available _after_ diff --git a/tools/reflector/helpers.sh b/tools/reflector/helpers.sh index 3d4f693da2..052d7ff29a 100644 --- a/tools/reflector/helpers.sh +++ b/tools/reflector/helpers.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -o pipefail set -o errexit diff --git a/tools/renovate-post-upgrade.sh b/tools/renovate-post-upgrade.sh index 4a9e3aa2f2..3316af9dd6 100755 --- a/tools/renovate-post-upgrade.sh +++ b/tools/renovate-post-upgrade.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # This script is run after Renovate upgrades dependencies or lock files. diff --git a/tools/scrimlet/create-softnpu-zone.sh b/tools/scrimlet/create-softnpu-zone.sh index 65f7a9d56f..16a2f93bec 100755 --- a/tools/scrimlet/create-softnpu-zone.sh +++ b/tools/scrimlet/create-softnpu-zone.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -x set -e diff --git a/tools/scrimlet/destroy-softnpu-zone.sh b/tools/scrimlet/destroy-softnpu-zone.sh index 2baba6ccaa..f94e047302 100755 --- a/tools/scrimlet/destroy-softnpu-zone.sh +++ b/tools/scrimlet/destroy-softnpu-zone.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -x set -e diff --git a/tools/scrimlet/softnpu-init.sh b/tools/scrimlet/softnpu-init.sh index 59f8e83019..3f3ec8a8f7 100755 --- a/tools/scrimlet/softnpu-init.sh +++ b/tools/scrimlet/softnpu-init.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -e set -x diff --git a/tools/uninstall_opte.sh b/tools/uninstall_opte.sh index c8ee0f5b28..da625047e5 100755 --- a/tools/uninstall_opte.sh +++ b/tools/uninstall_opte.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Small tool to _uninstall_ OPTE and the xde kernel driver and ONU bits. 
# diff --git a/tools/update_crucible.sh b/tools/update_crucible.sh index 020a33927e..3aab6cb181 100755 --- a/tools/update_crucible.sh +++ b/tools/update_crucible.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -o pipefail set -o errexit diff --git a/tools/update_dendrite.sh b/tools/update_dendrite.sh index 0e3df107a4..f710332ae3 100755 --- a/tools/update_dendrite.sh +++ b/tools/update_dendrite.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -o pipefail set -o errexit diff --git a/tools/update_helpers.sh b/tools/update_helpers.sh index 64a28e4f82..656e038c46 100755 --- a/tools/update_helpers.sh +++ b/tools/update_helpers.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -o pipefail set -o errexit diff --git a/tools/update_maghemite.sh b/tools/update_maghemite.sh index db7e482776..583a75ab22 100755 --- a/tools/update_maghemite.sh +++ b/tools/update_maghemite.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -o pipefail set -o errexit diff --git a/tools/update_propolis.sh b/tools/update_propolis.sh index f97aa8dad7..567e28650f 100755 --- a/tools/update_propolis.sh +++ b/tools/update_propolis.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -o pipefail set -o errexit diff --git a/tools/update_transceiver_control.sh b/tools/update_transceiver_control.sh index 8493408cf7..ab506f75ad 100755 --- a/tools/update_transceiver_control.sh +++ b/tools/update_transceiver_control.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -o pipefail set -o errexit diff --git a/tools/virtual_hardware.sh b/tools/virtual_hardware.sh index f9f0782e09..119b64ac70 100755 --- a/tools/virtual_hardware.sh +++ b/tools/virtual_hardware.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash MARKER=/etc/opt/oxide/NO_INSTALL if [[ -f "$MARKER" ]]; then From 6491841457ad74ac340072365a12661cc460c343 Mon Sep 17 00:00:00 2001 From: Justin Bennett Date: Wed, 31 Jan 2024 13:14:42 -0500 Subject: [PATCH 65/91] Rename `ssh_keys` to `ssh_public_keys` in instance create (#4941) Fixes #4866 --- end-to-end-tests/src/instance_launch.rs | 2 +- .../db-queries/src/db/queries/external_ip.rs | 2 +- .../src/db/queries/network_interface.rs | 2 +- nexus/src/app/instance.rs | 4 +- nexus/src/app/sagas/instance_create.rs | 4 +- nexus/src/app/sagas/instance_delete.rs | 2 +- nexus/src/app/sagas/instance_migrate.rs | 2 +- nexus/src/app/sagas/instance_start.rs | 2 +- nexus/src/app/sagas/snapshot_create.rs | 2 +- nexus/test-utils/src/resource_helpers.rs | 2 +- nexus/tests/integration_tests/endpoints.rs | 2 +- nexus/tests/integration_tests/instances.rs | 64 +++++++++---------- nexus/tests/integration_tests/projects.rs | 2 +- nexus/tests/integration_tests/quotas.rs | 2 +- nexus/tests/integration_tests/snapshots.rs | 2 +- .../integration_tests/subnet_allocation.rs | 2 +- nexus/types/src/external_api/params.rs | 2 +- openapi/nexus.json | 2 +- 18 files changed, 51 insertions(+), 51 deletions(-) diff --git a/end-to-end-tests/src/instance_launch.rs b/end-to-end-tests/src/instance_launch.rs index 019bd73b04..c1da731c35 100644 --- a/end-to-end-tests/src/instance_launch.rs +++ b/end-to-end-tests/src/instance_launch.rs @@ -73,7 +73,7 @@ async fn instance_launch() -> Result<()> { network_interfaces: InstanceNetworkInterfaceAttachment::Default, external_ips: vec![ExternalIpCreate::Ephemeral { pool: None }], user_data: String::new(), - ssh_keys: Some(vec![oxide_client::types::NameOrId::Name( + ssh_public_keys: Some(vec![oxide_client::types::NameOrId::Name( ssh_key_name.clone(), )]), start: true, diff --git 
a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 2bf4efcdc0..392e669243 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -999,7 +999,7 @@ mod tests { ncpus: InstanceCpuCount(omicron_common::api::external::InstanceCpuCount(1)).into(), memory: ByteCount(omicron_common::api::external::ByteCount::from_gibibytes_u32(1)).into(), hostname: "test".into(), - ssh_keys: None, + ssh_public_keys: None, user_data: vec![], network_interfaces: Default::default(), external_ips: vec![], diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index 443e7da39b..3cfbead2f7 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -1740,7 +1740,7 @@ mod tests { memory: ByteCount::from_gibibytes_u32(4), hostname: "inst".to_string(), user_data: vec![], - ssh_keys: Some(Vec::new()), + ssh_public_keys: Some(Vec::new()), network_interfaces: InstanceNetworkInterfaceAttachment::None, external_ips: vec![], disks: vec![], diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 42f8e2d6a0..eb78d4179c 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -330,7 +330,7 @@ impl super::Nexus { .lookup_for(authz::Action::ListChildren) .await?; - let ssh_keys = match ¶ms.ssh_keys { + let ssh_keys = match ¶ms.ssh_public_keys { Some(keys) => Some( self.db_datastore .ssh_keys_batch_lookup(opctx, &authz_user, keys) @@ -354,7 +354,7 @@ impl super::Nexus { serialized_authn: authn::saga::Serialized::for_opctx(opctx), project_id: authz_project.id(), create_params: params::InstanceCreate { - ssh_keys, + ssh_public_keys: ssh_keys, ..params.clone() }, boundary_switches: self diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index 72a1cd6070..ed1b23fe82 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -331,7 +331,7 @@ async fn sic_associate_ssh_keys( &opctx, &authz_user, instance_id, - &saga_params.create_params.ssh_keys.map(|k| { + &saga_params.create_params.ssh_public_keys.map(|k| { // Before the instance_create saga is kicked off all entries // in `ssh_keys` are validated and converted to `Uuids`. 
k.iter() @@ -1104,7 +1104,7 @@ pub mod test { memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("inst"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index 4717a1e548..067e2d79ed 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -237,7 +237,7 @@ mod test { memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("inst"), user_data: vec![], - ssh_keys: Some(Vec::new()), + ssh_public_keys: Some(Vec::new()), network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index feeb172ea2..5e91b8fed1 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -570,7 +570,7 @@ mod tests { memory: ByteCount::from_gibibytes_u32(2), hostname: String::from(INSTANCE_NAME), user_data: b"#cloud-config".to_vec(), - ssh_keys: Some(Vec::new()), + ssh_public_keys: Some(Vec::new()), network_interfaces: params::InstanceNetworkInterfaceAttachment::None, external_ips: vec![], diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 157a000e37..b4cc6f4cc6 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -749,7 +749,7 @@ mod test { memory: ByteCount::from_gibibytes_u32(2), hostname: String::from(INSTANCE_NAME), user_data: b"#cloud-config".to_vec(), - ssh_keys: Some(Vec::new()), + ssh_public_keys: Some(Vec::new()), network_interfaces: params::InstanceNetworkInterfaceAttachment::None, external_ips: vec![], diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 764160647f..d80b1b9029 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -1944,7 +1944,7 @@ mod test { user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), - ssh_keys: Some(Vec::new()), + ssh_public_keys: Some(Vec::new()), network_interfaces: params::InstanceNetworkInterfaceAttachment::None, disks: disks_to_attach, diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index b493986213..254723d32b 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -522,7 +522,7 @@ pub async fn create_instance_with( user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), - ssh_keys: Some(Vec::new()), + ssh_public_keys: Some(Vec::new()), network_interfaces: nics.clone(), external_ips, disks, diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index b0d23e5322..4ce5901d08 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -423,7 +423,7 @@ pub static DEMO_INSTANCE_CREATE: Lazy = memory: ByteCount::from_gibibytes_u32(16), hostname: String::from("demo-instance"), user_data: vec![], - ssh_keys: Some(Vec::new()), + ssh_public_keys: Some(Vec::new()), network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { pool: Some(DEMO_IP_POOL_NAME.clone().into()), diff --git 
a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index ac8a9c0fc6..e5d1c2f143 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -249,7 +249,7 @@ async fn test_instances_create_reboot_halt( memory: instance.memory, hostname: instance.hostname.clone(), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], @@ -1222,7 +1222,7 @@ async fn test_instances_create_stopped_start( memory: ByteCount::from_gibibytes_u32(1), hostname: String::from("the_host"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], @@ -1390,7 +1390,7 @@ async fn test_instance_using_image_from_other_project_fails( memory: ByteCount::from_gibibytes_u32(1), hostname: "stolen".into(), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], @@ -1465,7 +1465,7 @@ async fn test_instance_create_saga_removes_instance_database_record( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("inst"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: interface_params.clone(), external_ips: vec![], disks: vec![], @@ -1493,7 +1493,7 @@ async fn test_instance_create_saga_removes_instance_database_record( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("inst2"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: interface_params, external_ips: vec![], disks: vec![], @@ -1582,7 +1582,7 @@ async fn test_instance_with_single_explicit_ip_address( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nic-test"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: interface_params, external_ips: vec![], disks: vec![], @@ -1696,7 +1696,7 @@ async fn test_instance_with_new_custom_network_interfaces( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nic-test"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: interface_params, external_ips: vec![], disks: vec![], @@ -1810,7 +1810,7 @@ async fn test_instance_create_delete_network_interface( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nic-test"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::None, external_ips: vec![], disks: vec![], @@ -2051,7 +2051,7 @@ async fn test_instance_update_network_interfaces( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nic-test"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::None, external_ips: vec![], disks: vec![], @@ -2444,7 +2444,7 @@ async fn test_instance_with_multiple_nics_unwinds_completely( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nic-test"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: interface_params, external_ips: vec![], disks: vec![], @@ -2510,7 +2510,7 @@ async fn test_attach_one_disk_to_instance(cptestctx: &ControlPlaneTestContext) { memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nfs"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: 
params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![params::InstanceDiskAttachment::Attach( @@ -2570,7 +2570,7 @@ async fn test_instance_create_attach_disks( memory: ByteCount::from_gibibytes_u32(3), hostname: String::from("nfs"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![ @@ -2667,7 +2667,7 @@ async fn test_instance_create_attach_disks_undo( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nfs"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![ @@ -2752,7 +2752,7 @@ async fn test_attach_eight_disks_to_instance( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nfs"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: (0..8) @@ -2833,7 +2833,7 @@ async fn test_cannot_attach_nine_disks_to_instance( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nfs"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: (0..9) @@ -2928,7 +2928,7 @@ async fn test_cannot_attach_faulted_disks(cptestctx: &ControlPlaneTestContext) { memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nfs"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: (0..8) @@ -3012,7 +3012,7 @@ async fn test_disks_detached_when_instance_destroyed( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nfs"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: (0..8) @@ -3103,7 +3103,7 @@ async fn test_disks_detached_when_instance_destroyed( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("nfsv2"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: (0..8) @@ -3165,7 +3165,7 @@ async fn test_instances_memory_rejected_less_than_min_memory_size( user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![], @@ -3215,7 +3215,7 @@ async fn test_instances_memory_not_divisible_by_min_memory_size( user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![], @@ -3265,7 +3265,7 @@ async fn test_instances_memory_greater_than_max_size( user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![], @@ -3345,7 +3345,7 @@ async fn test_instance_create_with_ssh_keys( ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), // By default should transfer all profile keys - ssh_keys: None, + 
ssh_public_keys: None, start: false, hostname: instance_name.to_string(), user_data: vec![], @@ -3391,7 +3391,7 @@ async fn test_instance_create_with_ssh_keys( ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), // Should only transfer the first key - ssh_keys: Some(vec![user_keys[0].identity.name.clone().into()]), + ssh_public_keys: Some(vec![user_keys[0].identity.name.clone().into()]), start: false, hostname: instance_name.to_string(), user_data: vec![], @@ -3436,7 +3436,7 @@ async fn test_instance_create_with_ssh_keys( ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), // Should transfer no keys - ssh_keys: Some(vec![]), + ssh_public_keys: Some(vec![]), start: false, hostname: instance_name.to_string(), user_data: vec![], @@ -3558,7 +3558,7 @@ async fn test_cannot_provision_instance_beyond_cpu_capacity( memory: ByteCount::from_gibibytes_u32(1), hostname: config.0.to_string(), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], @@ -3612,7 +3612,7 @@ async fn test_cannot_provision_instance_beyond_cpu_limit( memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("test"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], disks: vec![], @@ -3664,7 +3664,7 @@ async fn test_cannot_provision_instance_beyond_ram_capacity( memory: ByteCount::try_from(config.1).unwrap(), hostname: config.0.to_string(), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], @@ -3919,7 +3919,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( external_ips: vec![params::ExternalIpCreate::Ephemeral { pool: Some("pool1".parse::().unwrap().into()), }], - ssh_keys: None, + ssh_public_keys: None, disks: vec![], start: true, }; @@ -3984,7 +3984,7 @@ async fn test_instance_ephemeral_ip_from_orphan_pool( external_ips: vec![params::ExternalIpCreate::Ephemeral { pool: Some("orphan-pool".parse::().unwrap().into()), }], - ssh_keys: None, + ssh_public_keys: None, disks: vec![], start: true, }; @@ -4045,7 +4045,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error( external_ips: vec![params::ExternalIpCreate::Ephemeral { pool: None, // <--- the only important thing here }], - ssh_keys: None, + ssh_public_keys: None, disks: vec![], start: true, }; @@ -4176,7 +4176,7 @@ async fn test_instance_allow_only_one_ephemeral_ip( user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![ephemeral_create.clone(), ephemeral_create], disks: vec![], @@ -4302,7 +4302,7 @@ async fn test_instance_create_in_silo(cptestctx: &ControlPlaneTestContext) { memory: ByteCount::from_gibibytes_u32(4), hostname: String::from("inst"), user_data: vec![], - ssh_keys: None, + ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { pool: Some("default".parse::().unwrap().into()), diff --git a/nexus/tests/integration_tests/projects.rs b/nexus/tests/integration_tests/projects.rs index dc5b844337..a89f2508ac 100644 --- a/nexus/tests/integration_tests/projects.rs +++ 
b/nexus/tests/integration_tests/projects.rs @@ -157,7 +157,7 @@ async fn test_project_deletion_with_instance( memory: ByteCount::from_gibibytes_u32(1), hostname: String::from("the_host"), user_data: b"none".to_vec(), - ssh_keys: Some(Vec::new()), + ssh_public_keys: Some(Vec::new()), network_interfaces: params::InstanceNetworkInterfaceAttachment::None, external_ips: vec![], diff --git a/nexus/tests/integration_tests/quotas.rs b/nexus/tests/integration_tests/quotas.rs index c13dda7796..c0422d0030 100644 --- a/nexus/tests/integration_tests/quotas.rs +++ b/nexus/tests/integration_tests/quotas.rs @@ -81,7 +81,7 @@ impl ResourceAllocator { hostname: "host".to_string(), user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), - ssh_keys: Some(Vec::new()), + ssh_public_keys: Some(Vec::new()), network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: Vec::::new(), disks: Vec::::new(), diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index c493e725b2..9a2ee3d310 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -123,7 +123,7 @@ async fn test_snapshot_basic(cptestctx: &ControlPlaneTestContext) { user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), - ssh_keys: Some(Vec::new()), + ssh_public_keys: Some(Vec::new()), network_interfaces: params::InstanceNetworkInterfaceAttachment::None, disks: vec![params::InstanceDiskAttachment::Attach( diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index 9749086d47..3c9e18817f 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ -58,7 +58,7 @@ async fn create_instance_expect_failure( memory: ByteCount::from_gibibytes_u32(1), hostname: name.to_string(), user_data: vec![], - ssh_keys: Some(Vec::new()), + ssh_public_keys: Some(Vec::new()), network_interfaces, external_ips: vec![], disks: vec![], diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 73687017ae..84336c9099 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1042,7 +1042,7 @@ pub struct InstanceCreate { /// If not provided, all SSH public keys from the user's profile will be sent. /// If an empty list is provided, no public keys will be transmitted to the /// instance. - pub ssh_keys: Option>, + pub ssh_public_keys: Option>, /// Should this instance be started upon creation; true by default. #[serde(default = "bool_true")] diff --git a/openapi/nexus.json b/openapi/nexus.json index 8c5af8b14d..f1da97a55e 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -12430,7 +12430,7 @@ } ] }, - "ssh_keys": { + "ssh_public_keys": { "nullable": true, "description": "An allowlist of SSH public keys to be transferred to the instance via cloud-init during instance creation.\n\nIf not provided, all SSH public keys from the user's profile will be sent. If an empty list is provided, no public keys will be transmitted to the instance.", "type": "array", From 98180bf18de236fa1d8084d7b8b7637f78cf0653 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Wed, 31 Jan 2024 15:31:43 -0800 Subject: [PATCH 66/91] Perform inventory fetch queries in batches (#4932) When querying the database for inventory data, we read a large number of rows from a lot of different tables. 
Therefore, we want to ensure that there is always an upper bound on the amount of data read per query. Currently, that upper bound is enforced using _limits_: we add a limit to the query, and if more rows would be returned, we fail the query. The problem with this approach is that the behavior on reaching the bound is to *fail*: it's possible to select an upper bound that's too small, the query simply errors out when it's hit, and there's no way to know whether a given limit is big enough until we hit it at runtime. Instead of using hard limits, it's preferable to use _pagination_. By paginating these queries, we still set an upper bound on the number of rows returned by a single query, but rather than failing if that bound is reached, we instead perform multiple queries. This branch changes inventory fetch queries to use pagination with a batch size limit, rather than a hard limit on query size, using the pagination utilities added in #4632. I've replaced the `inventory_collection_read_best_effort` and `inventory_collection_read_all_or_nothing` methods, which limit the size of the query, with a new `inventory_collection_read_batched` method, which treats the limit parameter as a maximum size per batch. I've then changed the existing `inventory_get_latest_collection` method to use `inventory_collection_read_batched` rather than `inventory_collection_read_all_or_nothing`. Closes #4629 --- dev-tools/omdb/src/bin/omdb/db.rs | 9 +- .../db-queries/src/db/datastore/inventory.rs | 516 +++++++++++------- .../app/background/sync_service_zone_nat.rs | 2 - nexus/src/app/deployment.rs | 14 +- nexus/src/app/rack.rs | 27 +- 5 files changed, 328 insertions(+), 240 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs index a465183351..c2a4250595 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -2291,7 +2291,6 @@ async fn cmd_db_inventory( opctx, datastore, id, - limit, long_string_formatter, ) .await @@ -2496,16 +2495,12 @@ async fn cmd_db_inventory_collections_show( opctx: &OpContext, datastore: &DataStore, id: Uuid, - limit: NonZeroU32, long_string_formatter: LongStringFormatter, ) -> Result<(), anyhow::Error> { - let (collection, incomplete) = datastore - .inventory_collection_read_best_effort(opctx, id, limit) + let collection = datastore + .inventory_collection_read(opctx, id) .await .context("reading collection")?; - if incomplete { - limit_error(limit, || "loading collection"); - } inv_collection_print(&collection).await?; let nerrors = inv_collection_print_errors(&collection).await?; diff --git a/nexus/db-queries/src/db/datastore/inventory.rs index bdacb0e7b9..6b737f21ac 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -9,6 +9,7 @@ use crate::db; use crate::db::error::public_error_from_diesel; use crate::db::error::public_error_from_diesel_lookup; use crate::db::error::ErrorHandler; +use crate::db::pagination::{paginated, paginated_multicolumn, Paginator}; use crate::db::queries::ALLOW_FULL_TABLE_SCAN_SQL; use crate::db::TransactionError; use anyhow::Context; @@ -65,6 +66,14 @@ use std::num::NonZeroU32; use std::sync::Arc; use uuid::Uuid; +/// "limit" used in SQL queries that paginate through all SPs, RoTs, sleds, +/// omicron zones, etc. 
+/// +/// We use a [`Paginator`] to guard against single queries returning an +/// unchecked number of rows. +// unsafe: `new_unchecked` is only unsound if the argument is 0. +const SQL_BATCH_SIZE: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) }; + impl DataStore { /// Store a complete inventory collection into the database pub async fn inventory_insert_collection( @@ -1195,14 +1204,12 @@ impl DataStore { }) } - /// Attempt to read the latest collection while limiting queries to `limit` - /// records + /// Attempt to read the latest collection. /// /// If there aren't any collections, return `Ok(None)`. pub async fn inventory_get_latest_collection( &self, opctx: &OpContext, - limit: NonZeroU32, ) -> Result, Error> { opctx.authorize(authz::Action::Read, &authz::INVENTORY).await?; let conn = self.pool_connection_authorized(opctx).await?; @@ -1219,49 +1226,37 @@ impl DataStore { return Ok(None); }; - Ok(Some( - self.inventory_collection_read_all_or_nothing( - opctx, - collection_id, - limit, - ) - .await?, - )) + Ok(Some(self.inventory_collection_read(opctx, collection_id).await?)) } - /// Attempt to read the given collection while limiting queries to `limit` - /// records and returning nothing if `limit` is not large enough. - pub async fn inventory_collection_read_all_or_nothing( + /// Attempt to read the current collection + pub async fn inventory_collection_read( &self, opctx: &OpContext, id: Uuid, - limit: NonZeroU32, ) -> Result { - let (collection, limit_reached) = self - .inventory_collection_read_best_effort(opctx, id, limit) - .await?; - bail_unless!( - !limit_reached, - "hit limit of {} records while loading collection", - limit - ); - Ok(collection) + self.inventory_collection_read_batched(opctx, id, SQL_BATCH_SIZE).await } - /// Make a best effort to read the given collection while limiting queries - /// to `limit` results. Returns as much as it was able to get. The - /// returned bool indicates whether the returned collection might be - /// incomplete because the limit was reached. - pub async fn inventory_collection_read_best_effort( + /// Attempt to read the current collection with the provided batch size. + /// + /// Queries are limited to `batch_size` records at a time, performing + /// multiple queries if more than `batch_size` records exist. + /// + /// In general, we don't want to permit downstream code to determine the + /// batch size; instead, we would like to always use `SQL_BATCH_SIZE`. + /// However, in order to facilitate testing of the batching logic itself, + /// this private method is separated from the public APIs + /// [`Self::inventory_get_latest_collection`] and + /// [`Self::inventory_collection_read`], so that we can test with smaller + /// batch sizes. 
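+ ///
+ /// Every table read below follows the same `Paginator` pattern. As a
+ /// rough sketch (here `dsl::some_table`, its `dsl::id` key column, and
+ /// `SomeRow` are hypothetical, not a real schema):
+ ///
+ /// ```ignore
+ /// let mut rows = Vec::new();
+ /// let mut paginator = Paginator::new(batch_size);
+ /// while let Some(p) = paginator.next() {
+ ///     let mut batch =
+ ///         paginated(dsl::some_table, dsl::id, &p.current_pagparams())
+ ///             .select(SomeRow::as_select())
+ ///             .load_async(&*conn)
+ ///             .await?;
+ ///     // Tell the paginator where to resume; a short batch marks the
+ ///     // end of the scan.
+ ///     paginator = p.found_batch(&batch, &|row| row.id);
+ ///     rows.append(&mut batch);
+ /// }
+ /// ```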
+ async fn inventory_collection_read_batched( &self, opctx: &OpContext, id: Uuid, - limit: NonZeroU32, - ) -> Result<(Collection, bool), Error> { + batch_size: NonZeroU32, + ) -> Result { let conn = self.pool_connection_authorized(opctx).await?; - let sql_limit = i64::from(u32::from(limit)); - let usize_limit = usize::try_from(u32::from(limit)).unwrap(); - let mut limit_reached = false; let (time_started, time_done, collector) = { use db::schema::inv_collection::dsl; @@ -1285,73 +1280,115 @@ impl DataStore { let errors: Vec = { use db::schema::inv_collection_error::dsl; - dsl::inv_collection_error + let mut errors = Vec::new(); + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::inv_collection_error, + dsl::idx, + &p.current_pagparams(), + ) .filter(dsl::inv_collection_id.eq(id)) .order_by(dsl::idx) - .limit(sql_limit) .select(InvCollectionError::as_select()) .load_async(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? - .into_iter() - .map(|e| e.message) - .collect() + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = + p.found_batch(&batch, &|row: &InvCollectionError| row.idx); + errors.extend(batch.into_iter().map(|e| e.message)); + } + errors }; - limit_reached = limit_reached || errors.len() == usize_limit; let sps: BTreeMap<_, _> = { use db::schema::inv_service_processor::dsl; - dsl::inv_service_processor + + let mut sps = BTreeMap::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::inv_service_processor, + dsl::hw_baseboard_id, + &p.current_pagparams(), + ) .filter(dsl::inv_collection_id.eq(id)) - .limit(sql_limit) .select(InvServiceProcessor::as_select()) .load_async(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? - .into_iter() - .map(|sp_row| { - let baseboard_id = sp_row.hw_baseboard_id; + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.hw_baseboard_id); + sps.extend(batch.into_iter().map(|row| { + let baseboard_id = row.hw_baseboard_id; ( baseboard_id, - nexus_types::inventory::ServiceProcessor::from(sp_row), + nexus_types::inventory::ServiceProcessor::from(row), ) - }) - .collect() + })); + } + sps }; - limit_reached = limit_reached || sps.len() == usize_limit; let rots: BTreeMap<_, _> = { use db::schema::inv_root_of_trust::dsl; - dsl::inv_root_of_trust + + let mut rots = BTreeMap::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::inv_root_of_trust, + dsl::hw_baseboard_id, + &p.current_pagparams(), + ) .filter(dsl::inv_collection_id.eq(id)) - .limit(sql_limit) .select(InvRootOfTrust::as_select()) .load_async(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? 
- .into_iter() - .map(|rot_row| { + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.hw_baseboard_id); + rots.extend(batch.into_iter().map(|rot_row| { let baseboard_id = rot_row.hw_baseboard_id; ( baseboard_id, nexus_types::inventory::RotState::from(rot_row), ) - }) - .collect() + })); + } + rots }; - limit_reached = limit_reached || rots.len() == usize_limit; let sled_agent_rows: Vec<_> = { use db::schema::inv_sled_agent::dsl; - dsl::inv_sled_agent + + let mut rows = Vec::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let mut batch = paginated( + dsl::inv_sled_agent, + dsl::sled_id, + &p.current_pagparams(), + ) .filter(dsl::inv_collection_id.eq(id)) - .limit(sql_limit) .select(InvSledAgent::as_select()) .load_async(&*conn) .await .map_err(|e| { public_error_from_diesel(e, ErrorHandler::Server) - })? + })?; + paginator = p.found_batch(&batch, &|row| row.sled_id); + rows.append(&mut batch); + } + + rows }; // Collect the unique baseboard ids referenced by SPs, RoTs, and Sled @@ -1365,23 +1402,34 @@ impl DataStore { // Fetch the corresponding baseboard records. let baseboards_by_id: BTreeMap<_, _> = { use db::schema::hw_baseboard_id::dsl; - dsl::hw_baseboard_id - .filter(dsl::id.eq_any(baseboard_id_ids)) - .limit(sql_limit) + + let mut bbs = BTreeMap::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::hw_baseboard_id, + dsl::id, + &p.current_pagparams(), + ) + .filter(dsl::id.eq_any(baseboard_id_ids.clone())) .select(HwBaseboardId::as_select()) .load_async(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? - .into_iter() - .map(|bb| { + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.id); + bbs.extend(batch.into_iter().map(|bb| { ( bb.id, Arc::new(nexus_types::inventory::BaseboardId::from(bb)), ) - }) - .collect() + })); + } + + bbs }; - limit_reached = limit_reached || baseboards_by_id.len() == usize_limit; // Having those, we can replace the keys in the maps above with // references to the actual baseboard rather than the uuid. @@ -1457,17 +1505,31 @@ impl DataStore { // Fetch records of cabooses found. let inv_caboose_rows = { use db::schema::inv_caboose::dsl; - dsl::inv_caboose + + let mut cabooses = Vec::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let mut batch = paginated_multicolumn( + dsl::inv_caboose, + (dsl::hw_baseboard_id, dsl::which), + &p.current_pagparams(), + ) .filter(dsl::inv_collection_id.eq(id)) - .limit(sql_limit) .select(InvCaboose::as_select()) .load_async(&*conn) .await .map_err(|e| { public_error_from_diesel(e, ErrorHandler::Server) - })? + })?; + paginator = p.found_batch(&batch, &|row| { + (row.hw_baseboard_id, row.which) + }); + cabooses.append(&mut batch); + } + + cabooses }; - limit_reached = limit_reached || inv_caboose_rows.len() == usize_limit; // Collect the unique sw_caboose_ids for those cabooses. let sw_caboose_ids: BTreeSet<_> = inv_caboose_rows @@ -1477,25 +1539,33 @@ impl DataStore { // Fetch the corresponing records. 
let cabooses_by_id: BTreeMap<_, _> = { use db::schema::sw_caboose::dsl; - dsl::sw_caboose - .filter(dsl::id.eq_any(sw_caboose_ids)) - .limit(sql_limit) - .select(SwCaboose::as_select()) - .load_async(&*conn) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? - .into_iter() - .map(|sw_caboose_row| { + + let mut cabooses = BTreeMap::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = + paginated(dsl::sw_caboose, dsl::id, &p.current_pagparams()) + .filter(dsl::id.eq_any(sw_caboose_ids.clone())) + .select(SwCaboose::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.id); + cabooses.extend(batch.into_iter().map(|sw_caboose_row| { ( sw_caboose_row.id, Arc::new(nexus_types::inventory::Caboose::from( sw_caboose_row, )), ) - }) - .collect() + })); + } + + cabooses }; - limit_reached = limit_reached || cabooses_by_id.len() == usize_limit; // Assemble the lists of cabooses found. let mut cabooses_found = BTreeMap::new(); @@ -1537,17 +1607,31 @@ impl DataStore { // Fetch records of RoT pages found. let inv_rot_page_rows = { use db::schema::inv_root_of_trust_page::dsl; - dsl::inv_root_of_trust_page + + let mut rot_pages = Vec::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let mut batch = paginated_multicolumn( + dsl::inv_root_of_trust_page, + (dsl::hw_baseboard_id, dsl::which), + &p.current_pagparams(), + ) .filter(dsl::inv_collection_id.eq(id)) - .limit(sql_limit) .select(InvRotPage::as_select()) .load_async(&*conn) .await .map_err(|e| { public_error_from_diesel(e, ErrorHandler::Server) - })? + })?; + paginator = p.found_batch(&batch, &|row| { + (row.hw_baseboard_id, row.which) + }); + rot_pages.append(&mut batch); + } + + rot_pages }; - limit_reached = limit_reached || inv_rot_page_rows.len() == usize_limit; // Collect the unique sw_rot_page_ids for those pages. let sw_rot_page_ids: BTreeSet<_> = inv_rot_page_rows @@ -1557,25 +1641,36 @@ impl DataStore { // Fetch the corresponding records. let rot_pages_by_id: BTreeMap<_, _> = { use db::schema::sw_root_of_trust_page::dsl; - dsl::sw_root_of_trust_page - .filter(dsl::id.eq_any(sw_rot_page_ids)) - .limit(sql_limit) + + let mut rot_pages = BTreeMap::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::sw_root_of_trust_page, + dsl::id, + &p.current_pagparams(), + ) + .filter(dsl::id.eq_any(sw_rot_page_ids.clone())) .select(SwRotPage::as_select()) .load_async(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? - .into_iter() - .map(|sw_rot_page_row| { + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.id); + rot_pages.extend(batch.into_iter().map(|sw_rot_page_row| { ( sw_rot_page_row.id, Arc::new(nexus_types::inventory::RotPage::from( sw_rot_page_row, )), ) - }) - .collect() + })) + } + + rot_pages }; - limit_reached = limit_reached || rot_pages_by_id.len() == usize_limit; // Assemble the lists of rot pages found. let mut rot_pages_found = BTreeMap::new(); @@ -1626,62 +1721,98 @@ impl DataStore { // found on each sled. 
let mut omicron_zones: BTreeMap<_, _> = { use db::schema::inv_sled_omicron_zones::dsl; - dsl::inv_sled_omicron_zones + + let mut zones = BTreeMap::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::inv_sled_omicron_zones, + dsl::sled_id, + &p.current_pagparams(), + ) .filter(dsl::inv_collection_id.eq(id)) - .limit(sql_limit) .select(InvSledOmicronZones::as_select()) .load_async(&*conn) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? - .into_iter() - .map(|sled_zones_config| { + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.sled_id); + zones.extend(batch.into_iter().map(|sled_zones_config| { ( sled_zones_config.sled_id, sled_zones_config.into_uninit_zones_found(), ) - }) - .collect() + })) + } + + zones }; - limit_reached = limit_reached || omicron_zones.len() == usize_limit; // Assemble a mutable map of all the NICs found, by NIC id. As we // match these up with the corresponding zone below, we'll remove items // from this set. That way we can tell if the same NIC was used twice // or not used at all. - let mut omicron_zone_nics: BTreeMap<_, _> = { - use db::schema::inv_omicron_zone_nic::dsl; - dsl::inv_omicron_zone_nic - .filter(dsl::inv_collection_id.eq(id)) - .limit(sql_limit) - .select(InvOmicronZoneNic::as_select()) - .load_async(&*conn) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? - .into_iter() - .map(|found_zone_nic| (found_zone_nic.id, found_zone_nic)) - .collect() - }; - limit_reached = limit_reached || omicron_zone_nics.len() == usize_limit; + let mut omicron_zone_nics: BTreeMap<_, _> = + { + use db::schema::inv_omicron_zone_nic::dsl; + + let mut nics = BTreeMap::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::inv_omicron_zone_nic, + dsl::id, + &p.current_pagparams(), + ) + .filter(dsl::inv_collection_id.eq(id)) + .select(InvOmicronZoneNic::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| row.id); + nics.extend(batch.into_iter().map(|found_zone_nic| { + (found_zone_nic.id, found_zone_nic) + })); + } + + nics + }; // Now load the actual list of zones from all sleds. let omicron_zones_list = { use db::schema::inv_omicron_zone::dsl; - dsl::inv_omicron_zone + + let mut zones = Vec::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let mut batch = paginated( + dsl::inv_omicron_zone, + dsl::id, + &p.current_pagparams(), + ) .filter(dsl::inv_collection_id.eq(id)) // It's not strictly necessary to order these by id. Doing so // ensures a consistent representation for `Collection`, which // makes testing easier. It's already indexed to do this, too. .order_by(dsl::id) - .limit(sql_limit) .select(InvOmicronZone::as_select()) .load_async(&*conn) .await .map_err(|e| { public_error_from_diesel(e, ErrorHandler::Server) - })? 
+ })?; + paginator = p.found_batch(&batch, &|row| row.id); + zones.append(&mut batch); + } + + zones }; - limit_reached = - limit_reached || omicron_zones_list.len() == usize_limit; for z in omicron_zones_list { let nic_row = z .nic_id @@ -1727,25 +1858,22 @@ impl DataStore { omicron_zone_nics.keys() ); - Ok(( - Collection { - id, - errors, - time_started, - time_done, - collector, - baseboards: baseboards_by_id.values().cloned().collect(), - cabooses: cabooses_by_id.values().cloned().collect(), - rot_pages: rot_pages_by_id.values().cloned().collect(), - sps, - rots, - cabooses_found, - rot_pages_found, - sled_agents, - omicron_zones, - }, - limit_reached, - )) + Ok(Collection { + id, + errors, + time_started, + time_done, + collector, + baseboards: baseboards_by_id.values().cloned().collect(), + cabooses: cabooses_by_id.values().cloned().collect(), + rot_pages: rot_pages_by_id.values().cloned().collect(), + sps, + rots, + cabooses_found, + rot_pages_found, + sled_agents, + omicron_zones, + }) } } @@ -1786,10 +1914,8 @@ impl DataStoreInventoryTest for DataStore { #[cfg(test)] mod test { - use crate::context::OpContext; use crate::db::datastore::datastore_test; use crate::db::datastore::inventory::DataStoreInventoryTest; - use crate::db::datastore::DataStore; use crate::db::datastore::DataStoreConnection; use crate::db::schema; use anyhow::Context; @@ -1804,23 +1930,10 @@ mod test { use nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_types::inventory::BaseboardId; use nexus_types::inventory::CabooseWhich; - use nexus_types::inventory::Collection; use nexus_types::inventory::RotPageWhich; use omicron_common::api::external::Error; use omicron_test_utils::dev; use std::num::NonZeroU32; - use uuid::Uuid; - - async fn read_collection( - opctx: &OpContext, - datastore: &DataStore, - id: Uuid, - ) -> anyhow::Result { - let limit = NonZeroU32::new(1000).unwrap(); - Ok(datastore - .inventory_collection_read_all_or_nothing(opctx, id, limit) - .await?) - } struct CollectionCounts { baseboards: usize, @@ -1899,10 +2012,10 @@ mod test { // Read it back. 
let conn = datastore.pool_connection_for_tests().await.unwrap(); - let collection_read = - read_collection(&opctx, &datastore, collection1.id) - .await - .expect("failed to read collection back"); + let collection_read = datastore + .inventory_collection_read(&opctx, collection1.id) + .await + .expect("failed to read collection back"); assert_eq!(collection1, collection_read); // There ought to be no baseboards, cabooses, or RoT pages in the @@ -1923,10 +2036,10 @@ mod test { .inventory_insert_collection(&opctx, &collection2) .await .expect("failed to insert collection"); - let collection_read = - read_collection(&opctx, &datastore, collection2.id) - .await - .expect("failed to read collection back"); + let collection_read = datastore + .inventory_collection_read(&opctx, collection2.id) + .await + .expect("failed to read collection back"); assert_eq!(collection2, collection_read); // Verify that we have exactly the set of cabooses, baseboards, and RoT // pages in the databases that came from this first non-empty @@ -1939,17 +2052,23 @@ mod test { assert_eq!(collection2.cabooses.len(), coll_counts.cabooses); assert_eq!(collection2.rot_pages.len(), coll_counts.rot_pages); - // Check that we get an error on the limit being reached for - // `read_all_or_nothing` - let limit = NonZeroU32::new(1).unwrap(); - assert!(datastore - .inventory_collection_read_all_or_nothing( + // Try another read with a batch size of 1, and assert we got all the + // same data as the previous read with the default batch size. This + // ensures that we correctly handle queries over the batch size, without + // having to actually read 1000s of records. + let batched_read = datastore + .inventory_collection_read_batched( &opctx, collection2.id, - limit + NonZeroU32::new(1).unwrap(), ) .await - .is_err()); + .expect("failed to read back with batch size 1"); + assert_eq!( + collection_read, batched_read, + "read with default batch size and read with batch size 1 must \ + return the same results" + ); // Now insert an equivalent collection again. Verify the distinct // baseboards, cabooses, and RoT pages again. This is important: the @@ -1961,10 +2080,10 @@ mod test { .inventory_insert_collection(&opctx, &collection3) .await .expect("failed to insert collection"); - let collection_read = - read_collection(&opctx, &datastore, collection3.id) - .await - .expect("failed to read collection back"); + let collection_read = datastore + .inventory_collection_read(&opctx, collection3.id) + .await + .expect("failed to read collection back"); assert_eq!(collection3, collection_read); // Verify that we have the same number of cabooses, baseboards, and RoT // pages, since those didn't change. @@ -2015,10 +2134,10 @@ mod test { .inventory_insert_collection(&opctx, &collection4) .await .expect("failed to insert collection"); - let collection_read = - read_collection(&opctx, &datastore, collection4.id) - .await - .expect("failed to read collection back"); + let collection_read = datastore + .inventory_collection_read(&opctx, collection4.id) + .await + .expect("failed to read collection back"); assert_eq!(collection4, collection_read); // Verify the number of baseboards and collections again. 
assert_eq!( @@ -2044,10 +2163,10 @@ mod test { .inventory_insert_collection(&opctx, &collection5) .await .expect("failed to insert collection"); - let collection_read = - read_collection(&opctx, &datastore, collection5.id) - .await - .expect("failed to read collection back"); + let collection_read = datastore + .inventory_collection_read(&opctx, collection5.id) + .await + .expect("failed to read collection back"); assert_eq!(collection5, collection_read); assert_eq!(collection5.baseboards.len(), collection3.baseboards.len()); assert_eq!(collection5.cabooses.len(), collection3.cabooses.len()); @@ -2178,19 +2297,26 @@ mod test { ); // If we try to fetch a pruned collection, we should get nothing. - let _ = read_collection(&opctx, &datastore, collection4.id) + let _ = datastore + .inventory_collection_read(&opctx, collection4.id) .await .expect_err("unexpectedly read pruned collection"); // But we should still be able to fetch the collections that do exist. - let collection_read = - read_collection(&opctx, &datastore, collection5.id).await.unwrap(); + let collection_read = datastore + .inventory_collection_read(&opctx, collection5.id) + .await + .unwrap(); assert_eq!(collection5, collection_read); - let collection_read = - read_collection(&opctx, &datastore, collection6.id).await.unwrap(); + let collection_read = datastore + .inventory_collection_read(&opctx, collection6.id) + .await + .unwrap(); assert_eq!(collection6, collection_read); - let collection_read = - read_collection(&opctx, &datastore, collection7.id).await.unwrap(); + let collection_read = datastore + .inventory_collection_read(&opctx, collection7.id) + .await + .unwrap(); assert_eq!(collection7, collection_read); // We should prune more than one collection, if needed. We'll wind up diff --git a/nexus/src/app/background/sync_service_zone_nat.rs b/nexus/src/app/background/sync_service_zone_nat.rs index 8e75f97d7a..a3d52f4469 100644 --- a/nexus/src/app/background/sync_service_zone_nat.rs +++ b/nexus/src/app/background/sync_service_zone_nat.rs @@ -18,7 +18,6 @@ use omicron_common::api::external; use serde_json::json; use sled_agent_client::types::OmicronZoneType; use std::net::{IpAddr, SocketAddr}; -use std::num::NonZeroU32; use std::sync::Arc; // Minumum number of boundary NTP zones that should be present in a valid @@ -62,7 +61,6 @@ impl BackgroundTask for ServiceZoneNatTracker { .datastore .inventory_get_latest_collection( opctx, - NonZeroU32::new(u32::MAX).unwrap(), ) .await { diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index b9718a0367..70d6d242fb 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -37,14 +37,6 @@ use uuid::Uuid; // unsafe: `new_unchecked` is only unsound if the argument is 0. const SQL_BATCH_SIZE: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) }; -/// "limit" used in SQL queries that fetch inventory data. Unlike the batch -/// size above, this is a limit on the *total* number of records returned. If -/// it's too small, the whole operation will fail. See -/// oxidecomputer/omicron#4629. -// unsafe: `new_unchecked` is only unsound if the argument is 0. 
-const SQL_LIMIT_INVENTORY: NonZeroU32 = - unsafe { NonZeroU32::new_unchecked(1000) }; - /// Common structure for collecting information that the planner needs struct PlanningContext { policy: Policy, @@ -197,11 +189,7 @@ impl super::Nexus { ) -> CreateResult { let collection = self .datastore() - .inventory_collection_read_all_or_nothing( - opctx, - collection_id, - SQL_LIMIT_INVENTORY, - ) + .inventory_collection_read(opctx, collection_id) .await?; let planning_context = self.blueprint_planning_context(opctx).await?; let blueprint = BlueprintBuilder::build_initial_from_collection( diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 38c7861e46..569153f23e 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -71,19 +71,6 @@ use std::num::NonZeroU32; use std::str::FromStr; use uuid::Uuid; -// A limit for querying the last inventory collection -// -// We set a limit of 200 here to give us some breathing room when -// querying for cabooses and RoT pages, each of which is "4 per SP/RoT", -// which in a single fully populated rack works out to (32 sleds + 2 -// switches + 1 psc) * 4 = 140. -// -// This feels bad and probably needs more thought; see -// https://github.com/oxidecomputer/omicron/issues/4621 where this limit -// being too low bit us, and it will link to a more general followup -// issue. -const INVENTORY_COLLECTION_LIMIT: u32 = 200; - impl super::Nexus { pub(crate) async fn racks_list( &self, @@ -812,11 +799,8 @@ impl super::Nexus { ) -> ListResultVec { debug!(self.log, "Getting latest collection"); // Grab the SPs from the last collection - let limit = NonZeroU32::new(INVENTORY_COLLECTION_LIMIT).unwrap(); - let collection = self - .db_datastore - .inventory_get_latest_collection(opctx, limit) - .await?; + let collection = + self.db_datastore.inventory_get_latest_collection(opctx).await?; // There can't be any uninitialized sleds we know about // if there is no inventory. 
@@ -888,11 +872,8 @@ impl super::Nexus { .await?; // Grab the SPs from the last collection - let limit = NonZeroU32::new(INVENTORY_COLLECTION_LIMIT).unwrap(); - let collection = self - .db_datastore - .inventory_get_latest_collection(opctx, limit) - .await?; + let collection = + self.db_datastore.inventory_get_latest_collection(opctx).await?; // If there isn't a collection, we don't know about the sled let Some(collection) = collection else { From 749d23ecae06ba57093a9104128aff997eb0a4ab Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Wed, 31 Jan 2024 15:51:28 -0800 Subject: [PATCH 67/91] BFD support (#4852) --- common/src/nexus_config.rs | 17 +- dev-tools/omdb/tests/env.out | 15 ++ dev-tools/omdb/tests/successes.out | 12 + nexus/db-model/src/bfd.rs | 59 +++++ nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/schema.rs | 17 +- nexus/db-queries/src/db/datastore/bfd.rs | 88 ++++++++ nexus/db-queries/src/db/datastore/bgp.rs | 4 + nexus/db-queries/src/db/datastore/mod.rs | 1 + nexus/db-queries/src/db/pool_connection.rs | 1 + nexus/examples/config.toml | 1 + nexus/src/app/background/bfd.rs | 217 ++++++++++++++++++ nexus/src/app/background/init.rs | 26 ++- nexus/src/app/background/mod.rs | 1 + nexus/src/app/bfd.rs | 105 +++++++++ nexus/src/app/bgp.rs | 4 + nexus/src/app/mod.rs | 2 + nexus/src/external_api/http_entrypoints.rs | 67 ++++++ nexus/tests/config.test.toml | 1 + nexus/tests/integration_tests/endpoints.rs | 56 +++++ nexus/tests/output/nexus_tags.txt | 3 + nexus/types/src/external_api/params.rs | 54 +++++ nexus/types/src/external_api/shared.rs | 53 +++++ openapi/nexus.json | 246 +++++++++++++++++++++ package-manifest.toml | 12 +- schema/crdb/31.0.0/up1.sql | 4 + schema/crdb/31.0.0/up2.sql | 13 ++ schema/crdb/31.0.0/up3.sql | 5 + schema/crdb/dbinit.sql | 26 ++- smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + tools/maghemite_ddm_openapi_version | 2 +- tools/maghemite_mg_openapi_version | 4 +- tools/maghemite_mgd_checksums | 4 +- 34 files changed, 1108 insertions(+), 16 deletions(-) create mode 100644 nexus/db-model/src/bfd.rs create mode 100644 nexus/db-queries/src/db/datastore/bfd.rs create mode 100644 nexus/src/app/background/bfd.rs create mode 100644 nexus/src/app/bfd.rs create mode 100644 schema/crdb/31.0.0/up1.sql create mode 100644 schema/crdb/31.0.0/up2.sql create mode 100644 schema/crdb/31.0.0/up3.sql diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index dedd091d81..e987790a21 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -336,6 +336,8 @@ pub struct BackgroundTaskConfig { pub phantom_disks: PhantomDiskConfig, /// configuration for service zone nat sync task pub sync_service_zone_nat: SyncServiceZoneNatConfig, + /// configuration for the bfd manager task + pub bfd_manager: BfdManagerConfig, } #[serde_as] @@ -378,6 +380,14 @@ pub struct NatCleanupConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct BfdManagerConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + #[serde_as] #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct SyncServiceZoneNatConfig { @@ -527,7 +537,7 @@ mod test { }; use crate::address::{Ipv6Subnet, RACK_PREFIX}; use crate::api::internal::shared::SwitchLocation; - use crate::nexus_config::SyncServiceZoneNatConfig; + use crate::nexus_config::{BfdManagerConfig, 
SyncServiceZoneNatConfig}; use camino::{Utf8Path, Utf8PathBuf}; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; @@ -672,6 +682,7 @@ mod test { dns_external.max_concurrent_server_updates = 8 external_endpoints.period_secs = 9 nat_cleanup.period_secs = 30 + bfd_manager.period_secs = 30 inventory.period_secs = 10 inventory.nkeep = 11 inventory.disable = false @@ -773,6 +784,9 @@ mod test { nat_cleanup: NatCleanupConfig { period_secs: Duration::from_secs(30), }, + bfd_manager: BfdManagerConfig { + period_secs: Duration::from_secs(30), + }, inventory: InventoryConfig { period_secs: Duration::from_secs(10), nkeep: 11, @@ -838,6 +852,7 @@ mod test { dns_external.max_concurrent_server_updates = 8 external_endpoints.period_secs = 9 nat_cleanup.period_secs = 30 + bfd_manager.period_secs = 30 inventory.period_secs = 10 inventory.nkeep = 3 inventory.disable = false diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 8cca1b063a..878b3f04dd 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -23,6 +23,11 @@ EXECUTING COMMAND: omdb ["nexus", "--nexus-internal-url", "http://127.0.0.1:REDA termination: Exited(0) --------------------------------------------- stdout: +task: "bfd_manager" + Manages bidirectional fowarding detection (BFD) configuration on rack + switches + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -96,6 +101,11 @@ EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"] termination: Exited(0) --------------------------------------------- stdout: +task: "bfd_manager" + Manages bidirectional fowarding detection (BFD) configuration on rack + switches + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -156,6 +166,11 @@ EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "nexus", "backgr termination: Exited(0) --------------------------------------------- stdout: +task: "bfd_manager" + Manages bidirectional fowarding detection (BFD) configuration on rack + switches + + task: "dns_config_external" watches external DNS data stored in CockroachDB diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index f291bbb6a0..e5a38049f3 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -217,6 +217,11 @@ EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"] termination: Exited(0) --------------------------------------------- stdout: +task: "bfd_manager" + Manages bidirectional fowarding detection (BFD) configuration on rack + switches + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -339,6 +344,13 @@ task: "nat_v4_garbage_collector" started at (s ago) and ran for ms warning: unknown background task: "nat_v4_garbage_collector" (don't know how to interpret details: Null) +task: "bfd_manager" + configured period: every 30s + currently executing: no + last completed activation: iter 2, triggered by an explicit signal + started at (s ago) and ran for ms +warning: unknown background task: "bfd_manager" (don't know how to interpret details: Object {}) + task: "external_endpoints" configured period: every 1m currently executing: no diff --git a/nexus/db-model/src/bfd.rs b/nexus/db-model/src/bfd.rs new file mode 100644 index 0000000000..b71136ea2c --- /dev/null +++ b/nexus/db-model/src/bfd.rs @@ -0,0 +1,59 @@ +use crate::schema::bfd_session; +use crate::{impl_enum_type, SqlU32}; +use chrono::DateTime; +use chrono::Utc; +use ipnetwork::IpNetwork; +use 
serde::{Deserialize, Serialize}; +use uuid::Uuid; + +impl_enum_type!( + #[derive(SqlType, Debug, Clone, Copy)] + #[diesel(postgres_type(name = "bfd_mode", schema = "public"))] + pub struct BfdModeEnum; + + #[derive( + Clone, + Copy, + Debug, + AsExpression, + FromSqlRow, + PartialEq, + Serialize, + Deserialize + )] + #[diesel(sql_type = BfdModeEnum)] + pub enum BfdMode; + + SingleHop => b"single_hop" + MultiHop => b"multi_hop" +); + +#[derive( + Queryable, Insertable, Selectable, Clone, Debug, Serialize, Deserialize, +)] +#[diesel(table_name = bfd_session)] +pub struct BfdSession { + pub id: Uuid, + pub local: Option, + pub remote: IpNetwork, + pub detection_threshold: SqlU32, + pub required_rx: SqlU32, + pub switch: String, + pub mode: BfdMode, + pub time_created: DateTime, + pub time_modified: DateTime, + pub time_deleted: Option>, +} + +impl From for BfdMode { + fn from(value: nexus_types::external_api::params::BfdMode) -> Self { + match value { + nexus_types::external_api::params::BfdMode::SingleHop => { + BfdMode::SingleHop + } + nexus_types::external_api::params::BfdMode::MultiHop => { + BfdMode::MultiHop + } + } + } +} diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 7fa95822a7..b77d56059e 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -10,6 +10,7 @@ extern crate diesel; extern crate newtype_derive; mod address_lot; +mod bfd; mod bgp; mod block_size; mod bootstore; @@ -106,6 +107,7 @@ mod db { pub use self::macaddr::*; pub use self::unsigned::*; pub use address_lot::*; +pub use bfd::*; pub use bgp::*; pub use block_size::*; pub use bootstore::*; diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 9259532c52..99fe41ba34 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(30, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(31, 0, 0); table! { disk (id) { @@ -1485,6 +1485,21 @@ table! { } } +table! { + bfd_session (remote, switch) { + id -> Uuid, + local -> Nullable, + remote -> Inet, + detection_threshold -> Int8, + required_rx -> Int8, + switch -> Text, + mode -> crate::BfdModeEnum, + time_created -> Timestamptz, + time_modified -> Timestamptz, + time_deleted -> Nullable, + } +} + table! { db_metadata (singleton) { singleton -> Bool, diff --git a/nexus/db-queries/src/db/datastore/bfd.rs b/nexus/db-queries/src/db/datastore/bfd.rs new file mode 100644 index 0000000000..fba19259a6 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/bfd.rs @@ -0,0 +1,88 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
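+
+//! Datastore methods for rack switch BFD session configuration.
+//!
+//! Sessions are inserted directly and "deleted" by setting `time_deleted`
+//! (a soft delete), which is why `bfd_session_list` filters on
+//! `time_deleted.is_null()`.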
+ +use super::DataStore; +use crate::context::OpContext; +use crate::db; +use crate::db::error::public_error_from_diesel; +use crate::db::error::ErrorHandler; +use crate::db::pagination::paginated; +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; +use ipnetwork::IpNetwork; +use nexus_db_model::BfdSession; +use nexus_db_model::SqlU32; +use nexus_types::external_api::params; +use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::{ + CreateResult, DeleteResult, ListResultVec, +}; +use uuid::Uuid; + +impl DataStore { + pub async fn bfd_session_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + use db::schema::bfd_session::dsl; + let conn = self.pool_connection_authorized(opctx).await?; + paginated(dsl::bfd_session, dsl::id, pagparams) + .select(BfdSession::as_select()) + .filter(dsl::time_deleted.is_null()) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + pub async fn bfd_session_create( + &self, + opctx: &OpContext, + config: ¶ms::BfdSessionEnable, + ) -> CreateResult { + use db::schema::bfd_session::dsl; + let conn = self.pool_connection_authorized(opctx).await?; + + let session = BfdSession { + id: Uuid::new_v4(), + local: config.local.map(Into::into), + remote: config.remote.into(), + detection_threshold: SqlU32::new(config.detection_threshold.into()), + required_rx: SqlU32::new( + config.required_rx.try_into().unwrap_or(u32::MAX), + ), + switch: config.switch.to_string(), + mode: config.mode.into(), + time_created: chrono::Utc::now(), + time_modified: chrono::Utc::now(), + time_deleted: None, + }; + + diesel::insert_into(dsl::bfd_session) + .values(session) + .returning(BfdSession::as_returning()) + .get_result_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + pub async fn bfd_session_delete( + &self, + opctx: &OpContext, + config: ¶ms::BfdSessionDisable, + ) -> DeleteResult { + use db::schema::bfd_session::dsl; + let conn = self.pool_connection_authorized(opctx).await?; + + diesel::update(dsl::bfd_session) + .filter(dsl::remote.eq(IpNetwork::from(config.remote))) + .filter(dsl::switch.eq(config.switch.to_string())) + .filter(dsl::time_deleted.is_null()) + .set(dsl::time_deleted.eq(chrono::Utc::now())) + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map(|_| ()) + } +} diff --git a/nexus/db-queries/src/db/datastore/bgp.rs b/nexus/db-queries/src/db/datastore/bgp.rs index 28075b0ded..e5ac35d19a 100644 --- a/nexus/db-queries/src/db/datastore/bgp.rs +++ b/nexus/db-queries/src/db/datastore/bgp.rs @@ -1,3 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ use super::DataStore; use crate::context::OpContext; use crate::db; diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 96832b25bf..b9ad2ea610 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -48,6 +48,7 @@ use std::sync::Arc; use uuid::Uuid; mod address_lot; +mod bfd; mod bgp; mod bootstore; mod certificate; diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs index 2d57274909..d9c50ff26c 100644 --- a/nexus/db-queries/src/db/pool_connection.rs +++ b/nexus/db-queries/src/db/pool_connection.rs @@ -39,6 +39,7 @@ pub type DbConnection = DTraceConnection; static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[ "address_lot_kind", "authentication_mode", + "bfd_mode", "block_size", "caboose_which", "dataset_kind", diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index dcab2d9da1..1cfe3ae8a2 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -97,6 +97,7 @@ dns_external.max_concurrent_server_updates = 5 # them (on a sunny day). external_endpoints.period_secs = 60 nat_cleanup.period_secs = 30 +bfd_manager.period_secs = 30 # How frequently to collect hardware/software inventory from the whole system # (even if we don't have reason to believe anything has changed). inventory.period_secs = 600 diff --git a/nexus/src/app/background/bfd.rs b/nexus/src/app/background/bfd.rs new file mode 100644 index 0000000000..a586db54e4 --- /dev/null +++ b/nexus/src/app/background/bfd.rs @@ -0,0 +1,217 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for managing switch bidirectional forwarding detection +//! (BFD) sessions. 
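+//!
+//! Each activation reconciles two views of the world: the _target_ set of
+//! sessions recorded in the database (via `bfd_session_list`) and the
+//! _current_ set reported by each rack switch's `mgd` daemon (via
+//! `get_bfd_peers`). Sessions present only in the target set are added
+//! with `add_bfd_peer`; sessions present only in the current set are
+//! removed with `remove_bfd_peer`. Sessions in both sets whose parameters
+//! differ are detected but not yet updated (see omicron#4921).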
+ +use super::common::BackgroundTask; +use futures::future::BoxFuture; +use futures::FutureExt; +use mg_admin_client::types::{BfdPeerConfig, SessionMode}; +use nexus_db_model::{BfdMode, BfdSession}; +use nexus_db_queries::{context::OpContext, db::DataStore}; +use omicron_common::api::external::{DataPageParams, SwitchLocation}; +use serde_json::json; +use std::{ + collections::{HashMap, HashSet}, + hash::Hash, + net::{IpAddr, Ipv4Addr}, + sync::Arc, +}; + +pub struct BfdManager { + datastore: Arc, + mgd_clients: HashMap>, +} + +impl BfdManager { + pub fn new( + datastore: Arc, + mgd_clients: HashMap>, + ) -> Self { + Self { datastore, mgd_clients } + } +} + +struct BfdSessionKey { + switch: SwitchLocation, + local: Option, + remote: IpAddr, + detection_threshold: u8, + required_rx: u64, + mode: BfdMode, +} + +impl BfdSessionKey { + fn needs_update(&self, target: &BfdSessionKey) -> bool { + self.detection_threshold != target.detection_threshold + || self.required_rx != target.required_rx + || self.mode != target.mode + } +} + +impl Hash for BfdSessionKey { + fn hash(&self, state: &mut H) { + self.switch.to_string().hash(state); + self.remote.hash(state); + } +} + +impl PartialEq for BfdSessionKey { + fn eq(&self, other: &Self) -> bool { + self.switch.eq(&other.switch) && self.remote.eq(&other.remote) + } +} + +impl Eq for BfdSessionKey {} + +impl From for BfdSessionKey { + fn from(value: BfdSession) -> Self { + Self { + switch: value.switch.parse().unwrap(), //TODO unwrap + remote: value.remote.ip(), + local: value.local.map(|x| x.ip()), + detection_threshold: value + .detection_threshold + .0 + .try_into() + .unwrap(), //TODO unwrap + required_rx: value.required_rx.0.into(), + mode: value.mode, + } + } +} + +impl BackgroundTask for BfdManager { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async { + let log = &opctx.log; + + let target: HashSet = match self + .datastore + .bfd_session_list(opctx, &DataPageParams::max_page()) + .await + { + Ok(sessions) => sessions.into_iter().map(Into::into).collect(), + Err(e) => { + error!(&log, "failed to get bfd sessions from db"; + "error" => e.to_string() + ); + return json!({ + "error": + format!( + "failed to get bfd sessions from db: \ + {:#}", + e + ) + }); + } + }; + + let mut current: HashSet = HashSet::new(); + + for (location, c) in &self.mgd_clients { + let client_current = match c.inner.get_bfd_peers().await { + Ok(x) => x.into_inner(), + Err(e) => { + error!(&log, "failed to get bfd sessions from mgd: {}", + c.inner.baseurl(); + "error" => e.to_string() + ); + continue; + } + }; + for info in &client_current { + current.insert(BfdSessionKey { + local: Some(info.config.listen), + remote: info.config.peer, + detection_threshold: info.config.detection_threshold, + required_rx: info.config.required_rx, + switch: *location, + mode: match info.config.mode { + SessionMode::SingleHop => BfdMode::SingleHop, + SessionMode::MultiHop => BfdMode::MultiHop, + }, + }); + } + } + + let to_add: HashSet<&BfdSessionKey> = + target.difference(¤t).collect(); + + let to_del: HashSet<&BfdSessionKey> = + current.difference(&target).collect(); + + let to_check: HashSet<&BfdSessionKey> = + target.intersection(¤t).collect(); + + let mut to_update: HashSet<&BfdSessionKey> = HashSet::new(); + for x in &to_check { + let c = current.get(x).unwrap(); + let t = target.get(x).unwrap(); + if c.needs_update(&t) { + to_update.insert(t); + } + } + + for x in &to_add { + let mg = match self.mgd_clients.get(&x.switch) { + 
Some(mg) => mg, + None => { + error!(&log, "failed to get mg client"; + "switch" => x.switch.to_string(), + ); + continue; + } + }; + if let Err(e) = mg + .inner + .add_bfd_peer(&BfdPeerConfig { + peer: x.remote, + detection_threshold: x.detection_threshold, + listen: x.local.unwrap_or(Ipv4Addr::UNSPECIFIED.into()), + mode: match x.mode { + BfdMode::SingleHop => SessionMode::SingleHop, + BfdMode::MultiHop => SessionMode::MultiHop, + }, + required_rx: x.required_rx, + }) + .await + { + error!(&log, "failed to add bfd peer to switch daemon"; + "error" => e.to_string(), + "switch" => x.switch.to_string(), + ); + } + } + + for x in &to_del { + let mg = match self.mgd_clients.get(&x.switch) { + Some(mg) => mg, + None => { + error!(&log, "failed to get mg client"; + "switch" => x.switch.to_string(), + ); + continue; + } + }; + if let Err(e) = mg.inner.remove_bfd_peer(&x.remote).await { + error!(&log, "failed to remove bfd peer from switch daemon"; + "error" => e.to_string(), + "switch" => x.switch.to_string(), + ); + } + } + + // TODO parameter updates + // https://github.com/oxidecomputer/omicron/issues/4921 + + json!({}) + } + .boxed() + } +} diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 49ac6d93e2..6eacb07dfa 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -4,6 +4,7 @@ //! Background task initialization +use super::bfd; use super::common; use super::dns_config; use super::dns_propagation; @@ -52,6 +53,9 @@ pub struct BackgroundTasks { /// task handle for the ipv4 nat entry garbage collector pub nat_cleanup: common::TaskHandle, + /// task handle for the switch bfd manager + pub bfd_manager: common::TaskHandle, + /// task handle for the task that collects inventory pub task_inventory_collection: common::TaskHandle, @@ -69,6 +73,7 @@ impl BackgroundTasks { datastore: Arc, config: &BackgroundTaskConfig, dpd_clients: &HashMap>, + mgd_clients: &HashMap>, nexus_id: Uuid, resolver: internal_dns::resolver::Resolver, ) -> BackgroundTasks { @@ -110,8 +115,7 @@ impl BackgroundTasks { (task, watcher_channel) }; - let dpd_clients: Vec<_> = - dpd_clients.values().map(|client| client.clone()).collect(); + let dpd_clients: Vec<_> = dpd_clients.values().cloned().collect(); let nat_cleanup = { driver.register( @@ -130,6 +134,23 @@ impl BackgroundTasks { ) }; + let bfd_manager = { + driver.register( + "bfd_manager".to_string(), + String::from( + "Manages bidirectional fowarding detection (BFD) \ + configuration on rack switches", + ), + config.bfd_manager.period_secs, + Box::new(bfd::BfdManager::new( + datastore.clone(), + mgd_clients.clone(), + )), + opctx.child(BTreeMap::new()), + vec![], + ) + }; + // Background task: inventory collector let task_inventory_collection = { let collector = inventory_collection::InventoryCollector::new( @@ -196,6 +217,7 @@ impl BackgroundTasks { task_external_endpoints, external_endpoints, nat_cleanup, + bfd_manager, task_inventory_collection, task_phantom_disks, task_service_zone_nat_tracker, diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 166fc2654b..dc9eff7d79 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -4,6 +4,7 @@ //! 
Background tasks +mod bfd; mod common; mod dns_config; mod dns_propagation; diff --git a/nexus/src/app/bfd.rs b/nexus/src/app/bfd.rs new file mode 100644 index 0000000000..2d95ad9a58 --- /dev/null +++ b/nexus/src/app/bfd.rs @@ -0,0 +1,105 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::external_api::params; +use mg_admin_client::types::BfdPeerState; +use nexus_db_queries::context::OpContext; +use nexus_types::external_api::shared::{BfdState, BfdStatus}; +use omicron_common::api::{external::Error, internal::shared::SwitchLocation}; +use std::sync::Arc; + +impl super::Nexus { + fn mg_client_for_switch_location( + &self, + switch: SwitchLocation, + ) -> Result, Error> { + let mg_client: Arc = self + .mg_clients + .get(&switch) + .ok_or_else(|| { + Error::not_found_by_name( + omicron_common::api::external::ResourceType::Switch, + &switch.to_string().parse().unwrap(), + ) + })? + .clone(); + + Ok(mg_client) + } + + pub async fn bfd_enable( + &self, + opctx: &OpContext, + session: params::BfdSessionEnable, + ) -> Result<(), Error> { + // add the bfd session to the db and trigger the bfd manager to handle + // the reset + self.datastore().bfd_session_create(opctx, &session).await?; + self.background_tasks + .driver + .activate(&self.background_tasks.bfd_manager); + Ok(()) + } + + pub async fn bfd_disable( + &self, + opctx: &OpContext, + session: params::BfdSessionDisable, + ) -> Result<(), Error> { + // remove the bfd session from the db and trigger the bfd manager to + // handle the reset + self.datastore().bfd_session_delete(opctx, &session).await?; + self.background_tasks + .driver + .activate(&self.background_tasks.bfd_manager); + Ok(()) + } + + pub async fn bfd_status( + &self, + _opctx: &OpContext, + ) -> Result, Error> { + // ask each rack switch about all its BFD sessions. This will need to + // be updated for multirack. + let mut result = Vec::new(); + for s in &[SwitchLocation::Switch0, SwitchLocation::Switch1] { + let mg_client = self.mg_client_for_switch_location(*s)?; + let status = mg_client + .inner + .get_bfd_peers() + .await + .map_err(|e| { + Error::internal_error(&format!( + "maghemite get bfd peers: {e}" + )) + })? + .into_inner(); + + for info in status.iter() { + result.push(BfdStatus { + peer: info.config.peer, + state: match info.state { + BfdPeerState::Up => BfdState::Up, + BfdPeerState::Down => BfdState::Down, + BfdPeerState::Init => BfdState::Init, + BfdPeerState::AdminDown => BfdState::AdminDown, + }, + switch: s.to_string().parse().unwrap(), + local: Some(info.config.listen), + detection_threshold: info.config.detection_threshold, + required_rx: info.config.required_rx, + mode: match info.config.mode { + mg_admin_client::types::SessionMode::SingleHop => { + params::BfdMode::SingleHop + } + mg_admin_client::types::SessionMode::MultiHop => { + params::BfdMode::MultiHop + } + }, + }) + } + } + Ok(result) + } +} diff --git a/nexus/src/app/bgp.rs b/nexus/src/app/bgp.rs index e800d72bdd..51d22dbced 100644 --- a/nexus/src/app/bgp.rs +++ b/nexus/src/app/bgp.rs @@ -1,3 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ use crate::app::authz; use crate::external_api::params; use nexus_db_model::{BgpAnnounceSet, BgpAnnouncement, BgpConfig}; diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 8d997902a5..65525557b3 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -35,6 +35,7 @@ use uuid::Uuid; // by resource. mod address_lot; pub(crate) mod background; +mod bfd; mod bgp; mod certificate; mod deployment; @@ -365,6 +366,7 @@ impl Nexus { Arc::clone(&db_datastore), &config.pkg.background_tasks, &dpd_clients, + &mg_clients, config.deployment.id, resolver.clone(), ); diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 14c6f4bd28..ccd8cebad6 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -41,6 +41,7 @@ use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup::ImageLookup; use nexus_db_queries::db::lookup::ImageParentLookup; use nexus_db_queries::db::model::Name; +use nexus_types::external_api::shared::BfdStatus; use omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::marker_for_name; use omicron_common::api::external::http_pagination::marker_for_name_or_id; @@ -274,6 +275,10 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(networking_bgp_announce_set_list)?; api.register(networking_bgp_announce_set_delete)?; + api.register(networking_bfd_enable)?; + api.register(networking_bfd_disable)?; + api.register(networking_bfd_status)?; + api.register(utilization_view)?; // Fleet-wide API operations @@ -3534,6 +3539,68 @@ async fn networking_bgp_announce_set_delete( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// Enable a BFD session. +#[endpoint { + method = POST, + path = "/v1/system/networking/bfd-enable", + tags = ["system/networking"], +}] +async fn networking_bfd_enable( + rqctx: RequestContext>, + session: TypedBody, +) -> Result { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + nexus.bfd_enable(&opctx, session.into_inner()).await?; + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Disable a BFD session. +#[endpoint { + method = POST, + path = "/v1/system/networking/bfd-disable", + tags = ["system/networking"], +}] +async fn networking_bfd_disable( + rqctx: RequestContext>, + session: TypedBody, +) -> Result { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + nexus.bfd_disable(&opctx, session.into_inner()).await?; + Ok(HttpResponseUpdatedNoContent {}) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Get BFD status. 
+#[endpoint { + method = GET, + path = "/v1/system/networking/bfd-status", + tags = ["system/networking"], +}] +async fn networking_bfd_status( + rqctx: RequestContext>, +) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + let status = nexus.bfd_status(&opctx).await?; + Ok(HttpResponseOk(status)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + // Images /// List images diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 476b8fe6c8..a795f57f4c 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -91,6 +91,7 @@ dns_external.max_concurrent_server_updates = 5 # them (on a sunny day). external_endpoints.period_secs = 60 nat_cleanup.period_secs = 30 +bfd_manager.period_secs = 30 # How frequently to collect hardware/software inventory from the whole system # (even if we don't have reason to believe anything has changed). inventory.period_secs = 600 diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 4ce5901d08..38e248471b 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -578,6 +578,31 @@ pub const DEMO_BGP_STATUS_URL: &'static str = pub const DEMO_BGP_ROUTES_IPV4_URL: &'static str = "/v1/system/networking/bgp-routes-ipv4?asn=47"; +pub const DEMO_BFD_STATUS_URL: &'static str = + "/v1/system/networking/bfd-status"; + +pub const DEMO_BFD_ENABLE_URL: &'static str = + "/v1/system/networking/bfd-enable"; + +pub const DEMO_BFD_DISABLE_URL: &'static str = + "/v1/system/networking/bfd-disable"; + +pub static DEMO_BFD_ENABLE: Lazy = + Lazy::new(|| params::BfdSessionEnable { + local: None, + remote: "10.0.0.1".parse().unwrap(), + detection_threshold: 3, + required_rx: 1000000, + switch: "switch0".parse().unwrap(), + mode: params::BfdMode::MultiHop, + }); + +pub static DEMO_BFD_DISABLE: Lazy = + Lazy::new(|| params::BfdSessionDisable { + remote: "10.0.0.1".parse().unwrap(), + switch: "switch0".parse().unwrap(), + }); + // Project Images pub static DEMO_IMAGE_NAME: Lazy = Lazy::new(|| "demo-image".parse().unwrap()); @@ -2203,6 +2228,37 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { ], }, + VerifyEndpoint { + url: &DEMO_BFD_STATUS_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::GetNonexistent, + ], + }, + + VerifyEndpoint { + url: &DEMO_BFD_ENABLE_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&*DEMO_BFD_ENABLE).unwrap() + ) + ], + }, + + VerifyEndpoint { + url: &DEMO_BFD_DISABLE_URL, + visibility: Visibility::Public, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&*DEMO_BFD_DISABLE).unwrap() + ) + ], + }, + // Floating IPs VerifyEndpoint { url: &DEMO_PROJECT_URL_FIPS, diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index ec993f482d..7ed73fd30a 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -164,6 +164,9 @@ networking_address_lot_block_list GET /v1/system/networking/address- networking_address_lot_create POST /v1/system/networking/address-lot 
networking_address_lot_delete DELETE /v1/system/networking/address-lot/{address_lot} networking_address_lot_list GET /v1/system/networking/address-lot +networking_bfd_disable POST /v1/system/networking/bfd-disable +networking_bfd_enable POST /v1/system/networking/bfd-enable +networking_bfd_status GET /v1/system/networking/bfd-status networking_bgp_announce_set_create POST /v1/system/networking/bgp-announce networking_bgp_announce_set_delete DELETE /v1/system/networking/bgp-announce networking_bgp_announce_set_list GET /v1/system/networking/bgp-announce diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 84336c9099..bda6a876ee 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1796,6 +1796,60 @@ pub struct BgpStatusSelector { pub name_or_id: NameOrId, } +#[derive( + Clone, + Copy, + Debug, + Deserialize, + Serialize, + JsonSchema, + PartialEq, + Eq, + Ord, + PartialOrd, +)] +#[serde(rename_all = "snake_case")] +pub enum BfdMode { + SingleHop, + MultiHop, +} + +/// Information about a bidirectional forwarding detection (BFD) session. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct BfdSessionEnable { + /// Address the Oxide switch will listen on for BFD traffic. If `None` then + /// the unspecified address (0.0.0.0 or ::) is used. + pub local: Option, + + /// Address of the remote peer to establish a BFD session with. + pub remote: IpAddr, + + /// The negotiated Control packet transmission interval, multiplied by this + /// variable, will be the Detection Time for this session (as seen by the + /// remote system) + pub detection_threshold: u8, + + /// The minimum interval, in microseconds, between received BFD + /// Control packets that this system requires + pub required_rx: u64, + + /// The switch to enable this session on. Must be `switch0` or `switch1`. + pub switch: Name, + + /// Select either single-hop (RFC 5881) or multi-hop (RFC 5883) + pub mode: BfdMode, +} + +/// Information needed to disable a BFD session +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] +pub struct BfdSessionDisable { + /// Address of the remote peer to disable a BFD session for. + pub remote: IpAddr, + + /// The switch to enable this session on. Must be `switch0` or `switch1`. + pub switch: Name, +} + /// A set of addresses associated with a port configuration. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct AddressConfig { diff --git a/nexus/types/src/external_api/shared.rs b/nexus/types/src/external_api/shared.rs index f6b4db18a3..5fb7705631 100644 --- a/nexus/types/src/external_api/shared.rs +++ b/nexus/types/src/external_api/shared.rs @@ -4,6 +4,9 @@ //! Types that are used as both views and params +use std::net::IpAddr; + +use omicron_common::api::external::Name; use parse_display::FromStr; use schemars::JsonSchema; use serde::de::Error as _; @@ -15,6 +18,8 @@ use uuid::Uuid; pub use omicron_common::address::{IpRange, Ipv4Range, Ipv6Range}; +use super::params::BfdMode; + /// Maximum number of role assignments allowed on any one resource // Today's implementation assumes a relatively small number of role assignments // per resource. 
Things should work if we bump this up, but we'll want to look @@ -283,6 +288,54 @@ pub struct UninitializedSled { pub cubby: u16, } +#[derive( + Clone, + Debug, + Serialize, + Deserialize, + JsonSchema, + PartialOrd, + Ord, + PartialEq, + Eq, +)] +#[serde(rename_all = "snake_case")] +pub enum BfdState { + /// A stable down state. Non-responsive to incoming messages. + AdminDown = 0, + + /// The initial state. + Down = 1, + + /// The peer has detected a remote peer in the down state. + Init = 2, + + /// The peer has detected a remote peer in the up or init state while in the + /// init state. + Up = 3, +} + +#[derive( + Clone, + Debug, + Serialize, + Deserialize, + JsonSchema, + PartialOrd, + Ord, + PartialEq, + Eq, +)] +pub struct BfdStatus { + pub peer: IpAddr, + pub state: BfdState, + pub switch: Name, + pub local: Option, + pub detection_threshold: u8, + pub required_rx: u64, + pub mode: BfdMode, +} + #[cfg(test)] mod test { use super::Policy; diff --git a/openapi/nexus.json b/openapi/nexus.json index f1da97a55e..98073c8625 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -5904,6 +5904,97 @@ } } }, + "/v1/system/networking/bfd-disable": { + "post": { + "tags": [ + "system/networking" + ], + "summary": "Disable a BFD session.", + "operationId": "networking_bfd_disable", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BfdSessionDisable" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/system/networking/bfd-enable": { + "post": { + "tags": [ + "system/networking" + ], + "summary": "Enable a BFD session.", + "operationId": "networking_bfd_enable", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/BfdSessionEnable" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/system/networking/bfd-status": { + "get": { + "tags": [ + "system/networking" + ], + "summary": "Get BFD status.", + "operationId": "networking_bfd_status", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_BfdStatus", + "type": "array", + "items": { + "$ref": "#/components/schemas/BfdStatus" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/system/networking/bgp": { "get": { "tags": [ @@ -8565,6 +8656,161 @@ "serial" ] }, + "BfdMode": { + "type": "string", + "enum": [ + "single_hop", + "multi_hop" + ] + }, + "BfdSessionDisable": { + "description": "Information needed to disable a BFD session", + "type": "object", + "properties": { + "remote": { + "description": "Address of the remote peer to disable a BFD session for.", + "type": "string", + "format": "ip" + }, + "switch": { + "description": "The switch to enable this session on. 
Must be `switch0` or `switch1`.", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + } + }, + "required": [ + "remote", + "switch" + ] + }, + "BfdSessionEnable": { + "description": "Information about a bidirectional forwarding detection (BFD) session.", + "type": "object", + "properties": { + "detection_threshold": { + "description": "The negotiated Control packet transmission interval, multiplied by this variable, will be the Detection Time for this session (as seen by the remote system)", + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "local": { + "nullable": true, + "description": "Address the Oxide switch will listen on for BFD traffic. If `None` then the unspecified address (0.0.0.0 or ::) is used.", + "type": "string", + "format": "ip" + }, + "mode": { + "description": "Select either single-hop (RFC 5881) or multi-hop (RFC 5883)", + "allOf": [ + { + "$ref": "#/components/schemas/BfdMode" + } + ] + }, + "remote": { + "description": "Address of the remote peer to establish a BFD session with.", + "type": "string", + "format": "ip" + }, + "required_rx": { + "description": "The minimum interval, in microseconds, between received BFD Control packets that this system requires", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "switch": { + "description": "The switch to enable this session on. Must be `switch0` or `switch1`.", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + } + }, + "required": [ + "detection_threshold", + "mode", + "remote", + "required_rx", + "switch" + ] + }, + "BfdState": { + "oneOf": [ + { + "description": "A stable down state. Non-responsive to incoming messages.", + "type": "string", + "enum": [ + "admin_down" + ] + }, + { + "description": "The initial state.", + "type": "string", + "enum": [ + "down" + ] + }, + { + "description": "The peer has detected a remote peer in the down state.", + "type": "string", + "enum": [ + "init" + ] + }, + { + "description": "The peer has detected a remote peer in the up or init state while in the init state.", + "type": "string", + "enum": [ + "up" + ] + } + ] + }, + "BfdStatus": { + "type": "object", + "properties": { + "detection_threshold": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "local": { + "nullable": true, + "type": "string", + "format": "ip" + }, + "mode": { + "$ref": "#/components/schemas/BfdMode" + }, + "peer": { + "type": "string", + "format": "ip" + }, + "required_rx": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "state": { + "$ref": "#/components/schemas/BfdState" + }, + "switch": { + "$ref": "#/components/schemas/Name" + } + }, + "required": [ + "detection_threshold", + "mode", + "peer", + "required_rx", + "state", + "switch" + ] + }, "BgpAnnounceSet": { "description": "Represents a BGP announce set by id. The id can be used with other API calls to view and manage the announce set.", "type": "object", diff --git a/package-manifest.toml b/package-manifest.toml index c34b84eb9d..8944e59c37 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -446,10 +446,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
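
The `detection_threshold` description in the `BfdSessionEnable` schema above implies a simple relationship: the remote peer declares the session down after that many consecutive missed control-packet intervals. A worked example (illustrative only; the negotiated interval is taken here to be the advertised `required_rx`, matching the demo session in the endpoint tests above):

```rust
// With required_rx = 1_000_000 us and detection_threshold = 3, the
// detection time seen by the remote system is ~3 seconds.
fn detection_time_us(negotiated_interval_us: u64, detection_threshold: u8) -> u64 {
    negotiated_interval_us * u64::from(detection_threshold)
}

fn main() {
    assert_eq!(detection_time_us(1_000_000, 3), 3_000_000);
}
```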
-source.commit = "d12bdf89b9058065789cd00c8704e4ce0a352342" +source.commit = "712b2487d9b141234af98b6578bc5f77420bdb03" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt -source.sha256 = "442ef3a927ce2f2a401b631daa3c67a708fbbed83a839552a6fbcadd68120783" +source.sha256 = "36e976ae9b1517b358ec7eadd5fb03f5d40d54074ff830a79895f8fc3e643935" output.type = "tarball" [package.mg-ddm] @@ -462,10 +462,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "d12bdf89b9058065789cd00c8704e4ce0a352342" +source.commit = "712b2487d9b141234af98b6578bc5f77420bdb03" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "81a766a88fab3fe7cb7fb6698ec02d05224320500b7a4421bbea9f4123127fba" +source.sha256 = "bc3137751db24d2e44eca7118f6ca825ed3e9df736480fc210392802cd063dd8" output.type = "zone" output.intermediate_only = true @@ -477,10 +477,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "d12bdf89b9058065789cd00c8704e4ce0a352342" +source.commit = "712b2487d9b141234af98b6578bc5f77420bdb03" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "bc887e08e3d052d8440983a2d6186cd1d92a52345504092f64f4de2e5335a75d" +source.sha256 = "2c54146a133b5f12587d9fb89f85ef0a0ca6278efc8c6fe4859782e886e6c774" output.type = "zone" output.intermediate_only = true diff --git a/schema/crdb/31.0.0/up1.sql b/schema/crdb/31.0.0/up1.sql new file mode 100644 index 0000000000..bc42f0cbe6 --- /dev/null +++ b/schema/crdb/31.0.0/up1.sql @@ -0,0 +1,4 @@ +CREATE TYPE IF NOT EXISTS omicron.public.bfd_mode AS ENUM ( + 'single_hop', + 'multi_hop' +); diff --git a/schema/crdb/31.0.0/up2.sql b/schema/crdb/31.0.0/up2.sql new file mode 100644 index 0000000000..04d472fc99 --- /dev/null +++ b/schema/crdb/31.0.0/up2.sql @@ -0,0 +1,13 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bfd_session ( + id UUID PRIMARY KEY, + local INET, + remote INET NOT NULL, + detection_threshold INT8 NOT NULL, + required_rx INT8 NOT NULL, + switch TEXT NOT NULL, + mode omicron.public.bfd_mode, + + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ +); diff --git a/schema/crdb/31.0.0/up3.sql b/schema/crdb/31.0.0/up3.sql new file mode 100644 index 0000000000..ff1715af83 --- /dev/null +++ b/schema/crdb/31.0.0/up3.sql @@ -0,0 +1,5 @@ +/* Add an index which lets us look up sleds on a rack */ +CREATE UNIQUE INDEX IF NOT EXISTS lookup_bfd_session ON omicron.public.bfd_session ( + remote, + switch +) WHERE time_deleted IS NULL; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 4cb347b260..5db1c98f14 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3371,6 +3371,30 @@ STORING ( time_deleted ); +CREATE TYPE IF NOT EXISTS omicron.public.bfd_mode AS ENUM ( + 'single_hop', + 'multi_hop' +); + +CREATE TABLE IF NOT EXISTS omicron.public.bfd_session ( + id UUID PRIMARY KEY, + local INET, + remote INET NOT NULL, + detection_threshold INT8 NOT 
NULL, + required_rx INT8 NOT NULL, + switch TEXT NOT NULL, + mode omicron.public.bfd_mode, + + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ +); + +CREATE UNIQUE INDEX IF NOT EXISTS lookup_bfd_session ON omicron.public.bfd_session ( + remote, + switch +) WHERE time_deleted IS NULL; + /* * Metadata for the schema itself. This version number isn't great, as there's * nothing to ensure it gets bumped when it should be, but it's a start. @@ -3418,7 +3442,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '30.0.0', NULL) + ( TRUE, NOW(), NOW(), '31.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index d84bf8d4b0..2eed205ddf 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -39,6 +39,7 @@ dns_external.max_concurrent_server_updates = 5 # them (on a sunny day). external_endpoints.period_secs = 60 nat_cleanup.period_secs = 30 +bfd_manager.period_secs = 30 # How frequently to collect hardware/software inventory from the whole system # (even if we don't have reason to believe anything has changed). inventory.period_secs = 600 diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 01206655f0..53bdeaadd6 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -39,6 +39,7 @@ dns_external.max_concurrent_server_updates = 5 # them (on a sunny day). external_endpoints.period_secs = 60 nat_cleanup.period_secs = 30 +bfd_manager.period_secs = 30 # How frequently to collect hardware/software inventory from the whole system # (even if we don't have reason to believe anything has changed). 
inventory.period_secs = 600 diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index a103b117e8..8ee3001179 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="d12bdf89b9058065789cd00c8704e4ce0a352342" +COMMIT="712b2487d9b141234af98b6578bc5f77420bdb03" SHA2="0b0dbc2f8bbc5d2d9be92d64c4865f8f9335355aae62f7de9f67f81dfb3f1803" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 6981c98070..3fa53a9483 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="d12bdf89b9058065789cd00c8704e4ce0a352342" -SHA2="7618511f905d26394ef7c552339dd78835ce36a6def0d85b05b6d1e363a5e7b4" +COMMIT="712b2487d9b141234af98b6578bc5f77420bdb03" +SHA2="0ac038bbaa54d0ae0ac5ccaeff48f03070618372cca26c9d09b716b909bf9355" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 8d0efb473a..1dacea54dc 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="bc887e08e3d052d8440983a2d6186cd1d92a52345504092f64f4de2e5335a75d" -MGD_LINUX_SHA256="93331c1001e3aa506a8c1b83346abba1995e489910bff2c94a86730b96617a34" \ No newline at end of file +CIDL_SHA256="2c54146a133b5f12587d9fb89f85ef0a0ca6278efc8c6fe4859782e886e6c774" +MGD_LINUX_SHA256="248732202f5102bf0947f5f91871379b6c6945fe387d4272cebe6e08f1b58184" \ No newline at end of file From 22dbd545c60522809e0395f9873061b4e1f28928 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Wed, 31 Jan 2024 17:59:16 -0600 Subject: [PATCH 68/91] Bump web console (#4946) https://github.com/oxidecomputer/console/compare/b9013a33...1a4f5d81 * [1a4f5d81](https://github.com/oxidecomputer/console/commit/1a4f5d81) oxidecomputer/console#1910 * [34596e33](https://github.com/oxidecomputer/console/commit/34596e33) oxidecomputer/console#1928 * [ff7cdb28](https://github.com/oxidecomputer/console/commit/ff7cdb28) oxidecomputer/console#1926 * [efa39789](https://github.com/oxidecomputer/console/commit/efa39789) oxidecomputer/console#1867 * [4bfadc02](https://github.com/oxidecomputer/console/commit/4bfadc02) oxidecomputer/console#1927 * [695d3671](https://github.com/oxidecomputer/console/commit/695d3671) oxidecomputer/console#1925 * [30070292](https://github.com/oxidecomputer/console/commit/30070292) better path filter for local files in msw handler * [5cf4339c](https://github.com/oxidecomputer/console/commit/5cf4339c) oxidecomputer/console#1916 * [a26f7c1e](https://github.com/oxidecomputer/console/commit/a26f7c1e) oxidecomputer/console#1922 * [231b93ed](https://github.com/oxidecomputer/console/commit/231b93ed) oxidecomputer/console#1923 * [c8364638](https://github.com/oxidecomputer/console/commit/c8364638) better msw warning filter so we don't get warning noise in console * [764f7310](https://github.com/oxidecomputer/console/commit/764f7310) oxidecomputer/console#1908 * [945619eb](https://github.com/oxidecomputer/console/commit/945619eb) oxidecomputer/console#1921 * [e2d82a4c](https://github.com/oxidecomputer/console/commit/e2d82a4c) oxidecomputer/console#1887 * [d6a67bd5](https://github.com/oxidecomputer/console/commit/d6a67bd5) oxidecomputer/console#1918 * [1fb746f4](https://github.com/oxidecomputer/console/commit/1fb746f4) oxidecomputer/console#1899 * [ca7c85de](https://github.com/oxidecomputer/console/commit/ca7c85de) oxidecomputer/console#1917 * [28598e1d](https://github.com/oxidecomputer/console/commit/28598e1d) oxidecomputer/console#1914 * 
[34eb478d](https://github.com/oxidecomputer/console/commit/34eb478d) oxidecomputer/console#1912 * [4d693088](https://github.com/oxidecomputer/console/commit/4d693088) bump vite-plugin-html to stop getting deprecation warning * [d5c39549](https://github.com/oxidecomputer/console/commit/d5c39549) oxidecomputer/console#1909 * [7c6f53db](https://github.com/oxidecomputer/console/commit/7c6f53db) oxidecomputer/console#1854 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 0dc0024f2c..197d3014a2 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="b9013a33eaa3f5efdcd5c7d244e36a54e7222295" -SHA2="bebb9800ff94c42897d54faac8c2a3f89b2b0e927ebf75ec74223b6163e4209d" +COMMIT="1a4f5d81af09f238dc094e56ad24dd0aa4fd46a0" +SHA2="97e1ea69a7f2a798c05f1d8e12f7de9bf32d11f70d99a7cbc05dabda1c5b7ce4" From 6a823367deab2af996ede370c6ef9b49161f11a6 Mon Sep 17 00:00:00 2001 From: Justin Bennett Date: Wed, 31 Jan 2024 20:51:15 -0500 Subject: [PATCH 69/91] Hide internal silos in utilization (#4943) Fixes #4708. I've updated the `/v1/system/utilization/silos` endpoint to only return non-discoverable silos if they have a quota set. I believe the `default-silo` _does_ get a quota set currently which is non-ideal, but that should be the only one that shows up on the list. I need specific eyes on the migration b/c I've never written a view migration before. --- nexus/db-model/src/schema.rs | 3 +- nexus/db-model/src/utilization.rs | 1 + .../src/db/datastore/utilization.rs | 8 ++++ nexus/tests/integration_tests/utilization.rs | 43 ++++++++++++++++++- schema/crdb/32.0.0/up.sql | 22 ++++++++++ schema/crdb/dbinit.sql | 5 ++- 6 files changed, 78 insertions(+), 4 deletions(-) create mode 100644 schema/crdb/32.0.0/up.sql diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 99fe41ba34..cfe7daa27e 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(31, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(32, 0, 0); table! { disk (id) { @@ -431,6 +431,7 @@ table! 
{ silo_utilization(silo_id) { silo_id -> Uuid, silo_name -> Text, + silo_discoverable -> Bool, cpus_provisioned -> Int8, memory_provisioned -> Int8, storage_provisioned -> Int8, diff --git a/nexus/db-model/src/utilization.rs b/nexus/db-model/src/utilization.rs index 9bef4f59c7..b0e6324bc9 100644 --- a/nexus/db-model/src/utilization.rs +++ b/nexus/db-model/src/utilization.rs @@ -9,6 +9,7 @@ use uuid::Uuid; pub struct SiloUtilization { pub silo_id: Uuid, pub silo_name: Name, + pub silo_discoverable: bool, pub cpus_allocated: i64, pub memory_allocated: ByteCount, diff --git a/nexus/db-queries/src/db/datastore/utilization.rs b/nexus/db-queries/src/db/datastore/utilization.rs index 4fbe215fe2..826c66043a 100644 --- a/nexus/db-queries/src/db/datastore/utilization.rs +++ b/nexus/db-queries/src/db/datastore/utilization.rs @@ -8,6 +8,7 @@ use crate::db::model::Name; use crate::db::model::SiloUtilization; use crate::db::pagination::paginated; use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::BoolExpressionMethods; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::Error; @@ -50,6 +51,13 @@ impl DataStore { ), } .select(SiloUtilization::as_select()) + .filter( + dsl::silo_discoverable + .eq(true) + .or(dsl::cpus_allocated.gt(0)) + .or(dsl::memory_allocated.gt(0)) + .or(dsl::storage_allocated.gt(0)), + ) .load_async(&*self.pool_connection_authorized(opctx).await?) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) diff --git a/nexus/tests/integration_tests/utilization.rs b/nexus/tests/integration_tests/utilization.rs index e09e71a9e3..7d733c66b8 100644 --- a/nexus/tests/integration_tests/utilization.rs +++ b/nexus/tests/integration_tests/utilization.rs @@ -29,6 +29,16 @@ async fn test_utilization(cptestctx: &ControlPlaneTestContext) { create_default_ip_pool(&client).await; + // set high quota for test silo + let _ = NexusRequest::object_put( + client, + "/v1/system/silos/test-suite-silo/quotas", + Some(¶ms::SiloQuotasCreate::arbitrarily_high_default()), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await; + let current_util = objects_list_page_authz::( client, "/v1/system/utilization/silos", @@ -36,6 +46,8 @@ async fn test_utilization(cptestctx: &ControlPlaneTestContext) { .await .items; + // `default-silo` should be the only silo that shows up because + // it has a default quota set assert_eq!(current_util.len(), 2); assert_eq!(current_util[0].silo_name, "default-silo"); @@ -47,7 +59,36 @@ async fn test_utilization(cptestctx: &ControlPlaneTestContext) { assert_eq!(current_util[1].silo_name, "test-suite-silo"); assert_eq!(current_util[1].provisioned, SiloQuotasCreate::empty().into()); - assert_eq!(current_util[1].allocated, SiloQuotasCreate::empty().into()); + assert_eq!( + current_util[1].allocated, + SiloQuotasCreate::arbitrarily_high_default().into() + ); + + let _ = NexusRequest::object_put( + client, + "/v1/system/silos/test-suite-silo/quotas", + Some(¶ms::SiloQuotasCreate::empty()), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await; + + let current_util = objects_list_page_authz::( + client, + "/v1/system/utilization/silos", + ) + .await + .items; + + // Now that default-silo is the only one with a quota, it should be the only result + assert_eq!(current_util.len(), 1); + + assert_eq!(current_util[0].silo_name, "default-silo"); + assert_eq!(current_util[0].provisioned, SiloQuotasCreate::empty().into()); + assert_eq!( + 
current_util[0].allocated, + SiloQuotasCreate::arbitrarily_high_default().into() + ); let _ = create_project(&client, &PROJECT_NAME).await; let _ = create_instance(client, &PROJECT_NAME, &INSTANCE_NAME).await; diff --git a/schema/crdb/32.0.0/up.sql b/schema/crdb/32.0.0/up.sql new file mode 100644 index 0000000000..d6cd1e2cde --- /dev/null +++ b/schema/crdb/32.0.0/up.sql @@ -0,0 +1,22 @@ +CREATE OR REPLACE VIEW omicron.public.silo_utilization +AS SELECT + c.id AS silo_id, + s.name AS silo_name, + c.cpus_provisioned AS cpus_provisioned, + c.ram_provisioned AS memory_provisioned, + c.virtual_disk_bytes_provisioned AS storage_provisioned, + q.cpus AS cpus_allocated, + q.memory_bytes AS memory_allocated, + q.storage_bytes AS storage_allocated, + -- This is the added column + s.discoverable as silo_discoverable +FROM + omicron.public.virtual_provisioning_collection AS c + RIGHT JOIN omicron.public.silo_quotas AS q + ON c.id = q.silo_id + INNER JOIN omicron.public.silo AS s + ON c.id = s.id +WHERE + c.collection_type = 'Silo' +AND + s.time_deleted IS NULL; \ No newline at end of file diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 5db1c98f14..103eb2e0c7 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -862,7 +862,8 @@ AS SELECT c.virtual_disk_bytes_provisioned AS storage_provisioned, q.cpus AS cpus_allocated, q.memory_bytes AS memory_allocated, - q.storage_bytes AS storage_allocated + q.storage_bytes AS storage_allocated, + s.discoverable as silo_discoverable FROM omicron.public.virtual_provisioning_collection AS c RIGHT JOIN omicron.public.silo_quotas AS q @@ -3442,7 +3443,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '31.0.0', NULL) + ( TRUE, NOW(), NOW(), '32.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 1ac4cc3bade99581747d21de7816aa82a182714a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 07:20:15 +0000 Subject: [PATCH 70/91] Update Rust crate indexmap to 2.2.2 (#4928) --- Cargo.lock | 36 ++++++++++++++++++------------------ Cargo.toml | 2 +- update-engine/src/buffer.rs | 2 +- workspace-hack/Cargo.toml | 4 ++-- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d40d2a5839..db80bfd091 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1939,7 +1939,7 @@ dependencies = [ "hostname", "http 0.2.11", "hyper 0.14.27", - "indexmap 2.1.0", + "indexmap 2.2.2", "multer", "openapiv3", "paste", @@ -2732,7 +2732,7 @@ dependencies = [ "debug-ignore", "fixedbitset", "guppy-workspace-hack", - "indexmap 2.1.0", + "indexmap 2.2.2", "itertools 0.12.0", "nested", "once_cell", @@ -2764,7 +2764,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.11", - "indexmap 2.1.0", + "indexmap 2.2.2", "slab", "tokio", "tokio-util", @@ -3359,9 +3359,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.1.0" +version = "2.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +checksum = "824b2ae422412366ba479e8111fd301f7b5faece8149317bb81925979a53f520" dependencies = [ "equivalent", "hashbrown 0.14.2", @@ -5288,7 +5288,7 @@ dependencies = [ "hex", "hmac", "hyper 0.14.27", - "indexmap 2.1.0", + "indexmap 2.2.2", "inout", "ipnetwork", "itertools 0.10.5", @@ -5407,7 +5407,7 @@ version = "0.4.0" source = 
"git+https://github.com/oxidecomputer/openapi-lint?branch=main#ef442ee4343e97b6d9c217d3e7533962fe7d7236" dependencies = [ "heck 0.4.1", - "indexmap 2.1.0", + "indexmap 2.2.2", "lazy_static", "openapiv3", "regex", @@ -5419,7 +5419,7 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc02deea53ffe807708244e5914f6b099ad7015a207ee24317c22112e17d9c5c" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.2.2", "serde", "serde_json", ] @@ -5676,7 +5676,7 @@ dependencies = [ "expectorate", "futures", "highway", - "indexmap 2.1.0", + "indexmap 2.2.2", "itertools 0.12.0", "omicron-common", "omicron-test-utils", @@ -6052,7 +6052,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 2.1.0", + "indexmap 2.2.2", "serde", "serde_derive", ] @@ -6469,7 +6469,7 @@ dependencies = [ "getopts", "heck 0.4.1", "http 0.2.11", - "indexmap 2.1.0", + "indexmap 2.2.2", "openapiv3", "proc-macro2", "quote", @@ -6491,7 +6491,7 @@ dependencies = [ "getopts", "heck 0.4.1", "http 0.2.11", - "indexmap 2.1.0", + "indexmap 2.2.2", "openapiv3", "proc-macro2", "quote", @@ -7860,7 +7860,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.1.0", + "indexmap 2.2.2", "serde", "serde_json", "serde_with_macros", @@ -7885,7 +7885,7 @@ version = "0.9.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.2.2", "itoa", "ryu", "serde", @@ -9220,7 +9220,7 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.2.2", "serde", "serde_spanned", "toml_datetime", @@ -9233,7 +9233,7 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d34d383cd00a163b4a5b85053df514d45bc330f6de7737edfe0a93311d1eaa03" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.2.2", "serde", "serde_spanned", "toml_datetime", @@ -9752,7 +9752,7 @@ dependencies = [ "derive-where", "either", "futures", - "indexmap 2.1.0", + "indexmap 2.2.2", "indicatif", "libsw", "linear-map", @@ -10165,7 +10165,7 @@ dependencies = [ "crossterm", "futures", "humantime", - "indexmap 2.1.0", + "indexmap 2.2.2", "indicatif", "itertools 0.12.0", "omicron-common", diff --git a/Cargo.toml b/Cargo.toml index e1e4d40736..07e54a0cb3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -224,7 +224,7 @@ hyper = "0.14" hyper-rustls = "0.26.0" hyper-staticfile = "0.9.5" illumos-utils = { path = "illumos-utils" } -indexmap = "2.1.0" +indexmap = "2.2.2" indicatif = { version = "0.17.7", features = ["rayon"] } installinator = { path = "installinator" } installinator-artifactd = { path = "installinator-artifactd" } diff --git a/update-engine/src/buffer.rs b/update-engine/src/buffer.rs index 04363ffc26..2359ecc03f 100644 --- a/update-engine/src/buffer.rs +++ b/update-engine/src/buffer.rs @@ -113,7 +113,7 @@ impl EventBuffer { // XXX: more efficient algorithm let root_execution_id = self.root_execution_id()?; let mut summary = self.steps().summarize(); - summary.remove(&root_execution_id) + summary.swap_remove(&root_execution_id) } /// Returns information about each step, as currently tracked by the buffer, diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 
bf01830630..658514e042 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -60,7 +60,7 @@ hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13.2" }
 hex = { version = "0.4.3", features = ["serde"] }
 hmac = { version = "0.12.1", default-features = false, features = ["reset"] }
 hyper = { version = "0.14.27", features = ["full"] }
-indexmap = { version = "2.1.0", features = ["serde"] }
+indexmap = { version = "2.2.2", features = ["serde"] }
 inout = { version = "0.1.3", default-features = false, features = ["std"] }
 ipnetwork = { version = "0.20.0", features = ["schemars"] }
 itertools = { version = "0.10.5" }
@@ -168,7 +168,7 @@ hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13.2" }
 hex = { version = "0.4.3", features = ["serde"] }
 hmac = { version = "0.12.1", default-features = false, features = ["reset"] }
 hyper = { version = "0.14.27", features = ["full"] }
-indexmap = { version = "2.1.0", features = ["serde"] }
+indexmap = { version = "2.2.2", features = ["serde"] }
 inout = { version = "0.1.3", default-features = false, features = ["std"] }
 ipnetwork = { version = "0.20.0", features = ["schemars"] }
 itertools = { version = "0.10.5" }

From a5fd50b6ed4ae59b8baaa3c6992792dc21f44d6d Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Thu, 1 Feb 2024 08:45:37 +0000
Subject: [PATCH 71/91] Update taiki-e/install-action digest to 7dcb31b (#4951)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`bee85d7` -> `7dcb31b`](https://togithub.com/taiki-e/install-action/compare/bee85d7...7dcb31b) |

---

### Configuration

📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles.

🚦 **Automerge**: Enabled.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update again.

---

- [ ] If you want to rebase/retry this PR, check this box

---

This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate).

Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com>
---
 .github/workflows/hakari.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml
index 85aa0ab7f4..4eec09c455 100644
--- a/.github/workflows/hakari.yml
+++ b/.github/workflows/hakari.yml
@@ -24,7 +24,7 @@ jobs:
     with:
       toolchain: stable
     - name: Install cargo-hakari
-      uses: taiki-e/install-action@bee85d7ea77c01f7a403c22ac2c802b431b093df # v2
+      uses: taiki-e/install-action@7dcb31b8033f96afe112f0df83dcb01f9969d23b # v2
       with:
         tool: cargo-hakari
     - name: Check workspace-hack Cargo.toml is up-to-date

From 24b150049d2386c2174bc5159bb8aa333dc92ee1 Mon Sep 17 00:00:00 2001
From: liffy <629075+lifning@users.noreply.github.com>
Date: Thu, 1 Feb 2024 03:30:18 -0800
Subject: [PATCH 72/91] Unit tests for DumpSetup (#3788)

Verifies decision-making in different combinations of M.2/U.2 dataset
and dump slice availability and occupancy, and tests log/core-archiving.
(functionality that had been implemented for #2478) --- Cargo.toml | 2 +- illumos-utils/src/coreadm.rs | 99 ++-- illumos-utils/src/dumpadm.rs | 194 +++----- sled-agent/src/dump_setup.rs | 924 ++++++++++++++++++++++++++++++----- 4 files changed, 922 insertions(+), 297 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 07e54a0cb3..72952ff643 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -408,7 +408,7 @@ wicket-common = { path = "wicket-common" } wicketd-client = { path = "clients/wicketd-client" } zeroize = { version = "1.7.0", features = ["zeroize_derive", "std"] } zip = { version = "0.6.6", default-features = false, features = ["deflate","bzip2"] } -zone = { version = "0.3", default-features = false, features = ["async"] } +zone = { version = "0.3", default-features = false, features = ["async", "sync"] } # NOTE: The test profile inherits from the dev profile, so settings under # profile.dev get inherited. AVOID setting anything under profile.test: that diff --git a/illumos-utils/src/coreadm.rs b/illumos-utils/src/coreadm.rs index 543dbca239..00e31a3309 100644 --- a/illumos-utils/src/coreadm.rs +++ b/illumos-utils/src/coreadm.rs @@ -1,62 +1,57 @@ -use camino::Utf8PathBuf; -use std::ffi::OsString; -use std::os::unix::ffi::OsStringExt; +use crate::{execute, ExecutionError}; use std::process::Command; -#[derive(thiserror::Error, Debug)] -pub enum CoreAdmError { - #[error("Error obtaining or modifying coreadm configuration. core_dir: {core_dir:?}")] - Execution { core_dir: Utf8PathBuf }, - - #[error("Invalid invocation of coreadm: {0:?} {1:?}")] - InvalidCommand(Vec, OsString), +const COREADM: &str = "/usr/bin/coreadm"; - #[error("coreadm process was terminated by a signal.")] - TerminatedBySignal, +pub struct CoreAdm { + cmd: Command, +} - #[error("coreadm invocation exited with unexpected return code {0}")] - UnexpectedExitCode(i32), +pub enum CoreFileOption { + Global, + GlobalSetid, + Log, + Process, + ProcSetid, +} - #[error("Failed to execute dumpadm process: {0}")] - Exec(std::io::Error), +impl AsRef for CoreFileOption { + fn as_ref(&self) -> &str { + match self { + CoreFileOption::Global => "global", + CoreFileOption::GlobalSetid => "global-setid", + CoreFileOption::Log => "log", + CoreFileOption::Process => "process", + CoreFileOption::ProcSetid => "proc-setid", + } + } } -const COREADM: &str = "/usr/bin/coreadm"; +impl CoreAdm { + pub fn new() -> Self { + let mut cmd = Command::new(COREADM); + cmd.env_clear(); + Self { cmd } + } -pub fn coreadm(core_dir: &Utf8PathBuf) -> Result<(), CoreAdmError> { - let mut cmd = Command::new(COREADM); - cmd.env_clear(); - - // disable per-process core patterns - cmd.arg("-d").arg("process"); - cmd.arg("-d").arg("proc-setid"); - - // use the global core pattern - cmd.arg("-e").arg("global"); - cmd.arg("-e").arg("global-setid"); - - // set the global pattern to place all cores into core_dir, - // with filenames of "core.[zone-name].[exe-filename].[pid].[time]" - cmd.arg("-g").arg(core_dir.join("core.%z.%f.%p.%t")); - - // also collect DWARF data from the exe and its library deps - cmd.arg("-G").arg("default+debug"); - - let out = cmd.output().map_err(CoreAdmError::Exec)?; - - match out.status.code() { - Some(0) => Ok(()), - Some(1) => Err(CoreAdmError::Execution { core_dir: core_dir.clone() }), - Some(2) => { - // unwrap: every arg we've provided in this function is UTF-8 - let mut args = - vec![cmd.get_program().to_str().unwrap().to_string()]; - cmd.get_args() - .for_each(|arg| args.push(arg.to_str().unwrap().to_string())); - let stderr = 
OsString::from_vec(out.stderr); - Err(CoreAdmError::InvalidCommand(args, stderr)) - } - Some(n) => Err(CoreAdmError::UnexpectedExitCode(n)), - None => Err(CoreAdmError::TerminatedBySignal), + pub fn disable(&mut self, opt: CoreFileOption) { + self.cmd.arg("-d").arg(opt.as_ref()); + } + + pub fn enable(&mut self, opt: CoreFileOption) { + self.cmd.arg("-e").arg(opt.as_ref()); + } + + pub fn global_pattern(&mut self, pat: impl AsRef) { + self.cmd.arg("-g").arg(pat); + } + + pub fn global_contents(&mut self, contents: &str) { + self.cmd.arg("-G").arg(contents); + } + + pub fn execute(mut self) -> Result<(), ExecutionError> { + execute(&mut self.cmd)?; + Ok(()) } } diff --git a/illumos-utils/src/dumpadm.rs b/illumos-utils/src/dumpadm.rs index feb470e494..e37874f795 100644 --- a/illumos-utils/src/dumpadm.rs +++ b/illumos-utils/src/dumpadm.rs @@ -1,3 +1,4 @@ +use crate::{execute, ExecutionError}; use byteorder::{LittleEndian, ReadBytesExt}; use camino::Utf8PathBuf; use std::ffi::OsString; @@ -6,6 +7,17 @@ use std::io::{Seek, SeekFrom}; use std::os::unix::ffi::OsStringExt; use std::process::Command; +pub const DUMPADM: &str = "/usr/sbin/dumpadm"; +pub const SAVECORE: &str = "/usr/bin/savecore"; + +// values from /usr/src/uts/common/sys/dumphdr.h: +pub const DUMP_OFFSET: u64 = 65536; // pad at start/end of dev + +pub const DUMP_MAGIC: u32 = 0xdefec8ed; // weird hex but ok +pub const DUMP_VERSION: u32 = 10; // version of this dumphdr + +pub const DF_VALID: u32 = 0x00000001; // Dump is valid (savecore clears) + #[derive(thiserror::Error, Debug)] pub enum DumpHdrError { #[error("I/O error while attempting to open raw disk: {0}")] @@ -39,14 +51,6 @@ pub enum DumpHdrError { pub fn dump_flag_is_valid( dump_slice: &Utf8PathBuf, ) -> Result { - // values from /usr/src/uts/common/sys/dumphdr.h: - const DUMP_OFFSET: u64 = 65536; // pad at start/end of dev - - const DUMP_MAGIC: u32 = 0xdefec8ed; // weird hex but ok - const DUMP_VERSION: u32 = 10; // version of this dumphdr - - const DF_VALID: u32 = 0x00000001; // Dump is valid (savecore clears) - let mut f = File::open(dump_slice).map_err(DumpHdrError::OpenRaw)?; f.seek(SeekFrom::Start(DUMP_OFFSET)).map_err(DumpHdrError::Seek)?; @@ -75,134 +79,86 @@ pub fn dump_flag_is_valid( Ok((flags & DF_VALID) != 0) } -const DUMPADM: &str = "/usr/sbin/dumpadm"; -const SAVECORE: &str = "/usr/bin/savecore"; - -#[derive(thiserror::Error, Debug)] -pub enum DumpAdmError { - #[error("Error obtaining or modifying dump configuration. dump_slice: {dump_slice}, savecore_dir: {savecore_dir:?}")] - Execution { dump_slice: Utf8PathBuf, savecore_dir: Option }, - - #[error("Invalid invocation of dumpadm: {0:?} {1:?}")] - InvalidCommand(Vec, OsString), +pub enum DumpContentType { + Kernel, + All, + CurProc, +} - #[error("dumpadm process was terminated by a signal.")] - TerminatedBySignal, +impl AsRef for DumpContentType { + fn as_ref(&self) -> &str { + match self { + DumpContentType::Kernel => "kernel", + DumpContentType::All => "all", + DumpContentType::CurProc => "curproc", + } + } +} - #[error("dumpadm invocation exited with unexpected return code {0}")] - UnexpectedExitCode(i32), +/// Invokes `dumpadm(8)` to configure the kernel to dump core into the given +/// `dump_slice` block device in the event of a panic. 
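+///
+/// A usage sketch (the device path is a hypothetical example; `/tmp/crash`
+/// mirrors the placeholder savecore directory used elsewhere in this file):
+///
+/// ```ignore
+/// let mut adm = DumpAdm::new(
+///     Utf8PathBuf::from("/dev/dsk/c1t0d0s1"),
+///     Utf8PathBuf::from("/tmp/crash"),
+/// );
+/// adm.content_type(DumpContentType::CurProc);
+/// adm.compress(true);
+/// adm.no_boot_time_savecore();
+/// adm.execute()?;
+/// ```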
+pub struct DumpAdm { + cmd: Command, + content_type: Option, + dump_slice: Utf8PathBuf, + savecore_dir: Utf8PathBuf, +} - #[error( - "Failed to create placeholder savecore directory at /tmp/crash: {0}" - )] - Mkdir(std::io::Error), +impl DumpAdm { + pub fn new(dump_slice: Utf8PathBuf, savecore_dir: Utf8PathBuf) -> Self { + let mut cmd = Command::new(DUMPADM); + cmd.env_clear(); - #[error("savecore failed: {0:?}")] - SavecoreFailure(OsString), + Self { cmd, content_type: None, dump_slice, savecore_dir } + } - #[error("Failed to execute dumpadm process: {0}")] - ExecDumpadm(std::io::Error), + pub fn content_type(&mut self, ctype: DumpContentType) { + self.content_type = Some(ctype); + } - #[error("Failed to execute savecore process: {0}")] - ExecSavecore(std::io::Error), -} + pub fn compress(&mut self, on: bool) { + let arg = if on { "on" } else { "off" }; + self.cmd.arg("-z").arg(arg); + } -/// Invokes `dumpadm(8)` to configure the kernel to dump core into the given -/// `dump_slice` block device in the event of a panic. If a core is already -/// present in that block device, and a `savecore_dir` is provided, this -/// function also invokes `savecore(8)` to save it into that directory. -/// On success, returns Ok(Some(stdout)) if `savecore(8)` was invoked, or -/// Ok(None) if it wasn't. -pub fn dumpadm( - dump_slice: &Utf8PathBuf, - savecore_dir: Option<&Utf8PathBuf>, -) -> Result, DumpAdmError> { - let mut cmd = Command::new(DUMPADM); - cmd.env_clear(); - - // Include memory from the current process if there is one for the panic - // context, in addition to kernel memory: - cmd.arg("-c").arg("curproc"); - - // Use the given block device path for dump storage: - cmd.arg("-d").arg(dump_slice); - - // Compress crash dumps: - cmd.arg("-z").arg("on"); - - // Do not run savecore(8) automatically on boot (irrelevant anyhow, as the - // config file being mutated by dumpadm won't survive reboots on gimlets). - // The sled-agent will invoke it manually instead. - cmd.arg("-n"); - - if let Some(savecore_dir) = savecore_dir { - // Run savecore(8) to place the existing contents of dump_slice (if - // any) into savecore_dir, and clear the presence flag. - cmd.arg("-s").arg(savecore_dir); - } else { - // if we don't have a savecore destination yet, still create and use - // a tmpfs path (rather than the default location under /var/crash, - // which is in the ramdisk pool), because dumpadm refuses to do what - // we ask otherwise. 
-        let tmp_crash = "/tmp/crash";
-        std::fs::create_dir_all(tmp_crash).map_err(DumpAdmError::Mkdir)?;
-
-        cmd.arg("-s").arg(tmp_crash);
+    pub fn no_boot_time_savecore(&mut self) {
+        self.cmd.arg("-n");
+    }
 
-    let out = cmd.output().map_err(DumpAdmError::ExecDumpadm)?;
-
-    match out.status.code() {
-        Some(0) => {
-            // do we have a destination for the saved dump
-            if savecore_dir.is_some() {
-                // and does the dump slice have one to save off
-                if let Ok(true) = dump_flag_is_valid(dump_slice) {
-                    return savecore();
-                }
-            }
-            Ok(None)
-        }
-        Some(1) => Err(DumpAdmError::Execution {
-            dump_slice: dump_slice.clone(),
-            savecore_dir: savecore_dir.cloned(),
-        }),
-        Some(2) => {
-            // unwrap: every arg we've provided in this function is UTF-8
-            let mut args =
-                vec![cmd.get_program().to_str().unwrap().to_string()];
-            cmd.get_args()
-                .for_each(|arg| args.push(arg.to_str().unwrap().to_string()));
-            let stderr = OsString::from_vec(out.stderr);
-            Err(DumpAdmError::InvalidCommand(args, stderr))
+    pub fn execute(mut self) -> Result<(), ExecutionError> {
+        if let Some(ctype) = self.content_type {
+            self.cmd.arg("-c").arg(ctype.as_ref());
         }
-        Some(n) => Err(DumpAdmError::UnexpectedExitCode(n)),
-        None => Err(DumpAdmError::TerminatedBySignal),
+        self.cmd.arg("-d").arg(self.dump_slice);
+        self.cmd.arg("-s").arg(self.savecore_dir);
+
+        execute(&mut self.cmd)?;
+        Ok(())
     }
 }
 
-// invokes savecore(8) according to the system-wide config set by dumpadm.
-// savecore(8) creates a file in the savecore directory called `vmdump.<n>`,
-// where `<n>` is the number in the neighboring plaintext file called `bounds`,
-// or 0 if the file doesn't exist.
-// if savecore(8) successfully copies the data from the dump slice to the
-// vmdump file, it clears the "valid" flag in the dump slice's header and
-// increments the number in `bounds` by 1.
-// in the event that savecore(8) terminates before it finishes copying the
-// dump, the incomplete dump will remain in the target directory, but the next
-// invocation will overwrite it, because `bounds` wasn't created/incremented.
-fn savecore() -> Result<Option<OsString>, DumpAdmError> {
-    let mut cmd = Command::new(SAVECORE);
-    cmd.env_clear();
-    cmd.arg("-v");
-    let out = cmd.output().map_err(DumpAdmError::ExecSavecore)?;
-    if out.status.success() {
+pub struct SaveCore;
+
+impl SaveCore {
+    /// Invokes savecore(8) according to the system-wide config set by dumpadm.
+    /// savecore(8) creates a file in the savecore directory called `vmdump.<n>`,
+    /// where `<n>` is the number in the neighboring plaintext file called `bounds`,
+    /// or 0 if the file doesn't exist.
+    /// If savecore(8) successfully copies the data from the dump slice to the
+    /// vmdump file, it clears the "valid" flag in the dump slice's header and
+    /// increments the number in `bounds` by 1.
+    /// In the event that savecore(8) terminates before it finishes copying the
+    /// dump, the incomplete dump will remain in the target directory, but the next
+    /// invocation will overwrite it, because `bounds` wasn't created/incremented.
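+    ///
+    /// A concrete illustration (hypothetical numbers, not from this change):
+    /// if `bounds` currently contains `3`, a successful run writes `vmdump.3`
+    /// into the savecore directory, clears the dump slice's valid flag, and
+    /// bumps `bounds` to `4`; a run that dies midway leaves `bounds` at `3`,
+    /// so the partial `vmdump.3` is overwritten on the next attempt.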
+    pub fn execute(&self) -> Result<Option<OsString>, ExecutionError> {
+        let mut cmd = Command::new(SAVECORE);
+        cmd.env_clear();
+        cmd.arg("-v");
+        let out = execute(&mut cmd)?;
         if out.stdout.is_empty() || out.stdout == vec![b'\n'] {
             Ok(None)
         } else {
             Ok(Some(OsString::from_vec(out.stdout)))
         }
-    } else {
-        Err(DumpAdmError::SavecoreFailure(OsString::from_vec(out.stderr)))
     }
 }
diff --git a/sled-agent/src/dump_setup.rs b/sled-agent/src/dump_setup.rs
index e675e6e12d..bdbc008ccb 100644
--- a/sled-agent/src/dump_setup.rs
+++ b/sled-agent/src/dump_setup.rs
@@ -1,8 +1,94 @@
+//! This module is responsible for moving debug info (kernel crash dumps,
+//! userspace process core dumps, and rotated logs) onto external drives for
+//! perusal/archival, and to prevent internal drives from filling up.
+//! (For background on the paths and datasets being used, see RFD 118)
+//!
+//! The behaviors documented below describe current behavior, but are not
+//! necessarily a long-term guarantee, and details may be subject to change.
+//!
+//! ## Choice of destination external drive for archived logs and dumps
+//! As zpools on external (U.2) drives come online, their proportion of space
+//! used is checked, any that are over 70% are skipped, and of the remaining
+//! candidates the one with the *most* content is designated as the target onto
+//! which diagnostic files will be archived every 5 minutes.
+//!
+//! If *all* drives are over 70% utilization, the one with the oldest average
+//! file modification time is chosen for cleanup, wherein its oldest archived
+//! files are removed until the space used is under the 70% threshold again.
+//!
+//! If the chosen drive eventually exceeds 80% of its capacity used, then a
+//! different drive is chosen by the same algorithm.
+//!
+//! ## Kernel crash dumps
+//! As internal (M.2) drives are discovered, their designated dump slices are
+//! checked for the presence of a previous kernel crash dump that hasn't been
+//! archived. If a dump is present that has not yet been archived, and an
+//! external debug directory has been chosen, `savecore(8)` is invoked to save
+//! the dump slice's contents there and mark the slice as processed.
+//!
+//! If an active dump slice (into which the running kernel should dump) has not
+//! yet been designated, and the slice being observed was either successfully
+//! archived or vacant to begin with, that slice is configured as the running
+//! system's dump slice with `dumpadm(8)`.
+//!
+//! If no vacant slices are available and no external volume is online with
+//! sufficient free space to serve as a `savecore(8)` destination, we simply
+//! do not configure a dump slice, preferring to preserve evidence of the
+//! original root cause of an issue rather than overwriting it with confounding
+//! variables (in the event adjacent systems begin behaving erratically due to
+//! the initial failure).
+//! In this event, as soon as an external drive becomes available to archive
+//! one or all of the occupied dump slices' contents, the golden-path procedure
+//! detailed above occurs and a dump slice is configured.
+//!
+//! ## Process core dumps
+//! As zpools on internal (M.2) drives come online, the first one seen by the
+//! poll loop is chosen to be the destination of process cores in all zones:
+//! ```text
+//! /pool/int/*/crash/core.[zone-name].[exe-filename].[pid].[time]
+//! ```
+//!
+//! For reference, at time of writing, the invocation of coreadm(8) looks like:
+//! ```sh
+//! coreadm \
+//!   -d process -d proc-setid \
+//! 
-e global -e global-setid \
+//!   -g "/pool/int/${CHOSEN_ZFS}/crash/core.%z.%f.%p.%t" \
+//!   -G default+debug
+//! ```
+//!
+//! Every 5 minutes, all core files found on internal drives are moved to the
+//! DUMP_DATASET of the (similarly chosen) removable U.2 drive, like so:
+//! ```text
+//! /pool/int/*/crash/core.global.sled-agent.101.34784217
+//!   -> /pool/ext/*/crypt/debug/core.global.sled-agent.101.34784217
+//! ```
+//!
+//! ## Log rotation and archival
+//! Every 5 minutes, each log that logadm(8) has rotated (in every zone) gets
+//! archived into the DUMP_DATASET of the chosen U.2, with the suffixed
+//! number replaced by the modified timestamp, like so:
+//! ```text
+//! /var/svc/log/foo.log.0
+//!   -> /pool/ext/*/crypt/debug/global/foo.log.34784217
+//! /pool/int/*/crypt/zone/oxz_bar/root/var/svc/log/baz.log.0
+//!   -> /pool/ext/*/crypt/debug/oxz_bar/baz.log.34784217
+//! ```
+//!
+//! If the log file's modified time is unavailable or invalid, we fall back to
+//! the time of archival, and if that fails, we simply count up from 0.
+//!
+//! In the event of filename collisions (i.e. several instances of a service's
+//! rotated log files having the same modified time to the second), the
+//! number is incremented by 1 until no conflict remains.
 
 use camino::Utf8PathBuf;
-use derive_more::{AsRef, Deref, From};
-use illumos_utils::dumpadm::DumpAdmError;
-use illumos_utils::zone::{AdmError, Zones};
+use derive_more::{AsRef, From};
+use illumos_utils::coreadm::{CoreAdm, CoreFileOption};
+use illumos_utils::dumpadm::{DumpAdm, DumpContentType};
+use illumos_utils::zone::ZONE_PREFIX;
 use illumos_utils::zpool::{ZpoolHealth, ZpoolName};
+use illumos_utils::ExecutionError;
 use omicron_common::disk::DiskIdentity;
 use sled_hardware::DiskVariant;
 use sled_storage::dataset::{CRASH_DATASET, DUMP_DATASET};
@@ -14,70 +100,56 @@ use std::ffi::OsString;
 use std::path::{Path, PathBuf};
 use std::sync::{Arc, Weak};
 use std::time::{Duration, SystemTime, SystemTimeError, UNIX_EPOCH};
+use zone::{Zone, ZoneError};
 
-pub struct DumpSetup {
- worker: Arc<std::sync::Mutex<DumpSetupWorker>>,
- _poller: std::thread::JoinHandle<()>,
- log: Logger,
-}
+const ZFS_PROP_USED: &str = "used";
+const ZFS_PROP_AVAILABLE: &str = "available";
 
-impl DumpSetup {
- pub fn new(log: &Logger) -> Self {
- let worker = Arc::new(std::sync::Mutex::new(DumpSetupWorker::new(
- log.new(o!("component" => "DumpSetup-worker")),
- )));
- let worker_weak = Arc::downgrade(&worker);
- let log_poll = log.new(o!("component" => "DumpSetup-archival"));
- let _poller = std::thread::spawn(move || {
- Self::poll_file_archival(worker_weak, log_poll)
- });
- let log = log.new(o!("component" => "DumpSetup"));
- Self { worker, _poller, log }
- }
-}
+const DATASET_USAGE_PERCENT_CHOICE: u64 = 70;
+const DATASET_USAGE_PERCENT_CLEANUP: u64 = 80;
+
+const ARCHIVAL_INTERVAL: Duration = Duration::from_secs(300);
 
 // we sure are passing a lot of Utf8PathBufs around, let's be careful about it
-#[derive(
- AsRef, Clone, Debug, Deref, Eq, From, Hash, Ord, PartialEq, PartialOrd,
-)]
+#[derive(AsRef, Clone, Debug, Eq, From, Hash, Ord, PartialEq, PartialOrd)]
 struct DumpSlicePath(Utf8PathBuf);
-#[derive(
- AsRef, Clone, Debug, Deref, Eq, From, Hash, Ord, PartialEq, PartialOrd,
-)]
+#[derive(AsRef, Clone, Debug, Eq, From, Hash, Ord, PartialEq, PartialOrd)]
 struct DebugDataset(Utf8PathBuf);
-#[derive(
- AsRef, Clone, Debug, Deref, Eq, From, Hash, Ord, PartialEq, PartialOrd,
-)]
+#[derive(AsRef, Clone, Debug, Eq, From, Hash, Ord, PartialEq, PartialOrd)]
 struct CoreDataset(Utf8PathBuf);
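+
+// As a hypothetical illustration (not part of this module), these wrappers
+// turn path mix-ups into compile errors: a DumpSlicePath can no longer be
+// passed where a DebugDataset is expected, even though both wrap Utf8PathBuf:
+//
+//     fn archive_into(dest: &DebugDataset) { /* ... */ }
+//     let slice = DumpSlicePath::from(Utf8PathBuf::from("/dev/dsk/c1t0d0s1"));
+//     archive_into(&slice); // error: expected `&DebugDataset`, found `&DumpSlicePath`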
-#[derive(Deref)]
-struct CoreZpool(ZpoolName);
-#[derive(Deref)]
-struct DebugZpool(ZpoolName);
+#[derive(AsRef, Clone, From)]
+pub(super) struct CoreZpool(pub ZpoolName);
+#[derive(AsRef, Clone, From)]
+pub(super) struct DebugZpool(pub ZpoolName);
+
+impl GetMountpoint for DebugZpool {
+ type NewType = DebugDataset;
+ const MOUNTPOINT: &'static str = DUMP_DATASET;
+}
+impl GetMountpoint for CoreZpool {
+ type NewType = CoreDataset;
+ const MOUNTPOINT: &'static str = CRASH_DATASET;
+}
 
 // only want to access these directories after they're mounted!
-trait GetMountpoint: std::ops::Deref<Target = ZpoolName> {
+trait GetMountpoint: AsRef<ZpoolName> {
 type NewType: From<Utf8PathBuf>;
 const MOUNTPOINT: &'static str;
- fn mountpoint(&self) -> Result<Option<Self::NewType>, ZfsGetError> {
- if zfs_get_prop(self.to_string(), "mounted")? == "yes" {
+ fn mountpoint(
+ &self,
+ invoker: &dyn ZfsInvoker,
+ ) -> Result<Option<Self::NewType>, ZfsGetError> {
+ if invoker.zfs_get_prop(&self.as_ref().to_string(), "mounted")? == "yes"
+ {
 Ok(Some(Self::NewType::from(
- self.dataset_mountpoint(Self::MOUNTPOINT),
+ invoker.mountpoint(self.as_ref(), Self::MOUNTPOINT),
 )))
 } else {
 Ok(None)
 }
 }
 }
 
-impl GetMountpoint for DebugZpool {
- type NewType = DebugDataset;
- const MOUNTPOINT: &'static str = DUMP_DATASET;
-}
-impl GetMountpoint for CoreZpool {
- type NewType = CoreDataset;
- const MOUNTPOINT: &'static str = CRASH_DATASET;
-}
-
 struct DumpSetupWorker {
 core_dataset_names: Vec<CoreZpool>,
 debug_dataset_names: Vec<DebugZpool>,
@@ -93,11 +165,34 @@ struct DumpSetupWorker {
 savecored_slices: HashSet<DumpSlicePath>,
 
 log: Logger,
+ coredumpadm_invoker: Box<dyn CoreDumpAdmInvoker>,
+ zfs_invoker: Box<dyn ZfsInvoker>,
+ zone_invoker: Box<dyn ZoneInvoker>,
 }
 
-const ARCHIVAL_INTERVAL: Duration = Duration::from_secs(300);
+pub struct DumpSetup {
+ worker: Arc<std::sync::Mutex<DumpSetupWorker>>,
+ _poller: std::thread::JoinHandle<()>,
+ log: Logger,
+}
 
 impl DumpSetup {
+ pub fn new(log: &Logger) -> Self {
+ let worker = Arc::new(std::sync::Mutex::new(DumpSetupWorker::new(
+ Box::new(RealCoreDumpAdm {}),
+ Box::new(RealZfs {}),
+ Box::new(RealZone {}),
+ log.new(o!("component" => "DumpSetup-worker")),
+ )));
+ let worker_weak = Arc::downgrade(&worker);
+ let log_poll = log.new(o!("component" => "DumpSetup-archival"));
+ let _poller = std::thread::spawn(move || {
+ Self::poll_file_archival(worker_weak, log_poll)
+ });
+ let log = log.new(o!("component" => "DumpSetup"));
+ Self { worker, _poller, log }
+ }
+
 pub(crate) async fn update_dumpdev_setup(
 &self,
 disks: &BTreeMap,
@@ -127,7 +222,8 @@ impl DumpSetup {
 illumos_utils::zpool::Zpool::get_info(&name.to_string())
 {
 if info.health() == ZpoolHealth::Online {
- m2_core_datasets.push(CoreZpool(name.clone()));
+ m2_core_datasets
+ .push(CoreZpool::from(name.clone()));
 } else {
 warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there");
 }
@@ -139,7 +235,8 @@ impl DumpSetup {
 illumos_utils::zpool::Zpool::get_info(&name.to_string())
 {
 if info.health() == ZpoolHealth::Online {
- u2_debug_datasets.push(DebugZpool(name.clone()));
+ u2_debug_datasets
+ .push(DebugZpool::from(name.clone()));
 } else {
 warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there");
 }
@@ -211,45 +308,179 @@ enum ZfsGetError {
 Parse(#[from] std::num::ParseIntError),
 }
 
-const ZFS_PROP_USED: &str = "used";
-const ZFS_PROP_AVAILABLE: &str = "available";
+trait CoreDumpAdmInvoker {
+ fn coreadm(&self, core_dir: &Utf8PathBuf) -> Result<(), ExecutionError>;
+ fn dumpadm(
+ &self,
+ dump_slice: &Utf8PathBuf,
+ savecore_dir: Option<&Utf8PathBuf>,
+ ) -> Result<Option<OsString>, ExecutionError>;
+}
+
+trait ZfsInvoker {
+ fn zfs_get_prop(
+ &self,
+ mountpoint_or_name: &str,
+ property: 
&str,
+ ) -> Result<String, ZfsGetError>;
+
+ fn zfs_get_integer(
+ &self,
+ mountpoint_or_name: &str,
+ property: &str,
+ ) -> Result<u64, ZfsGetError> {
+ self.zfs_get_prop(mountpoint_or_name, property)?
+ .parse()
+ .map_err(Into::into)
+ }
+
+ fn below_thresh(
+ &self,
+ mountpoint: &Utf8PathBuf,
+ percent: u64,
+ ) -> Result<(bool, u64), ZfsGetError> {
+ let used = self.zfs_get_integer(mountpoint.as_str(), ZFS_PROP_USED)?;
+ let available =
+ self.zfs_get_integer(mountpoint.as_str(), ZFS_PROP_AVAILABLE)?;
+ let capacity = used + available;
+ let below = (used * 100) / capacity < percent;
+ Ok((below, used))
+ }
 
-fn zfs_get_integer(
- mountpoint_or_name: impl AsRef<str>,
- property: &str,
-) -> Result<u64, ZfsGetError> {
- zfs_get_prop(mountpoint_or_name, property)?.parse().map_err(Into::into)
+ fn mountpoint(
+ &self,
+ zpool: &ZpoolName,
+ mountpoint: &'static str,
+ ) -> Utf8PathBuf;
 }
 
-fn zfs_get_prop(
- mountpoint_or_name: impl AsRef<str> + Sized,
- property: &str,
-) -> Result<String, ZfsGetError> {
- let mountpoint = mountpoint_or_name.as_ref();
- let mut cmd = std::process::Command::new(illumos_utils::zfs::ZFS);
- cmd.arg("get").arg("-Hpo").arg("value");
- cmd.arg(property);
- cmd.arg(mountpoint);
- let output = cmd.output()?;
- Ok(String::from_utf8(output.stdout)?.trim().to_string())
+trait ZoneInvoker {
+ fn get_zones(&self) -> Result<Vec<Zone>, ArchiveLogsError>;
 }
 
-const DATASET_USAGE_PERCENT_CHOICE: u64 = 70;
-const DATASET_USAGE_PERCENT_CLEANUP: u64 = 80;
+struct RealCoreDumpAdm {}
+struct RealZfs {}
+struct RealZone {}
+
+impl CoreDumpAdmInvoker for RealCoreDumpAdm {
+ fn coreadm(&self, core_dir: &Utf8PathBuf) -> Result<(), ExecutionError> {
+ let mut cmd = CoreAdm::new();
+
+ // disable per-process core patterns
+ cmd.disable(CoreFileOption::Process);
+ cmd.disable(CoreFileOption::ProcSetid);
+
+ // use the global core pattern
+ cmd.enable(CoreFileOption::Global);
+ cmd.enable(CoreFileOption::GlobalSetid);
+
+ // set the global pattern to place all cores into core_dir,
+ // with filenames of "core.[zone-name].[exe-filename].[pid].[time]"
+ cmd.global_pattern(core_dir.join("core.%z.%f.%p.%t"));
+
+ // also collect DWARF data from the exe and its library deps
+ cmd.global_contents("default+debug");
+
+ cmd.execute()
+ }
+
+ // Invokes `dumpadm(8)` to configure the kernel to dump core into the given
+ // `dump_slice` block device in the event of a panic. If a core is already
+ // present in that block device, and a `savecore_dir` is provided, this
+ // function also invokes `savecore(8)` to save it into that directory.
+ // On success, returns Ok(Some(stdout)) if `savecore(8)` was invoked, or
+ // Ok(None) if it wasn't.
+ fn dumpadm(
+ &self,
+ dump_slice: &Utf8PathBuf,
+ savecore_dir: Option<&Utf8PathBuf>,
+ ) -> Result<Option<OsString>, ExecutionError> {
+ let savecore_dir_cloned = if let Some(dir) = savecore_dir.cloned() {
+ dir
+ } else {
+ // if we don't have a savecore destination yet, still create and use
+ // a tmpfs path (rather than the default location under /var/crash,
+ // which is in the ramdisk pool), because dumpadm refuses to do what
+ // we ask otherwise. 
+ let tmp_crash = "/tmp/crash";
+ std::fs::create_dir_all(tmp_crash).map_err(|err| {
+ ExecutionError::ExecutionStart {
+ command: format!("mkdir {tmp_crash:?}"),
+ err,
+ }
+ })?;
+ Utf8PathBuf::from(tmp_crash)
+ };
+
+ // Use the given block device path for dump storage:
+ let mut cmd = DumpAdm::new(dump_slice.to_owned(), savecore_dir_cloned);
+
+ // Include memory from the current process if there is one for the panic
+ // context, in addition to kernel memory:
+ cmd.content_type(DumpContentType::CurProc);
+
+ // Compress crash dumps:
+ cmd.compress(true);
+
+ // Do not run savecore(8) automatically on boot (irrelevant anyhow, as the
+ // config file being mutated by dumpadm won't survive reboots on gimlets).
+ // The sled-agent will invoke it manually instead.
+ cmd.no_boot_time_savecore();
 
-fn below_thresh(
- mountpoint: &Utf8PathBuf,
- percent: u64,
-) -> Result<(bool, u64), ZfsGetError> {
- let used = zfs_get_integer(mountpoint, ZFS_PROP_USED)?;
- let available = zfs_get_integer(mountpoint, ZFS_PROP_AVAILABLE)?;
- let capacity = used + available;
- let below = (used * 100) / capacity < percent;
- Ok((below, used))
+ cmd.execute()?;
+
+ // do we have a destination for the saved dump
+ if savecore_dir.is_some() {
+ // and does the dump slice have one to save off
+ if let Ok(true) =
+ illumos_utils::dumpadm::dump_flag_is_valid(dump_slice)
+ {
+ return illumos_utils::dumpadm::SaveCore.execute();
+ }
+ }
+ Ok(None)
+ }
+}
+
+impl ZfsInvoker for RealZfs {
+ fn zfs_get_prop(
+ &self,
+ mountpoint_or_name: &str,
+ property: &str,
+ ) -> Result<String, ZfsGetError> {
+ let mut cmd = std::process::Command::new(illumos_utils::zfs::ZFS);
+ cmd.arg("get").arg("-Hpo").arg("value");
+ cmd.arg(property);
+ cmd.arg(mountpoint_or_name);
+ let output = cmd.output()?;
+ Ok(String::from_utf8(output.stdout)?.trim().to_string())
+ }
+
+ fn mountpoint(
+ &self,
+ zpool: &ZpoolName,
+ mountpoint: &'static str,
+ ) -> Utf8PathBuf {
+ zpool.dataset_mountpoint(mountpoint)
+ }
+}
+
+impl ZoneInvoker for RealZone {
+ fn get_zones(&self) -> Result<Vec<Zone>, ArchiveLogsError> {
+ Ok(zone::Adm::list_blocking()?
+ .into_iter()
+ .filter(|z| z.global() || z.name().starts_with(ZONE_PREFIX))
+ .collect::<Vec<Zone>>())
+ }
 }
 
 impl DumpSetupWorker {
- fn new(log: Logger) -> Self {
+ fn new(
+ coredumpadm_invoker: Box<dyn CoreDumpAdmInvoker>,
+ zfs_invoker: Box<dyn ZfsInvoker>,
+ zone_invoker: Box<dyn ZoneInvoker>,
+ log: Logger,
+ ) -> Self {
 Self {
 core_dataset_names: vec![],
 debug_dataset_names: vec![],
@@ -261,6 +492,9 @@ impl DumpSetupWorker {
 known_core_dirs: vec![],
 savecored_slices: Default::default(),
 log,
+ coredumpadm_invoker,
+ zfs_invoker,
+ zone_invoker,
 }
 }
 
@@ -284,13 +518,13 @@ impl DumpSetupWorker {
 self.known_debug_dirs = self
 .debug_dataset_names
 .iter()
- .flat_map(|ds| ds.mountpoint())
+ .flat_map(|ds| ds.mountpoint(self.zfs_invoker.as_ref()))
 .flatten()
 .collect();
 self.known_core_dirs = self
 .core_dataset_names
 .iter()
- .flat_map(|ds| ds.mountpoint())
+ .flat_map(|ds| ds.mountpoint(self.zfs_invoker.as_ref()))
 .flatten()
 .collect();
 }
@@ -304,7 +538,7 @@ impl DumpSetupWorker {
 // below a certain usage threshold. 
self.known_debug_dirs.sort_by_cached_key( |mountpoint: &DebugDataset| { - match below_thresh(mountpoint.as_ref(), DATASET_USAGE_PERCENT_CHOICE) { + match self.zfs_invoker.below_thresh(mountpoint.as_ref(), DATASET_USAGE_PERCENT_CHOICE) { Ok((below, used)) => { let priority = if below { 0 } else { 1 }; (priority, used, mountpoint.clone()) @@ -319,7 +553,10 @@ impl DumpSetupWorker { ); self.known_core_dirs.sort_by_cached_key(|mnt| { // these get archived periodically anyway, pick one with room - let available = zfs_get_integer(&**mnt, "available").unwrap_or(0); + let available = self + .zfs_invoker + .zfs_get_integer(mnt.as_ref().as_str(), "available") + .unwrap_or(0); (u64::MAX - available, mnt.clone()) }); @@ -328,16 +565,20 @@ impl DumpSetupWorker { warn!(self.log, "Previously-chosen debug/dump dir {x:?} no longer exists in our view of reality"); self.chosen_debug_dir = None; } else { - match below_thresh(x.as_ref(), DATASET_USAGE_PERCENT_CLEANUP) { + match self + .zfs_invoker + .below_thresh(x.as_ref(), DATASET_USAGE_PERCENT_CLEANUP) + { Ok((true, _)) => {} Ok((false, _)) => { if self.known_debug_dirs.iter().any(|x| { - below_thresh( - x.as_ref(), - DATASET_USAGE_PERCENT_CHOICE, - ) - .unwrap_or((false, 0)) - .0 + self.zfs_invoker + .below_thresh( + x.as_ref(), + DATASET_USAGE_PERCENT_CHOICE, + ) + .unwrap_or((false, 0)) + .0 }) { info!(self.log, "Previously-chosen debug/dump dir {x:?} is over usage threshold, choosing a more vacant disk"); self.chosen_debug_dir = None; @@ -377,7 +618,7 @@ impl DumpSetupWorker { if self.chosen_core_dir.is_none() { for core_dir in &self.known_core_dirs { // tell the system to write *userspace process* cores here. - match illumos_utils::coreadm::coreadm(core_dir) { + match self.coredumpadm_invoker.coreadm(core_dir.as_ref()) { Ok(()) => { self.chosen_core_dir = Some(core_dir.clone()); info!( @@ -398,7 +639,7 @@ impl DumpSetupWorker { for dump_slice in self.known_dump_slices.clone() { // Let's try to see if it appears to have a kernel dump already match illumos_utils::dumpadm::dump_flag_is_valid( - &dump_slice, + dump_slice.as_ref(), ) { Ok(true) => { debug!(self.log, "Dump slice {dump_slice:?} appears to have a valid header; will attempt to savecore"); @@ -423,14 +664,16 @@ impl DumpSetupWorker { // already one there until we can attempt to savecore(8) // it away and clear the flag to make room. for dump_slice in &self.known_dump_slices { - match illumos_utils::dumpadm::dump_flag_is_valid(dump_slice) - { + match illumos_utils::dumpadm::dump_flag_is_valid( + dump_slice.as_ref(), + ) { Ok(false) => { // Have dumpadm write the config for crash dumps to be // on this slice, at least, until a U.2 comes along. 
- match illumos_utils::dumpadm::dumpadm(
- dump_slice, None,
- ) {
+ match self
+ .coredumpadm_invoker
+ .dumpadm(dump_slice.as_ref(), None)
+ {
 Ok(_) => {
 info!(self.log, "Using dump device {dump_slice:?} with no savecore destination (no U.2 debug zvol yet)");
 self.chosen_dump_slice =
@@ -488,8 +731,9 @@ impl DumpSetupWorker {
 // in the event of a kernel crash
 if changed_slice {
 if let Some(dump_slice) = &self.chosen_dump_slice {
- if let Err(err) =
- illumos_utils::dumpadm::dumpadm(dump_slice, None)
+ if let Err(err) = self
+ .coredumpadm_invoker
+ .dumpadm(dump_slice.as_ref(), None)
 {
 error!(self.log, "Could not restore dump slice to {dump_slice:?}: {err:?}");
 }
@@ -504,10 +748,10 @@ impl DumpSetupWorker {
 info!(self.log, "No core dump locations yet known.");
 }
 for core_dir in &self.known_core_dirs {
- if let Ok(dir) = core_dir.read_dir() {
+ if let Ok(dir) = core_dir.as_ref().read_dir() {
 for entry in dir.flatten() {
 if let Some(path) = entry.file_name().to_str() {
- let dest = debug_dir.join(path);
+ let dest = debug_dir.as_ref().join(path);
 
 if let Err(err) =
 Self::copy_sync_and_remove(&entry.path(), &dest)
@@ -572,18 +816,13 @@ impl DumpSetupWorker {
 .chosen_debug_dir
 .as_ref()
 .ok_or(ArchiveLogsError::NoDebugDirYet)?;
- // zone crate's 'deprecated' functions collide if you try to enable
- // its 'sync' and 'async' features simultaneously :(
- let rt =
- tokio::runtime::Runtime::new().map_err(ArchiveLogsError::Tokio)?;
- let oxz_zones = rt.block_on(Zones::get())?;
- self.archive_logs_inner(
- debug_dir,
- PathBuf::from("/var/svc/log"),
- "global",
- )?;
+ let oxz_zones = self.zone_invoker.get_zones()?;
 for zone in oxz_zones {
- let logdir = zone.path().join("root/var/svc/log");
+ let logdir = if zone.global() {
+ PathBuf::from("/var/svc/log")
+ } else {
+ zone.path().join("root/var/svc/log")
+ };
 let zone_name = zone.name();
 self.archive_logs_inner(debug_dir, logdir, zone_name)?;
 }
@@ -607,7 +846,7 @@ impl DumpSetupWorker {
 .to_string();
 rotated_log_files.extend(glob::glob(&pattern)?.flatten());
 }
- let dest_dir = debug_dir.join(zone_name).into_std_path_buf();
+ let dest_dir = debug_dir.as_ref().join(zone_name).into_std_path_buf();
 if !rotated_log_files.is_empty() {
 std::fs::create_dir_all(&dest_dir)?;
 let count = rotated_log_files.len();
@@ -658,13 +897,15 @@ impl DumpSetupWorker {
 fn dumpadm_and_savecore(
 &mut self,
 dump_slice: &DumpSlicePath,
- ) -> Result<Option<OsString>, DumpAdmError> {
+ ) -> Result<Option<OsString>, ExecutionError> {
 // TODO: untangle savecore from illumos_utils::dumpadm
 assert!(self.chosen_debug_dir.is_some());
 
 let savecore_dir = self.chosen_debug_dir.clone().unwrap().0;
 
- match illumos_utils::dumpadm::dumpadm(&dump_slice, Some(&savecore_dir))
+ match self
+ .coredumpadm_invoker
+ .dumpadm(dump_slice.as_ref(), Some(&savecore_dir))
 {
 Ok(saved) => {
 self.savecored_slices.insert(dump_slice.clone());
@@ -677,7 +918,7 @@ impl DumpSetupWorker {
 fn cleanup(&self) -> Result<(), CleanupError> {
 let mut dir_info = Vec::new();
 for dir in &self.known_debug_dirs {
- match Self::scope_dir_for_cleanup(dir) {
+ match self.scope_dir_for_cleanup(dir) {
 Ok(info) => {
 dir_info.push((info, dir));
 }
@@ -715,17 +956,24 @@ impl DumpSetupWorker {
 }
 
 fn scope_dir_for_cleanup(
+ &self,
 debug_dir: &DebugDataset,
 ) -> Result<CleanupDirInfo, CleanupError> {
- let used = zfs_get_integer(&**debug_dir, ZFS_PROP_USED)?;
- let available = zfs_get_integer(&**debug_dir, ZFS_PROP_AVAILABLE)?;
+ let used = self
+ .zfs_invoker
+ .zfs_get_integer(debug_dir.as_ref().as_str(), ZFS_PROP_USED)?;
+ let available = self
+ .zfs_invoker
+ 
.zfs_get_integer(debug_dir.as_ref().as_str(), ZFS_PROP_AVAILABLE)?;
 let capacity = used + available;
 
 let target_used = capacity * DATASET_USAGE_PERCENT_CHOICE / 100;
 
 let mut file_list = Vec::new();
 // find all files in the debug dataset and sort by modified time
- for path in glob::glob(debug_dir.join("**/*").as_str())?.flatten() {
+ for path in
+ glob::glob(debug_dir.as_ref().join("**/*").as_str())?.flatten()
+ {
 let meta = std::fs::metadata(&path)?;
 // we need this to be a Duration rather than SystemTime so we can
 // do math to it later.
@@ -758,13 +1006,11 @@ impl DumpSetupWorker {
 }
 
 #[derive(thiserror::Error, Debug)]
-enum ArchiveLogsError {
- #[error("Couldn't make an async runtime to get zone info: {0}")]
- Tokio(std::io::Error),
+pub enum ArchiveLogsError {
 #[error("I/O error: {0}")]
 IoError(#[from] std::io::Error),
 #[error("Error calling zoneadm: {0}")]
- Zoneadm(#[from] AdmError),
+ Zoneadm(#[from] ZoneError),
 #[error("Non-UTF8 zone path for zone {0}")]
 Utf8(String),
 #[error("Glob pattern invalid: {0}")]
@@ -795,3 +1041,431 @@ struct CleanupDirInfo {
 num_to_delete: u32,
 file_list: Vec<(Duration, u64, PathBuf)>,
 }
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use illumos_utils::dumpadm::{
+ DF_VALID, DUMP_MAGIC, DUMP_OFFSET, DUMP_VERSION,
+ };
+ use sled_storage::dataset::{CRASH_DATASET, DUMP_DATASET};
+ use std::collections::HashMap;
+ use std::io::Write;
+ use std::str::FromStr;
+ use tempfile::TempDir;
+
+ impl Clone for ZfsGetError {
+ fn clone(&self) -> Self {
+ match self {
+ ZfsGetError::IoError(_err) => unimplemented!(),
+ ZfsGetError::Utf8(err) => ZfsGetError::Utf8(err.clone()),
+ ZfsGetError::Parse(err) => ZfsGetError::Parse(err.clone()),
+ }
+ }
+ }
+
+ #[derive(Default)]
+ struct FakeCoreDumpAdm {}
+ #[derive(Default)]
+ struct FakeZfs {
+ pub zpool_props: HashMap<
+ &'static str,
+ HashMap<&'static str, Result<String, ZfsGetError>>,
+ >,
+ }
+ #[derive(Default)]
+ struct FakeZone {
+ pub zones: Vec<Zone>,
+ }
+
+ impl CoreDumpAdmInvoker for FakeCoreDumpAdm {
+ fn coreadm(
+ &self,
+ _core_dir: &Utf8PathBuf,
+ ) -> Result<(), ExecutionError> {
+ Ok(())
+ }
+
+ fn dumpadm(
+ &self,
+ _dump_slice: &Utf8PathBuf,
+ _savecore_dir: Option<&Utf8PathBuf>,
+ ) -> Result<Option<OsString>, ExecutionError> {
+ Ok(None)
+ }
+ }
+ impl ZfsInvoker for FakeZfs {
+ fn zfs_get_prop(
+ &self,
+ mountpoint_or_name: &str,
+ property: &str,
+ ) -> Result<String, ZfsGetError> {
+ self.zpool_props
+ .get(mountpoint_or_name)
+ .unwrap_or_else(|| {
+ panic!(
+ "Test did not provide fake zpool {}",
+ mountpoint_or_name
+ )
+ })
+ .get(property)
+ .unwrap_or_else(|| {
+ panic!(
+ "Test did not provide property {property} for fake zpool {}",
+ mountpoint_or_name
+ )
+ })
+ .clone()
+ }
+
+ fn mountpoint(
+ &self,
+ zpool: &ZpoolName,
+ mountpoint: &'static str,
+ ) -> Utf8PathBuf {
+ Utf8PathBuf::from(
+ self.zpool_props
+ .get(zpool.to_string().as_str())
+ .unwrap_or_else(|| {
+ panic!("Test did not provide fake zpool {}", zpool)
+ })
+ .get("mountpoint")
+ .unwrap_or_else(|| {
+ panic!(
+ "Test did not provide mountpoint for fake zpool {}",
+ zpool
+ )
+ })
+ .clone()
+ .unwrap(),
+ )
+ .join(mountpoint)
+ }
+ }
+ impl ZoneInvoker for FakeZone {
+ fn get_zones(&self) -> Result<Vec<Zone>, ArchiveLogsError> {
+ Ok(self.zones.clone())
+ }
+ }
+
+ #[test]
+ fn test_does_not_configure_coreadm_when_no_crash_dataset_mounted() {
+ let logctx = omicron_test_utils::dev::test_setup_log(
+ "test_does_not_configure_coreadm_when_no_crash_dataset_mounted",
+ );
+ const NOT_MOUNTED_INTERNAL: &str =
+ "oxi_acab2069-6e63-6c75-de73-20c06c756db0";
+ let mut worker = DumpSetupWorker::new(
+ Box::<FakeCoreDumpAdm>::default(),
+ Box::new(FakeZfs {
+ zpool_props: [(
+ NOT_MOUNTED_INTERNAL,
+ [("mounted", Ok("no".to_string()))].into_iter().collect(),
+ )]
+ .into_iter()
+ .collect(),
+ }),
+ Box::<FakeZone>::default(),
+ logctx.log.clone(),
+ );
+
+ // nothing when no disks
+ worker.update_disk_loadout(vec![], vec![], vec![]);
+ assert_eq!(worker.chosen_core_dir, None);
+
+ // nothing when only a disk that's not ready
+ let non_mounted_zpool =
+ CoreZpool(ZpoolName::from_str(NOT_MOUNTED_INTERNAL).unwrap());
+ worker.update_disk_loadout(vec![], vec![], vec![non_mounted_zpool]);
+ assert_eq!(worker.chosen_core_dir, None);
+ logctx.cleanup_successful();
+ }
+
+ #[test]
+ fn test_configures_coreadm_only_when_crash_dataset_mounted() {
+ let logctx = omicron_test_utils::dev::test_setup_log(
+ "test_configures_coreadm_only_when_crash_dataset_mounted",
+ );
+ const NOT_MOUNTED_INTERNAL: &str =
+ "oxi_acab2069-6e63-6c75-de73-20c06c756db0";
+ const MOUNTED_INTERNAL: &str =
+ "oxi_474e554e-6174-616c-6965-4e677579656e";
+ const ERROR_INTERNAL: &str = "oxi_4861636b-2054-6865-2050-6c616e657421";
+ let mounted_zpool =
+ CoreZpool(ZpoolName::from_str(MOUNTED_INTERNAL).unwrap());
+ let non_mounted_zpool =
+ CoreZpool(ZpoolName::from_str(NOT_MOUNTED_INTERNAL).unwrap());
+ let err_zpool = CoreZpool(ZpoolName::from_str(ERROR_INTERNAL).unwrap());
+ const ZPOOL_MNT: &str = "/path/to/internal/zpool";
+ let mut worker = DumpSetupWorker::new(
+ Box::<FakeCoreDumpAdm>::default(),
+ Box::new(FakeZfs {
+ zpool_props: [
+ (
+ NOT_MOUNTED_INTERNAL,
+ [("mounted", Ok("no".to_string()))]
+ .into_iter()
+ .collect(),
+ ),
+ (
+ MOUNTED_INTERNAL,
+ [
+ ("mounted", Ok("yes".to_string())),
+ ("mountpoint", Ok(ZPOOL_MNT.to_string())),
+ ]
+ .into_iter()
+ .collect(),
+ ),
+ (
+ ERROR_INTERNAL,
+ [(
+ "mounted",
+ Err("asdf".parse::<u64>().unwrap_err().into()),
+ )]
+ .into_iter()
+ .collect(),
+ ),
+ ]
+ .into_iter()
+ .collect(),
+ }),
+ Box::<FakeZone>::default(),
+ logctx.log.clone(),
+ );
+
+ // something when there's one that's ready! 
+ worker.update_disk_loadout(
+ vec![],
+ vec![],
+ vec![non_mounted_zpool.clone(), mounted_zpool],
+ );
+ assert_eq!(
+ worker.chosen_core_dir.as_ref().unwrap().0,
+ Utf8PathBuf::from(ZPOOL_MNT).join(CRASH_DATASET)
+ );
+
+ // back to nothing if it becomes unavailable
+ worker.update_disk_loadout(
+ vec![],
+ vec![],
+ vec![non_mounted_zpool, err_zpool],
+ );
+ assert_eq!(worker.chosen_core_dir, None);
+ logctx.cleanup_successful();
+ }
+
+ // we make these so illumos_utils::dumpadm::dump_flag_is_valid returns what we want
+ fn populate_tempdir_with_fake_dumps(
+ tempdir: &TempDir,
+ ) -> (DumpSlicePath, DumpSlicePath) {
+ let occupied = DumpSlicePath(
+ Utf8PathBuf::from_path_buf(tempdir.path().join("occupied.bin"))
+ .unwrap(),
+ );
+ let mut f = std::fs::File::create(occupied.as_ref()).unwrap();
+ f.write_all(&[0u8; DUMP_OFFSET as usize]).unwrap();
+ f.write_all(&DUMP_MAGIC.to_le_bytes()).unwrap();
+ f.write_all(&DUMP_VERSION.to_le_bytes()).unwrap();
+ f.write_all(&DF_VALID.to_le_bytes()).unwrap();
+ drop(f);
+
+ let vacant = DumpSlicePath(
+ Utf8PathBuf::from_path_buf(tempdir.path().join("vacant.bin"))
+ .unwrap(),
+ );
+ let mut f = std::fs::File::create(vacant.as_ref()).unwrap();
+ f.write_all(&[0u8; DUMP_OFFSET as usize]).unwrap();
+ f.write_all(&DUMP_MAGIC.to_le_bytes()).unwrap();
+ f.write_all(&DUMP_VERSION.to_le_bytes()).unwrap();
+ f.write_all(&0u32.to_le_bytes()).unwrap();
+ drop(f);
+
+ (occupied, vacant)
+ }
+
+ // if we only have two filled dump slices and nowhere to evacuate them,
+ // don't configure a dump slice at all.
+ #[test]
+ fn test_savecore_and_dumpadm_not_called_when_occupied_and_no_dir() {
+ let logctx = omicron_test_utils::dev::test_setup_log(
+ "test_savecore_and_dumpadm_not_called_when_occupied_and_no_dir",
+ );
+ let mut worker = DumpSetupWorker::new(
+ Box::<FakeCoreDumpAdm>::default(),
+ Box::<FakeZfs>::default(),
+ Box::<FakeZone>::default(),
+ logctx.log.clone(),
+ );
+ let tempdir = TempDir::new().unwrap();
+ let (occupied, _) = populate_tempdir_with_fake_dumps(&tempdir);
+
+ worker.update_disk_loadout(
+ vec![occupied.clone(), occupied],
+ vec![],
+ vec![],
+ );
+ assert!(worker.chosen_dump_slice.is_none());
+ logctx.cleanup_successful();
+ }
+
+ // if we have one dump slice that's free and one that's full,
+ // and nowhere to savecore the full one,
+ // we should always call dumpadm with the free one.
+ #[test]
+ fn test_dumpadm_called_when_vacant_slice_but_no_dir() {
+ let logctx = omicron_test_utils::dev::test_setup_log(
+ "test_dumpadm_called_when_vacant_slice_but_no_dir",
+ );
+ let mut worker = DumpSetupWorker::new(
+ Box::<FakeCoreDumpAdm>::default(),
+ Box::<FakeZfs>::default(),
+ Box::<FakeZone>::default(),
+ logctx.log.clone(),
+ );
+ let tempdir = TempDir::new().unwrap();
+ let (occupied, vacant) = populate_tempdir_with_fake_dumps(&tempdir);
+ worker.update_disk_loadout(
+ vec![occupied, vacant.clone()],
+ vec![],
+ vec![],
+ );
+ assert_eq!(worker.chosen_dump_slice.as_ref(), Some(&vacant));
+ logctx.cleanup_successful();
+ }
+
+ // if we have two occupied dump slices,
+ // but we also have somewhere to unload them,
+ // call dumpadm and savecore. 
+ #[test]
+ fn test_savecore_and_dumpadm_invoked_when_slices_occupied_and_dir_is_available(
+ ) {
+ let logctx = omicron_test_utils::dev::test_setup_log("test_savecore_and_dumpadm_invoked_when_slices_occupied_and_dir_is_available");
+ const MOUNTED_EXTERNAL: &str =
+ "oxp_446f6e74-4469-6557-6f6e-646572696e67";
+ const ZPOOL_MNT: &str = "/path/to/external/zpool";
+ let mut worker = DumpSetupWorker::new(
+ Box::<FakeCoreDumpAdm>::default(),
+ Box::new(FakeZfs {
+ zpool_props: [(
+ MOUNTED_EXTERNAL,
+ [
+ ("mounted", Ok("yes".to_string())),
+ ("mountpoint", Ok(ZPOOL_MNT.to_string())),
+ ]
+ .into_iter()
+ .collect(),
+ )]
+ .into_iter()
+ .collect(),
+ }),
+ Box::<FakeZone>::default(),
+ logctx.log.clone(),
+ );
+ let tempdir = TempDir::new().unwrap();
+ let (occupied, _) = populate_tempdir_with_fake_dumps(&tempdir);
+
+ let mounted_zpool =
+ DebugZpool(ZpoolName::from_str(MOUNTED_EXTERNAL).unwrap());
+ worker.update_disk_loadout(
+ vec![occupied.clone()],
+ vec![mounted_zpool],
+ vec![],
+ );
+ assert_eq!(worker.chosen_dump_slice.as_ref(), Some(&occupied));
+ assert_eq!(
+ worker.chosen_debug_dir.unwrap().0,
+ Utf8PathBuf::from(ZPOOL_MNT).join(DUMP_DATASET)
+ );
+ logctx.cleanup_successful();
+ }
+
+ #[test]
+ fn test_archives_rotated_logs_and_cores() {
+ let logctx = omicron_test_utils::dev::test_setup_log(
+ "test_archives_rotated_logs_and_cores",
+ );
+
+ let tempdir = TempDir::new().unwrap();
+ let core_dir = tempdir.path().join(CRASH_DATASET);
+ let debug_dir = tempdir.path().join(DUMP_DATASET);
+ let zone_logs = tempdir.path().join("root/var/svc/log");
+
+ let tempdir_path = tempdir.path().to_str().unwrap().to_string();
+ let zone = Zone::from_str(&format!(
+ "1:myzone:running:{tempdir_path}::ipkg:shared"
+ ))
+ .unwrap();
+
+ const MOUNTED_INTERNAL: &str =
+ "oxi_474e554e-6174-616c-6965-4e677579656e";
+ const MOUNTED_EXTERNAL: &str =
+ "oxp_446f6e74-4469-6557-6f6e-646572696e67";
+ let mut worker = DumpSetupWorker::new(
+ Box::<FakeCoreDumpAdm>::default(),
+ Box::new(FakeZfs {
+ zpool_props: [
+ (
+ MOUNTED_INTERNAL,
+ [
+ ("mounted", Ok("yes".to_string())),
+ ("mountpoint", Ok(tempdir_path.clone())),
+ ]
+ .into_iter()
+ .collect(),
+ ),
+ (
+ MOUNTED_EXTERNAL,
+ [
+ ("mounted", Ok("yes".to_string())),
+ ("mountpoint", Ok(tempdir_path)),
+ ]
+ .into_iter()
+ .collect(),
+ ),
+ ]
+ .into_iter()
+ .collect(),
+ }),
+ Box::new(FakeZone { zones: vec![zone.clone()] }),
+ logctx.log.clone(),
+ );
+
+ std::fs::create_dir_all(&core_dir).unwrap();
+ std::fs::create_dir_all(&debug_dir).unwrap();
+ std::fs::create_dir_all(&zone_logs).unwrap();
+ const LOG_NAME: &'static str = "foo.log.0";
+ writeln!(
+ std::fs::File::create(zone_logs.join(LOG_NAME)).unwrap(),
+ "hello"
+ )
+ .unwrap();
+
+ const CORE_NAME: &str = "core.myzone.myexe.123.1690540950";
+ writeln!(
+ std::fs::File::create(core_dir.join(CORE_NAME)).unwrap(),
+ "crunch"
+ )
+ .unwrap();
+
+ let mounted_core_zpool =
+ CoreZpool(ZpoolName::from_str(MOUNTED_INTERNAL).unwrap());
+ let mounted_debug_zpool =
+ DebugZpool(ZpoolName::from_str(MOUNTED_EXTERNAL).unwrap());
+
+ worker.update_disk_loadout(
+ vec![],
+ vec![mounted_debug_zpool],
+ vec![mounted_core_zpool],
+ );
+ worker.archive_files().unwrap();
+
+ // it'll be renamed to use an epoch timestamp instead of .0
+ let log_glob =
+ debug_dir.join(zone.name()).join(LOG_NAME.replace(".0", ".*"));
+ assert_eq!(glob::glob(log_glob.to_str().unwrap()).unwrap().count(), 1);
+ assert!(!zone_logs.join(LOG_NAME).is_file());
+
+ assert!(debug_dir.join(CORE_NAME).is_file());
+ assert!(!core_dir.join(CORE_NAME).is_file());
+ 
logctx.cleanup_successful();
+ }
+}

From f4c7a8d4e4ea93e68e33f6b97c6e22ae94d45f7a Mon Sep 17 00:00:00 2001
From: "Andrew J. Stone"
Date: Thu, 1 Feb 2024 10:37:09 -0500
Subject: [PATCH 73/91] Use hostname for Baseboard::Pc::identifier field
 (#4937)

Doing this allows easier testbed setup and usage, since we identify new
sleds to add via their baseboard. If we can't get a hostname, we fall
back to using a UUID, as in the prior version of the code.
---
 Cargo.lock                       | 11 +++++++++++
 Cargo.toml                       |  1 +
 sled-hardware/Cargo.toml         |  1 +
 sled-hardware/src/illumos/mod.rs |  5 ++++-
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/Cargo.lock b/Cargo.lock
index db80bfd091..25fb63604e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2629,6 +2629,16 @@ dependencies = [
 "zeroize",
 ]
 
+[[package]]
+name = "gethostname"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0176e0459c2e4a1fe232f984bca6890e681076abb9934f6cea7c326f3fc47818"
+dependencies = [
+ "libc",
+ "windows-targets 0.48.5",
+]
+
 [[package]]
 name = "getopts"
 version = "0.2.21"
@@ -8071,6 +8081,7 @@ dependencies = [
 "camino",
 "cfg-if",
 "futures",
+ "gethostname",
 "illumos-devinfo",
 "illumos-utils",
 "libc",
diff --git a/Cargo.toml b/Cargo.toml
index 72952ff643..f7c5b11aba 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -208,6 +208,7 @@ gateway-client = { path = "clients/gateway-client" }
 gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9", default-features = false, features = ["std"] }
 gateway-sp-comms = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "2739c18e80697aa6bc235c935176d14b4d757ee9" }
 gateway-test-utils = { path = "gateway-test-utils" }
+gethostname = "0.4.3"
 glob = "0.3.1"
 guppy = "0.17.4"
 headers = "0.3.9"
diff --git a/sled-hardware/Cargo.toml b/sled-hardware/Cargo.toml
index 66ecbf9d64..3d1259f46f 100644
--- a/sled-hardware/Cargo.toml
+++ b/sled-hardware/Cargo.toml
@@ -10,6 +10,7 @@ anyhow.workspace = true
 camino.workspace = true
 cfg-if.workspace = true
 futures.workspace = true
+gethostname.workspace = true
 illumos-utils.workspace = true
 libc.workspace = true
 macaddr.workspace = true
diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs
index 19111c6cda..ebc9a9c2b0 100644
--- a/sled-hardware/src/illumos/mod.rs
+++ b/sled-hardware/src/illumos/mod.rs
@@ -7,6 +7,7 @@ use crate::{
 UnparsedDisk,
 };
 use camino::Utf8PathBuf;
+use gethostname::gethostname;
 use illumos_devinfo::{DevInfo, DevLinkType, DevLinks, Node, Property};
 use omicron_common::disk::DiskIdentity;
 use slog::debug;
@@ -525,7 +526,9 @@ fn poll_device_tree(
 
 if inner.baseboard.is_none() {
 let pc_baseboard = Baseboard::new_pc(
- Uuid::new_v4().simple().to_string(),
+ gethostname().into_string().unwrap_or_else(
+ |_| Uuid::new_v4().simple().to_string(),
+ ),
 root_node.clone(),
 );

From 8189a8e686b41ae8f17ce8eed9e118f4fa439f26 Mon Sep 17 00:00:00 2001
From: John Gallagher
Date: Thu, 1 Feb 2024 10:51:18 -0500
Subject: [PATCH 74/91] Rework {Sp,Rot,HostPhase1}Updater tests to be less
 flaky (#4922)

There were three mostly copy/pasted tests that all tried to tame an
inherent race between two tokio tasks: one is delivering an update to
the simulated SP, and the other is polling that simulated SP for the
progress of that update.
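In rough form (an illustrative sketch only, not the actual test code;
`updater`, `mgs_clients`, and `progress` stand in for the per-test
equivalents), the racing pair looks like this:

```rust
// Task 1: deliver the update through MGS to the simulated SP.
let deliver = tokio::spawn(async move {
    updater.update(&mut mgs_clients).await
});

// Task 2: watch the progress reports the updater derives from polling
// the simulated SP, checking invariants as they change.
let watch = tokio::spawn(async move {
    loop {
        progress.changed().await.unwrap();
        let status = progress.borrow_and_update().clone();
        // ...assert invariants about `status` here...
    }
});
```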
The tests tamed this race by throttling the simulated SP to only reply
to a single message at a time, then checking some internal state to
infer what kind of message the SP had just received, and using that
inference to check some invariants. The inference was typically right,
but as evidenced by the name of this PR, not perfect.

I took a fairly big hammer to the Rot and HostPhase1 updater tests: they
now just loop waiting to see the "Complete" progress message, and no
longer try to single-step through messages. Under normal conditions
(e.g., not heavily-loaded-CI systems), it's likely these tests will skip
from "no progress" to "complete", but that's ok.

I tried to rework the SpUpdater test to keep the same "single-step
through messages" approach to carefully check the progress reports, by
adding more details to the simulated SP, so the test can know exactly
when the specific message it was trying to infer (i.e., `SpUpdater`'s
requests for the update status) arrives. This should fix the flake; if
it doesn't, we can replace this one with the same impl that the Rot and
HostPhase1 tests now have.

Fixes https://github.com/oxidecomputer/omicron/issues/4911
---
 .../integration_tests/host_phase1_updater.rs | 178 ++++--------------
 nexus/tests/integration_tests/rot_updater.rs | 166 ++++------------
 nexus/tests/integration_tests/sp_updater.rs  | 132 +++++++------
 sp-sim/src/gimlet.rs                         |  59 +++++-
 sp-sim/src/lib.rs                            |   1 +
 5 files changed, 206 insertions(+), 330 deletions(-)

diff --git a/nexus/tests/integration_tests/host_phase1_updater.rs b/nexus/tests/integration_tests/host_phase1_updater.rs
index 01d546636e..b2e4a5bc1c 100644
--- a/nexus/tests/integration_tests/host_phase1_updater.rs
+++ b/nexus/tests/integration_tests/host_phase1_updater.rs
@@ -6,7 +6,7 @@
 //! MGS to SP.
 
 use gateway_client::types::SpType;
-use gateway_messages::{SpPort, UpdateInProgressStatus, UpdateStatus};
+use gateway_messages::SpPort;
 use gateway_test_utils::setup as mgs_setup;
 use omicron_nexus::app::test_interfaces::{
 HostPhase1Updater, MgsClients, UpdateProgress,
 };
@@ -403,6 +403,7 @@ async fn test_host_phase1_updater_delivers_progress() {
 let target_host_slot = 0;
 let update_id = Uuid::new_v4();
 let phase1_data = make_fake_host_phase1_image();
+ let target_sp = &mgstestctx.simrack.gimlets[sp_slot as usize];
 
 let host_phase1_updater = HostPhase1Updater::new(
 sp_type,
@@ -413,156 +414,57 @@ async fn test_host_phase1_updater_delivers_progress() {
 &mgstestctx.logctx.log,
 );
 
- let phase1_data_len = phase1_data.len() as u32;
-
 // Subscribe to update progress, and check that there is no status yet; we
 // haven't started the update.
 let mut progress = host_phase1_updater.progress_watcher();
 assert_eq!(*progress.borrow_and_update(), None);
 
- // Install a semaphore on the requests our target SP will receive so we can
- // inspect progress messages without racing.
- let target_sp = &mgstestctx.simrack.gimlets[sp_slot as usize];
- let sp_accept_sema = target_sp.install_udp_accept_semaphore().await;
- let mut sp_responses = target_sp.responses_sent_count().unwrap();
-
 // Spawn the update on a background task so we can watch `progress` as it is
 // applied.
 let do_update_task = tokio::spawn(async move {
 host_phase1_updater.update(&mut mgs_clients).await
 });
 
- // Allow the SP to respond to 2 messages: the message to activate the target
- // flash slot and the "prepare update" messages that triggers the start of an
- // update, then ensure we see the "started" progress. 
- sp_accept_sema.send(2).unwrap(); - progress.changed().await.unwrap(); - assert_eq!(*progress.borrow_and_update(), Some(UpdateProgress::Started)); - - // Ensure our simulated SP is in the state we expect: it's prepared for an - // update but has not yet received any data. - assert_eq!( - target_sp.current_update_status().await, - UpdateStatus::InProgress(UpdateInProgressStatus { - id: update_id.into(), - bytes_received: 0, - total_size: phase1_data_len, - }) - ); - - // Record the number of responses the SP has sent; we'll use - // `sp_responses.changed()` in the loop below, and want to mark whatever - // value this watch channel currently has as seen. - sp_responses.borrow_and_update(); - - // At this point, there are two clients racing each other to talk to our - // simulated SP: - // - // 1. MGS is trying to deliver the update - // 2. `host_phase1_updater` is trying to poll (via MGS) for update status - // - // and we want to ensure that we see any relevant progress reports from - // `host_phase1_updater`. We'll let one MGS -> SP message through at a time - // (waiting until our SP has responded by waiting for a change to - // `sp_responses`) then check its update state: if it changed, the packet we - // let through was data from MGS; otherwise, it was a status request from - // `host_phase1_updater`. - // - // This loop will continue until either: - // - // 1. We see an `UpdateStatus::InProgress` message indicating 100% delivery, - // at which point we break out of the loop - // 2. We time out waiting for the previous step (by timing out for either - // the SP to process a request or `host_phase1_updater` to realize - // there's been progress), at which point we panic and fail this test. - let mut prev_bytes_received = 0; - let mut expect_progress_change = false; - loop { - // Allow the SP to accept and respond to a single UDP packet. - sp_accept_sema.send(1).unwrap(); - - // Wait until the SP has sent a response, with a safety rail that we - // haven't screwed up our untangle-the-race logic: if we don't see the - // SP process any new messages after several seconds, our test is - // broken, so fail. - tokio::time::timeout(Duration::from_secs(10), sp_responses.changed()) - .await - .expect("timeout waiting for SP response count to change") - .expect("sp response count sender dropped"); - - // Inspec the SP's in-memory update state; we expect only `InProgress` - // or `Complete`, and in either case we note whether we expect to see - // status changes from `host_phase1_updater`. - match target_sp.current_update_status().await { - UpdateStatus::InProgress(sp_progress) => { - if sp_progress.bytes_received > prev_bytes_received { - prev_bytes_received = sp_progress.bytes_received; - expect_progress_change = true; - continue; - } - } - UpdateStatus::Complete(_) => { - if prev_bytes_received < phase1_data_len { - break; + // Loop until we see `UpdateProgress::Complete`, ensuring that any + // intermediate progress messages we see are in order. 
+ let mut saw_started = false; + let mut prev_progress = 0.0; + let log = mgstestctx.logctx.log.clone(); + tokio::time::timeout( + Duration::from_secs(20), + tokio::spawn(async move { + loop { + progress.changed().await.unwrap(); + let status = progress + .borrow_and_update() + .clone() + .expect("progress changed but still None"); + debug!(log, "saw new progress status"; "status" => ?status); + match status { + UpdateProgress::Started => { + assert!(!saw_started, "saw Started multiple times"); + saw_started = true; + } + UpdateProgress::InProgress { progress: Some(value) } => { + // even if we didn't see the explicit `Started` message, + // getting `InProgress` means we're past that point. + saw_started = true; + assert!( + value >= prev_progress, + "new progress {value} \ + less than previous progress {prev_progress}" + ); + prev_progress = value; + } + UpdateProgress::Complete => break, + _ => panic!("unexpected progress status {status:?}"), } } - status @ (UpdateStatus::None - | UpdateStatus::Preparing(_) - | UpdateStatus::SpUpdateAuxFlashChckScan { .. } - | UpdateStatus::Aborted(_) - | UpdateStatus::Failed { .. } - | UpdateStatus::RotError { .. }) => { - panic!("unexpected status {status:?}"); - } - } - - // If we get here, the most recent packet did _not_ change the SP's - // internal update state, so it was a status request from - // `host_phase1_updater`. If we expect the updater to see new progress, - // wait for that change here. - if expect_progress_change { - // Safety rail that we haven't screwed up our untangle-the-race - // logic: if we don't see a new progress after several seconds, our - // test is broken, so fail. - tokio::time::timeout(Duration::from_secs(10), progress.changed()) - .await - .expect("progress timeout") - .expect("progress watch sender dropped"); - let status = progress.borrow_and_update().clone().unwrap(); - expect_progress_change = false; - - assert!( - matches!(status, UpdateProgress::InProgress { .. }), - "unexpected progress status {status:?}" - ); - } - } - - // We know the SP has received a complete update, but `HostPhase1Updater` - // may still need to request status to realize that; release the socket - // semaphore so the SP can respond. - sp_accept_sema.send(usize::MAX).unwrap(); - - // Unlike the SP and RoT cases, there are no MGS/SP steps in between the - // update completing and `HostPhase1Updater` sending - // `UpdateProgress::Complete`. Therefore, it's a race whether we'll see - // some number of `InProgress` status before `Complete`, but we should - // quickly move to `Complete`. - loop { - tokio::time::timeout(Duration::from_secs(10), progress.changed()) - .await - .expect("progress timeout") - .expect("progress watch sender dropped"); - let status = progress.borrow_and_update().clone().unwrap(); - match status { - UpdateProgress::Complete => break, - UpdateProgress::InProgress { .. } => continue, - _ => panic!("unexpected progress status {status:?}"), - } - } - - // drop our progress receiver so `do_update_task` can complete - mem::drop(progress); + }), + ) + .await + .expect("timeout waiting for update completion") + .expect("task panic"); do_update_task.await.expect("update task panicked").expect("update failed"); diff --git a/nexus/tests/integration_tests/rot_updater.rs b/nexus/tests/integration_tests/rot_updater.rs index 2e6d65f8b1..6be83e8ed0 100644 --- a/nexus/tests/integration_tests/rot_updater.rs +++ b/nexus/tests/integration_tests/rot_updater.rs @@ -5,7 +5,7 @@ //! 
Tests `RotUpdater`'s delivery of updates to RoTs via MGS use gateway_client::types::{RotSlot, SpType}; -use gateway_messages::{SpPort, UpdateInProgressStatus, UpdateStatus}; +use gateway_messages::SpPort; use gateway_test_utils::setup as mgs_setup; use hubtools::RawHubrisArchive; use hubtools::{CabooseBuilder, HubrisArchiveBuilder}; @@ -461,6 +461,7 @@ async fn test_rot_updater_delivers_progress() { let update_id = Uuid::new_v4(); let hubris_archive = make_fake_rot_image(); let target_rot_slot = RotSlot::B; + let target_sp = &mgstestctx.simrack.gimlets[sp_slot as usize]; let rot_updater = RotUpdater::new( sp_type, @@ -472,144 +473,57 @@ async fn test_rot_updater_delivers_progress() { ); let hubris_archive = RawHubrisArchive::from_vec(hubris_archive).unwrap(); - let rot_image_len = hubris_archive.image.data.len() as u32; // Subscribe to update progress, and check that there is no status yet; we // haven't started the update. let mut progress = rot_updater.progress_watcher(); assert_eq!(*progress.borrow_and_update(), None); - // Install a semaphore on the requests our target SP will receive so we can - // inspect progress messages without racing. - let target_sp = &mgstestctx.simrack.gimlets[sp_slot as usize]; - let sp_accept_sema = target_sp.install_udp_accept_semaphore().await; - let mut sp_responses = target_sp.responses_sent_count().unwrap(); - // Spawn the update on a background task so we can watch `progress` as it is // applied. let do_update_task = tokio::spawn(async move { rot_updater.update(&mut mgs_clients).await }); - // Allow the SP to respond to 1 message: the "prepare update" messages that - // triggers the start of an update, then ensure we see the "started" - // progress. - sp_accept_sema.send(1).unwrap(); - progress.changed().await.unwrap(); - assert_eq!(*progress.borrow_and_update(), Some(UpdateProgress::Started)); - - // Ensure our simulated SP is in the state we expect: it's prepared for an - // update but has not yet received any data. - assert_eq!( - target_sp.current_update_status().await, - UpdateStatus::InProgress(UpdateInProgressStatus { - id: update_id.into(), - bytes_received: 0, - total_size: rot_image_len, - }) - ); - - // Record the number of responses the SP has sent; we'll use - // `sp_responses.changed()` in the loop below, and want to mark whatever - // value this watch channel currently has as seen. - sp_responses.borrow_and_update(); - - // At this point, there are two clients racing each other to talk to our - // simulated SP: - // - // 1. MGS is trying to deliver the update - // 2. `rot_updater` is trying to poll (via MGS) for update status - // - // and we want to ensure that we see any relevant progress reports from - // `rot_updater`. We'll let one MGS -> SP message through at a time (waiting - // until our SP has responded by waiting for a change to `sp_responses`) - // then check its update state: if it changed, the packet we let through was - // data from MGS; otherwise, it was a status request from `rot_updater`. - // - // This loop will continue until either: - // - // 1. We see an `UpdateStatus::InProgress` message indicating 100% delivery, - // at which point we break out of the loop - // 2. We time out waiting for the previous step (by timing out for either - // the SP to process a request or `rot_updater` to realize there's been - // progress), at which point we panic and fail this test. - let mut prev_bytes_received = 0; - let mut expect_progress_change = false; - loop { - // Allow the SP to accept and respond to a single UDP packet. 
- sp_accept_sema.send(1).unwrap(); - - // Wait until the SP has sent a response, with a safety rail that we - // haven't screwed up our untangle-the-race logic: if we don't see the - // SP process any new messages after several seconds, our test is - // broken, so fail. - tokio::time::timeout(Duration::from_secs(10), sp_responses.changed()) - .await - .expect("timeout waiting for SP response count to change") - .expect("sp response count sender dropped"); - - // Inspec the SP's in-memory update state; we expect only `InProgress` - // or `Complete`, and in either case we note whether we expect to see - // status changes from `rot_updater`. - match target_sp.current_update_status().await { - UpdateStatus::InProgress(rot_progress) => { - if rot_progress.bytes_received > prev_bytes_received { - prev_bytes_received = rot_progress.bytes_received; - expect_progress_change = true; - continue; - } - } - UpdateStatus::Complete(_) => { - if prev_bytes_received < rot_image_len { - prev_bytes_received = rot_image_len; - } - } - status @ (UpdateStatus::None - | UpdateStatus::Preparing(_) - | UpdateStatus::SpUpdateAuxFlashChckScan { .. } - | UpdateStatus::Aborted(_) - | UpdateStatus::Failed { .. } - | UpdateStatus::RotError { .. }) => { - panic!("unexpected status {status:?}"); - } - } - - // If we get here, the most recent packet did _not_ change the SP's - // internal update state, so it was a status request from `rot_updater`. - // If we expect the updater to see new progress, wait for that change - // here. - if expect_progress_change || prev_bytes_received == rot_image_len { - // Safety rail that we haven't screwed up our untangle-the-race - // logic: if we don't see a new progress after several seconds, our - // test is broken, so fail. - tokio::time::timeout(Duration::from_secs(10), progress.changed()) - .await - .expect("progress timeout") - .expect("progress watch sender dropped"); - let status = progress.borrow_and_update().clone().unwrap(); - expect_progress_change = false; - - // We're done if we've observed the final progress message. - if let UpdateProgress::InProgress { progress: Some(value) } = status - { - if value == 1.0 { - break; + // Loop until we see `UpdateProgress::Complete`, ensuring that any + // intermediate progress messages we see are in order. + let mut saw_started = false; + let mut prev_progress = 0.0; + let log = mgstestctx.logctx.log.clone(); + tokio::time::timeout( + Duration::from_secs(20), + tokio::spawn(async move { + loop { + progress.changed().await.unwrap(); + let status = progress + .borrow_and_update() + .clone() + .expect("progress changed but still None"); + debug!(log, "saw new progress status"; "status" => ?status); + match status { + UpdateProgress::Started => { + assert!(!saw_started, "saw Started multiple times"); + saw_started = true; + } + UpdateProgress::InProgress { progress: Some(value) } => { + // even if we didn't see the explicit `Started` message, + // getting `InProgress` means we're past that point. + saw_started = true; + assert!( + value >= prev_progress, + "new progress {value} \ + less than previous progress {prev_progress}" + ); + prev_progress = value; + } + UpdateProgress::Complete => break, + _ => panic!("unexpected progress status {status:?}"), } - } else { - panic!("unexpected progerss status {status:?}"); } - } - } - - // The update has been fully delivered to the SP, but we don't see an - // `UpdateStatus::Complete` message until the RoT is reset. 
Release the SP - // semaphore since we're no longer racing to observe intermediate progress, - // and wait for the completion message. - sp_accept_sema.send(usize::MAX).unwrap(); - progress.changed().await.unwrap(); - assert_eq!(*progress.borrow_and_update(), Some(UpdateProgress::Complete)); - - // drop our progress receiver so `do_update_task` can complete - mem::drop(progress); + }), + ) + .await + .expect("timeout waiting for update completion") + .expect("task panic"); do_update_task.await.expect("update task panicked").expect("update failed"); diff --git a/nexus/tests/integration_tests/sp_updater.rs b/nexus/tests/integration_tests/sp_updater.rs index 1b6764e609..8314d22173 100644 --- a/nexus/tests/integration_tests/sp_updater.rs +++ b/nexus/tests/integration_tests/sp_updater.rs @@ -12,9 +12,9 @@ use hubtools::{CabooseBuilder, HubrisArchiveBuilder}; use omicron_nexus::app::test_interfaces::{ MgsClients, SpUpdater, UpdateProgress, }; -use sp_sim::SimulatedSp; use sp_sim::SIM_GIMLET_BOARD; use sp_sim::SIM_SIDECAR_BOARD; +use sp_sim::{SimSpHandledRequest, SimulatedSp}; use std::mem; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; @@ -471,16 +471,37 @@ async fn test_sp_updater_delivers_progress() { let target_sp = &mgstestctx.simrack.gimlets[sp_slot as usize]; let sp_accept_sema = target_sp.install_udp_accept_semaphore().await; let mut sp_responses = target_sp.responses_sent_count().unwrap(); + let num_prior_sp_response = *sp_responses.borrow_and_update(); // Spawn the update on a background task so we can watch `progress` as it is // applied. let do_update_task = tokio::spawn(async move { sp_updater.update(&mut mgs_clients).await }); - // Allow the SP to respond to 2 messages: the caboose check and the "prepare - // update" messages that triggers the start of an update, then ensure we see - // the "started" progress. - sp_accept_sema.send(2).unwrap(); + // Allow the SP to respond to 3 messages: the caboose check, "prepare + // update" message, and the status check that preparation is complete. These + // are all triggered by the start of an update. + sp_accept_sema.send(3).unwrap(); + + // Wait until this 3-packet initial update setup/handshake is complete. + let mut sp_responses = tokio::time::timeout( + Duration::from_secs(10), + tokio::spawn(async move { + loop { + sp_responses.changed().await.expect("sender dropped"); + if *sp_responses.borrow_and_update() - num_prior_sp_response + == 3 + { + return sp_responses; + } + } + }), + ) + .await + .expect("timeout waiting for SP update to start") + .expect("task panic"); + + // Ensure our updater reports that the update has started. progress.changed().await.unwrap(); assert_eq!(*progress.borrow_and_update(), Some(UpdateProgress::Started)); @@ -495,11 +516,6 @@ async fn test_sp_updater_delivers_progress() { }) ); - // Record the number of responses the SP has sent; we'll use - // `sp_responses.changed()` in the loop below, and want to mark whatever - // value this watch channel currently has as seen. - sp_responses.borrow_and_update(); - // At this point, there are two clients racing each other to talk to our // simulated SP: // @@ -509,8 +525,9 @@ async fn test_sp_updater_delivers_progress() { // and we want to ensure that we see any relevant progress reports from // `sp_updater`. 
We'll let one MGS -> SP message through at a time (waiting // until our SP has responded by waiting for a change to `sp_responses`) - // then check its update state: if it changed, the packet we let through was - // data from MGS; otherwise, it was a status request from `sp_updater`. + // then check whether that message was a request for update status. If it + // was, we wait and ensure we see a new progress report from `sp_updater`. + // If it wasn't, we move on and allow the next message through. // // This loop will continue until either: // @@ -519,9 +536,13 @@ async fn test_sp_updater_delivers_progress() { // 2. We time out waiting for the previous step (by timing out for either // the SP to process a request or `sp_updater` to realize there's been // progress), at which point we panic and fail this test. - let mut prev_bytes_received = 0; - let mut expect_progress_change = false; + let mut prev_progress = 0.0; loop { + debug!( + mgstestctx.logctx.log, "unblocking one SP packet"; + "prev_progress" => %prev_progress, + ); + // Allow the SP to accept and respond to a single UDP packet. sp_accept_sema.send(1).unwrap(); @@ -534,56 +555,49 @@ async fn test_sp_updater_delivers_progress() { .expect("timeout waiting for SP response count to change") .expect("sp response count sender dropped"); - // Inspec the SP's in-memory update state; we expect only `InProgress` - // or `Complete`, and in either case we note whether we expect to see - // status changes from `sp_updater`. - match target_sp.current_update_status().await { - UpdateStatus::InProgress(sp_progress) => { - if sp_progress.bytes_received > prev_bytes_received { - prev_bytes_received = sp_progress.bytes_received; - expect_progress_change = true; - continue; - } - } - UpdateStatus::Complete(_) => { - if prev_bytes_received < sp_image_len { - prev_bytes_received = sp_image_len; - } + // Check what our simulated SP just got. If it was an update status + // request, that was `sp_updater`, and we'll fall through to check on + // the progress we should see. Otherwise, we'll move on to the next + // packet. + match target_sp.last_request_handled() { + request @ (None | Some(SimSpHandledRequest::NotImplemented)) => { + debug!( + mgstestctx.logctx.log, "irrelevant previous request"; + "request" => ?request, + ); + continue; } - status @ (UpdateStatus::None - | UpdateStatus::Preparing(_) - | UpdateStatus::SpUpdateAuxFlashChckScan { .. } - | UpdateStatus::Aborted(_) - | UpdateStatus::Failed { .. } - | UpdateStatus::RotError { .. }) => { - panic!("unexpected status {status:?}"); + Some(SimSpHandledRequest::ComponentUpdateStatus(_)) => { + debug!(mgstestctx.logctx.log, "saw update status request"); } } - // If we get here, the most recent packet did _not_ change the SP's - // internal update state, so it was a status request from `sp_updater`. - // If we expect the updater to see new progress, wait for that change - // here. - if expect_progress_change || prev_bytes_received == sp_image_len { - // Safety rail that we haven't screwed up our untangle-the-race - // logic: if we don't see a new progress after several seconds, our - // test is broken, so fail. - tokio::time::timeout(Duration::from_secs(10), progress.changed()) - .await - .expect("progress timeout") - .expect("progress watch sender dropped"); - let status = progress.borrow_and_update().clone().unwrap(); - expect_progress_change = false; - - // We're done if we've observed the final progress message. 
- if let UpdateProgress::InProgress { progress: Some(value) } = status - { - if value == 1.0 { - break; - } - } else { - panic!("unexpected progerss status {status:?}"); + // Safety rail that we haven't screwed up our untangle-the-race + // logic: if we don't see a new progress after several seconds, our + // test is broken, so fail. + tokio::time::timeout(Duration::from_secs(10), progress.changed()) + .await + .expect("progress timeout") + .expect("progress watch sender dropped"); + let status = progress.borrow_and_update().clone().unwrap(); + debug!( + mgstestctx.logctx.log, "got new status from SpUpdater"; + "status" => ?status, + ); + + // We're done if we've observed the final progress message. + if let UpdateProgress::InProgress { progress: Some(value) } = status { + assert!( + value >= prev_progress, + "new progress {value} \ + less than previous progress {prev_progress}" + ); + prev_progress = value; + if value == 1.0 { + break; } + } else { + panic!("unexpected progress status {status:?}"); } } diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs index 5cfad94c86..5a7949a288 100644 --- a/sp-sim/src/gimlet.rs +++ b/sp-sim/src/gimlet.rs @@ -57,6 +57,22 @@ use tokio::task::{self, JoinHandle}; pub const SIM_GIMLET_BOARD: &str = "SimGimletSp"; +/// Type of request most recently handled by a simulated SP. +/// +/// Many request types are not covered by this enum. This only exists to enable +/// certain particular tests. +// If you need an additional request type to be reported by this enum, feel free +// to add it and update the appropriate `Handler` function below (see +// `update_status()`). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SimSpHandledRequest { + /// The most recent request was for the update status of a component. + ComponentUpdateStatus(SpComponent), + /// The most recent request was some other type that is currently not + /// implemented in this tracker.
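+    /// (Requests that the handler does not explicitly track are collapsed
+    /// into this variant via an `unwrap_or` fallback in the UDP task below.)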
+ NotImplemented, +} + pub struct Gimlet { rot: Mutex, manufacturing_public_key: Ed25519PublicKey, @@ -66,6 +82,7 @@ pub struct Gimlet { commands: mpsc::UnboundedSender, inner_tasks: Vec>, responses_sent_count: Option>, + last_request_handled: Arc>>, } impl Drop for Gimlet { @@ -168,6 +185,7 @@ impl Gimlet { let mut serial_console_addrs = HashMap::new(); let mut inner_tasks = Vec::new(); let (commands, commands_rx) = mpsc::unbounded_channel(); + let last_request_handled = Arc::default(); let (manufacturing_public_key, rot) = RotSprocket::bootstrap_from_config(&gimlet.common); @@ -185,6 +203,7 @@ impl Gimlet { commands, inner_tasks, responses_sent_count: None, + last_request_handled, }); }; @@ -257,6 +276,7 @@ impl Gimlet { gimlet.common.serial_number.clone(), incoming_console_tx, commands_rx, + Arc::clone(&last_request_handled), log, ); inner_tasks @@ -271,12 +291,17 @@ impl Gimlet { commands, inner_tasks, responses_sent_count: Some(responses_sent_count), + last_request_handled, }) } pub fn serial_console_addr(&self, component: &str) -> Option { self.serial_console_addrs.get(component).copied() } + + pub fn last_request_handled(&self) -> Option { + *self.last_request_handled.lock().unwrap() + } } struct SerialConsoleTcpTask { @@ -459,9 +484,11 @@ struct UdpTask { handler: Arc>, commands: mpsc::UnboundedReceiver, responses_sent_count: watch::Sender, + last_request_handled: Arc>>, } impl UdpTask { + #[allow(clippy::too_many_arguments)] fn new( servers: [UdpServer; 2], components: Vec, @@ -469,6 +496,7 @@ impl UdpTask { serial_number: String, incoming_serial_console: HashMap>>, commands: mpsc::UnboundedReceiver, + last_request_handled: Arc>>, log: Logger, ) -> (Self, Arc>, watch::Receiver) { let [udp0, udp1] = servers; @@ -488,6 +516,7 @@ impl UdpTask { handler: Arc::clone(&handler), commands, responses_sent_count, + last_request_handled, }, handler, responses_sent_count_rx, @@ -513,16 +542,27 @@ impl UdpTask { } recv0 = self.udp0.recv_from(), if throttle_count > 0 => { - if let Some((resp, addr)) = server::handle_request( - &mut *self.handler.lock().await, - recv0, - &mut out_buf, - responsiveness, - SpPort::One, - ).await? { + let (result, handled_request) = { + let mut handler = self.handler.lock().await; + handler.last_request_handled = None; + let result = server::handle_request( + &mut *handler, + recv0, + &mut out_buf, + responsiveness, + SpPort::One, + ).await?; + (result, + handler.last_request_handled.unwrap_or( + SimSpHandledRequest::NotImplemented, + )) + }; + if let Some((resp, addr)) = result { throttle_count -= 1; self.udp0.send_to(resp, addr).await?; self.responses_sent_count.send_modify(|n| *n += 1); + *self.last_request_handled.lock().unwrap() = + Some(handled_request); } } @@ -594,6 +634,8 @@ struct Handler { update_state: SimSpUpdate, reset_pending: Option, + last_request_handled: Option, + // To simulate an SP reset, we should (after doing whatever housekeeping we // need to track the reset) intentionally _fail_ to respond to the request, // simulating a `-> !` function on the SP that triggers a reset. 
To provide @@ -635,6 +677,7 @@ impl Handler { startup_options: StartupOptions::empty(), update_state: SimSpUpdate::default(), reset_pending: None, + last_request_handled: None, should_fail_to_respond_signal: None, } } @@ -1003,6 +1046,8 @@ impl SpHandler for Handler { "port" => ?port, "component" => ?component, ); + self.last_request_handled = + Some(SimSpHandledRequest::ComponentUpdateStatus(component)); Ok(self.update_state.status()) } diff --git a/sp-sim/src/lib.rs b/sp-sim/src/lib.rs index 87643af9a8..8a8418b84d 100644 --- a/sp-sim/src/lib.rs +++ b/sp-sim/src/lib.rs @@ -15,6 +15,7 @@ use async_trait::async_trait; pub use config::Config; use gateway_messages::SpPort; pub use gimlet::Gimlet; +pub use gimlet::SimSpHandledRequest; pub use gimlet::SIM_GIMLET_BOARD; pub use server::logger; pub use sidecar::Sidecar; From e72625c9c40e66605be0a81585f9a1956e0a9122 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 1 Feb 2024 12:01:38 -0500 Subject: [PATCH 75/91] Add a background task for update plan execution (#4891) This PR is the first step in creating a background task that is capable of taking a `Blueprint` and then reifying that blueprint into deployed or updated software. This PR uses the initial version of a Blueprint introduced in #4804. A basic executor that sends the related `OmicronZonesConfig` to the appropriate sled-agents for newly added sleds was created. A background task that loads the target `Blueprint` from the database and feeds it to the executor is also included, along with a test for each. --- common/src/nexus_config.rs | 36 +- dev-tools/omdb/src/bin/omdb/nexus.rs | 2 + dev-tools/omdb/tests/env.out | 24 + dev-tools/omdb/tests/successes.out | 22 + nexus/examples/config.toml | 2 + .../src/app/background/blueprint_execution.rs | 430 ++++++++++++++++++ nexus/src/app/background/blueprint_load.rs | 291 ++++++++++++ nexus/src/app/background/common.rs | 2 +- nexus/src/app/background/init.rs | 37 ++ nexus/src/app/background/mod.rs | 2 + nexus/src/app/mod.rs | 1 + nexus/tests/config.test.toml | 2 + smf/nexus/multi-sled/config-partial.toml | 2 + smf/nexus/single-sled/config-partial.toml | 2 + 14 files changed, 848 insertions(+), 7 deletions(-) create mode 100644 nexus/src/app/background/blueprint_execution.rs create mode 100644 nexus/src/app/background/blueprint_load.rs diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index e987790a21..24f4c34797 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -334,6 +334,8 @@ pub struct BackgroundTaskConfig { pub inventory: InventoryConfig, /// configuration for phantom disks task pub phantom_disks: PhantomDiskConfig, + /// configuration for blueprint related tasks + pub blueprints: BlueprintTasksConfig, /// configuration for service zone nat sync task pub sync_service_zone_nat: SyncServiceZoneNatConfig, /// configuration for the bfd manager task @@ -428,6 +430,20 @@ pub struct PhantomDiskConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct BlueprintTasksConfig { + /// period (in seconds) for periodic activations of the background task that + /// reads the latest target blueprint from the database + #[serde_as(as = "DurationSeconds")] + pub period_secs_load: Duration, + + /// period (in seconds) for periodic activations of the background task that + /// executes the latest target blueprint + #[serde_as(as = "DurationSeconds")] + pub period_secs_execute: Duration, +} + /// Configuration for a nexus server #[derive(Clone, 
Debug, Deserialize, PartialEq, Serialize)] pub struct PackageConfig { @@ -528,12 +544,12 @@ impl std::fmt::Display for SchemeName { mod test { use super::{ default_techport_external_server_port, AuthnConfig, - BackgroundTaskConfig, Config, ConfigDropshotWithTls, ConsoleConfig, - Database, DeploymentConfig, DnsTasksConfig, DpdConfig, - ExternalEndpointsConfig, InternalDns, InventoryConfig, LoadError, - LoadErrorKind, MgdConfig, NatCleanupConfig, PackageConfig, - PhantomDiskConfig, SchemeName, TimeseriesDbConfig, Tunables, - UpdatesConfig, + BackgroundTaskConfig, BlueprintTasksConfig, Config, + ConfigDropshotWithTls, ConsoleConfig, Database, DeploymentConfig, + DnsTasksConfig, DpdConfig, ExternalEndpointsConfig, InternalDns, + InventoryConfig, LoadError, LoadErrorKind, MgdConfig, NatCleanupConfig, + PackageConfig, PhantomDiskConfig, SchemeName, TimeseriesDbConfig, + Tunables, UpdatesConfig, }; use crate::address::{Ipv6Subnet, RACK_PREFIX}; use crate::api::internal::shared::SwitchLocation; @@ -687,6 +703,8 @@ mod test { inventory.nkeep = 11 inventory.disable = false phantom_disks.period_secs = 30 + blueprints.period_secs_load = 10 + blueprints.period_secs_execute = 60 sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] type = "random" @@ -795,6 +813,10 @@ mod test { phantom_disks: PhantomDiskConfig { period_secs: Duration::from_secs(30), }, + blueprints: BlueprintTasksConfig { + period_secs_load: Duration::from_secs(10), + period_secs_execute: Duration::from_secs(60) + }, sync_service_zone_nat: SyncServiceZoneNatConfig { period_secs: Duration::from_secs(30) } @@ -857,6 +879,8 @@ mod test { inventory.nkeep = 3 inventory.disable = false phantom_disks.period_secs = 30 + blueprints.period_secs_load = 10 + blueprints.period_secs_execute = 60 sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] type = "random" diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index ea89923caa..f00c05f1ec 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -256,6 +256,8 @@ async fn cmd_nexus_background_tasks_show( "dns_servers_external", "dns_propagation_external", "nat_v4_garbage_collector", + "blueprint_loader", + "blueprint_executor", ] { if let Some(bgtask) = tasks.remove(name) { print_task(&bgtask); diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 878b3f04dd..72e9d2e8fc 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -28,6 +28,14 @@ task: "bfd_manager" switches +task: "blueprint_executor" + Executes the target blueprint + + +task: "blueprint_loader" + Loads the current target blueprint from the DB + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -106,6 +114,14 @@ task: "bfd_manager" switches +task: "blueprint_executor" + Executes the target blueprint + + +task: "blueprint_loader" + Loads the current target blueprint from the DB + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -171,6 +187,14 @@ task: "bfd_manager" switches +task: "blueprint_executor" + Executes the target blueprint + + +task: "blueprint_loader" + Loads the current target blueprint from the DB + + task: "dns_config_external" watches external DNS data stored in CockroachDB diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index e5a38049f3..dc77ade735 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -222,6 +222,14 @@ 
task: "bfd_manager" switches +task: "blueprint_executor" + Executes the target blueprint + + +task: "blueprint_loader" + Loads the current target blueprint from the DB + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -344,6 +352,20 @@ task: "nat_v4_garbage_collector" started at (s ago) and ran for ms warning: unknown background task: "nat_v4_garbage_collector" (don't know how to interpret details: Null) +task: "blueprint_loader" + configured period: every 1m 40s + currently executing: no + last completed activation: iter 2, triggered by an explicit signal + started at (s ago) and ran for ms +warning: unknown background task: "blueprint_loader" (don't know how to interpret details: Object {"status": String("no target blueprint")}) + +task: "blueprint_executor" + configured period: every 10m + currently executing: no + last completed activation: iter 2, triggered by an explicit signal + started at (s ago) and ran for ms + last completion reported error: no blueprint + task: "bfd_manager" configured period: every 30s currently executing: no diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 1cfe3ae8a2..4263c34f3d 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -106,6 +106,8 @@ inventory.nkeep = 5 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +blueprints.period_secs_load = 10 +blueprints.period_secs_execute = 60 sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] diff --git a/nexus/src/app/background/blueprint_execution.rs b/nexus/src/app/background/blueprint_execution.rs new file mode 100644 index 0000000000..8d6ea8d8ce --- /dev/null +++ b/nexus/src/app/background/blueprint_execution.rs @@ -0,0 +1,430 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for realizing a plan blueprint + +use super::common::BackgroundTask; +use anyhow::Context; +use futures::future::BoxFuture; +use futures::stream; +use futures::FutureExt; +use futures::StreamExt; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::lookup::LookupPath; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::{Blueprint, BlueprintTarget, OmicronZonesConfig}; +use serde_json::json; +use sled_agent_client::Client as SledAgentClient; +use slog::Logger; +use std::collections::BTreeMap; +use std::sync::Arc; +use tokio::sync::watch; +use uuid::Uuid; + +/// Background task that takes a [`Blueprint`] and realizes the change to +/// the state of the system based on the `Blueprint`. +pub struct BlueprintExecutor { + datastore: Arc, + rx_blueprint: watch::Receiver>>, +} + +impl BlueprintExecutor { + pub fn new( + datastore: Arc, + rx_blueprint: watch::Receiver< + Option>, + >, + ) -> BlueprintExecutor { + BlueprintExecutor { datastore, rx_blueprint } + } + + // This is a modified copy of the functionality from `nexus/src/app/sled.rs`. + // There's no good way to access this functionality right now since it is a + // method on the `Nexus` type. We want to have a more constrained type we can + // pass into background tasks for this type of functionality, but for now we + // just copy the functionality. 
+ async fn sled_client( + &self, + opctx: &OpContext, + sled_id: &Uuid, + ) -> Result<SledAgentClient, anyhow::Error> { + let (.., sled) = LookupPath::new(opctx, &self.datastore) + .sled_id(*sled_id) + .fetch() + .await + .with_context(|| { + format!( + "Failed to create sled_agent::Client for sled_id: {}", + sled_id + ) + })?; + let dur = std::time::Duration::from_secs(60); + let client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .timeout(dur) + .build() + .unwrap(); + Ok(SledAgentClient::new_with_client( + &format!("http://{}", sled.address()), + client, + opctx.log.clone(), + )) + } + + async fn realize_blueprint( + &self, + opctx: &OpContext, + blueprint: &Blueprint, + ) -> Result<(), Vec<anyhow::Error>> { + let log = opctx.log.new(o!("comment" => blueprint.comment.clone())); + self.deploy_zones(&log, opctx, &blueprint.omicron_zones).await + } + + async fn deploy_zones( + &self, + log: &Logger, + opctx: &OpContext, + zones: &BTreeMap<Uuid, OmicronZonesConfig>, + ) -> Result<(), Vec<anyhow::Error>> { + let errors: Vec<_> = stream::iter(zones.clone()) + .filter_map(|(sled_id, config)| async move { + let client = match self.sled_client(&opctx, &sled_id).await { + Ok(client) => client, + Err(err) => { + warn!(log, "{err:#}"); + return Some(err); + } + }; + let result = client + .omicron_zones_put(&config) + .await + .with_context(|| { + format!("Failed to put {config:#?} to sled {sled_id}") + }); + + match result { + Err(error) => { + warn!(log, "{error:#}"); + Some(error) + } + Ok(_) => { + info!( + log, + "Successfully deployed zones for sled agent"; + "sled_id" => %sled_id, + "generation" => config.generation.to_string() + ); + None + } + } + }) + .collect() + .await; + + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } + } +} + +impl BackgroundTask for BlueprintExecutor { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async { + // Get the latest blueprint, cloning to prevent holding a read lock + // on the watch.
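+            // (`borrow_and_update()` also marks this value as seen, so the
+            // watch-channel dependency only re-triggers this task for a
+            // genuinely new blueprint.)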
+ let update = self.rx_blueprint.borrow_and_update().clone(); + + let Some(update) = update else { + warn!(&opctx.log, + "Blueprint execution: skipped"; + "reason" => "no blueprint"); + return json!({"error": "no blueprint" }); + }; + + let (bp_target, blueprint) = &*update; + if !bp_target.enabled { + warn!(&opctx.log, + "Blueprint execution: skipped"; + "reason" => "blueprint disabled", + "target_id" => %blueprint.id); + return json!({ + "target_id": blueprint.id.to_string(), + "error": "blueprint disabled" + }); + } + + let result = self.realize_blueprint(opctx, blueprint).await; + + // Return the result as a `serde_json::Value` + match result { + Ok(()) => json!({}), + Err(errors) => { + let errors: Vec<_> = errors + .into_iter() + .map(|e| format!("{:#}", e)) + .collect(); + json!({ + "target_id": blueprint.id.to_string(), + "errors": errors + }) + } + } + } + .boxed() + } +} +#[cfg(test)] +mod test { + use super::*; + use crate::app::background::common::BackgroundTask; + use httptest::matchers::{all_of, json_decoded, request}; + use httptest::responders::status_code; + use httptest::Expectation; + use nexus_db_model::{ + ByteCount, SledBaseboard, SledSystemHardware, SledUpdate, + }; + use nexus_test_utils_macros::nexus_test; + use nexus_types::inventory::{ + OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, + }; + use omicron_common::api::external::Generation; + use serde::Deserialize; + use std::collections::BTreeSet; + use std::net::SocketAddr; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + fn create_blueprint( + omicron_zones: BTreeMap, + ) -> (BlueprintTarget, Blueprint) { + let id = Uuid::new_v4(); + ( + BlueprintTarget { + target_id: id, + enabled: true, + time_made_target: chrono::Utc::now(), + }, + Blueprint { + id, + omicron_zones, + zones_in_service: BTreeSet::new(), + parent_blueprint_id: None, + time_created: chrono::Utc::now(), + creator: "test".to_string(), + comment: "test blueprint".to_string(), + }, + ) + } + + #[nexus_test(server = crate::Server)] + async fn test_deploy_omicron_zones(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let (blueprint_tx, blueprint_rx) = watch::channel(None); + let mut task = BlueprintExecutor::new(datastore.clone(), blueprint_rx); + + // With no blueprint we should fail with an appropriate message. + let value = task.activate(&opctx).await; + assert_eq!(value, json!({"error": "no blueprint"})); + + // Get a success (empty) result back when the blueprint has an empty set of zones + let blueprint = Arc::new(create_blueprint(BTreeMap::new())); + blueprint_tx.send(Some(blueprint)).unwrap(); + let value = task.activate(&opctx).await; + assert_eq!(value, json!({})); + + // Create some fake sled-agent servers to respond to zone puts and add + // sleds to CRDB. + let mut s1 = httptest::Server::run(); + let mut s2 = httptest::Server::run(); + let sled_id1 = Uuid::new_v4(); + let sled_id2 = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + for (i, (sled_id, server)) in + [(sled_id1, &s1), (sled_id2, &s2)].iter().enumerate() + { + let SocketAddr::V6(addr) = server.addr() else { + panic!("Expected Ipv6 address. 
Got {}", server.addr()); + }; + let update = SledUpdate::new( + *sled_id, + addr, + SledBaseboard { + serial_number: i.to_string(), + part_number: "test".into(), + revision: 1, + }, + SledSystemHardware { + is_scrimlet: false, + usable_hardware_threads: 4, + usable_physical_ram: ByteCount(1000.into()), + reservoir_size: ByteCount(999.into()), + }, + rack_id, + ); + datastore + .sled_upsert(update) + .await + .expect("Failed to insert sled to db"); + } + + // The particular dataset doesn't matter for this test. + // We re-use the same one to not obfuscate things + let dataset = OmicronZoneDataset { + pool_name: format!("oxp_{}", Uuid::new_v4()).parse().unwrap(), + }; + + let generation = Generation::new(); + + // Zones are updated in a particular order, but each request contains + // the full set of zones that must be running. + // See `rack_setup::service::ServiceInner::run` for more details. + let mut zones = OmicronZonesConfig { + generation, + zones: vec![OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: OmicronZoneType::InternalDns { + dataset, + dns_address: "oh-hello-internal-dns".into(), + gz_address: "::1".parse().unwrap(), + gz_address_index: 0, + http_address: "some-ipv6-address".into(), + }, + }], + }; + + // Create a blueprint with only the `InternalDns` zone for both servers + // We reuse the same `OmicronZonesConfig` because the details don't + // matter for this test. + let blueprint = Arc::new(create_blueprint(BTreeMap::from([ + (sled_id1, zones.clone()), + (sled_id2, zones.clone()), + ]))); + + // Send the blueprint with the first set of zones to the task + blueprint_tx.send(Some(blueprint)).unwrap(); + + // Check that the initial requests were sent to the fake sled-agents + for s in [&mut s1, &mut s2] { + s.expect( + Expectation::matching(all_of![ + request::method_path("PUT", "/omicron-zones",), + // Our generation number should be 1 and there should + // be only a single zone. + request::body(json_decoded(|c: &OmicronZonesConfig| { + c.generation == 1u32.into() && c.zones.len() == 1 + })) + ]) + .respond_with(status_code(204)), + ); + } + + // Activate the task to trigger zone configuration on the sled-agents + let value = task.activate(&opctx).await; + assert_eq!(value, json!({})); + s1.verify_and_clear(); + s2.verify_and_clear(); + + // Do it again. This should trigger the same request. + for s in [&mut s1, &mut s2] { + s.expect( + Expectation::matching(request::method_path( + "PUT", + "/omicron-zones", + )) + .respond_with(status_code(204)), + ); + } + let value = task.activate(&opctx).await; + assert_eq!(value, json!({})); + s1.verify_and_clear(); + s2.verify_and_clear(); + + // Take another lap, but this time, have one server fail the request and + // try again. + s1.expect( + Expectation::matching(request::method_path( + "PUT", + "/omicron-zones", + )) + .respond_with(status_code(204)), + ); + s2.expect( + Expectation::matching(request::method_path( + "PUT", + "/omicron-zones", + )) + .respond_with(status_code(500)), + ); + + // Define a type we can use to pick stuff out of error objects. 
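+        // (This mirrors the failure JSON emitted by `activate()` above,
+        // i.e. {"target_id": "...", "errors": ["..."]}.)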
+ #[derive(Deserialize)] + struct ErrorResult { + errors: Vec, + } + + let value = task.activate(&opctx).await; + println!("{:?}", value); + let result: ErrorResult = serde_json::from_value(value).unwrap(); + assert_eq!(result.errors.len(), 1); + assert!( + result.errors[0].starts_with("Failed to put OmicronZonesConfig") + ); + s1.verify_and_clear(); + s2.verify_and_clear(); + + // Add an `InternalNtp` zone for our next update + zones.generation = generation.next(); + zones.zones.push(OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: OmicronZoneType::InternalNtp { + address: "::1".into(), + dns_servers: vec!["::1".parse().unwrap()], + domain: None, + ntp_servers: vec!["some-ntp-server-addr".into()], + }, + }); + + // Update our watch channel + let blueprint = Arc::new(create_blueprint(BTreeMap::from([ + (sled_id1, zones.clone()), + (sled_id2, zones.clone()), + ]))); + blueprint_tx.send(Some(blueprint)).unwrap(); + + // Set our new expectations + for s in [&mut s1, &mut s2] { + s.expect( + Expectation::matching(all_of![ + request::method_path("PUT", "/omicron-zones",), + // Our generation number should be bumped and there should + // be two zones. + request::body(json_decoded(|c: &OmicronZonesConfig| { + c.generation == 2u32.into() && c.zones.len() == 2 + })) + ]) + .respond_with(status_code(204)), + ); + } + + // Activate the task + let value = task.activate(&opctx).await; + assert_eq!(value, json!({})); + s1.verify_and_clear(); + s2.verify_and_clear(); + } +} diff --git a/nexus/src/app/background/blueprint_load.rs b/nexus/src/app/background/blueprint_load.rs new file mode 100644 index 0000000000..c34d2ab103 --- /dev/null +++ b/nexus/src/app/background/blueprint_load.rs @@ -0,0 +1,291 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for loading the target blueprint from the DB +//! +//! This task triggers the `blueprint_execution` background task when the +//! blueprint changes. + +use super::common::BackgroundTask; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::{Blueprint, BlueprintTarget}; +use serde_json::json; +use std::sync::Arc; +use tokio::sync::watch; + +pub struct TargetBlueprintLoader { + datastore: Arc, + last: Option>, + tx: watch::Sender>>, +} + +impl TargetBlueprintLoader { + pub fn new(datastore: Arc) -> TargetBlueprintLoader { + let (tx, _) = watch::channel(None); + TargetBlueprintLoader { datastore, last: None, tx } + } + + /// Expose the target blueprint + pub fn watcher( + &self, + ) -> watch::Receiver>> { + self.tx.subscribe() + } +} + +impl BackgroundTask for TargetBlueprintLoader { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async { + // Set up a logger for this activation that includes metadata about + // the current target. 
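+            // (`self.last` caches the previously loaded target blueprint so
+            // that repeated activations can detect and log changes.)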
+ let log = match &self.last { + None => opctx.log.clone(), + Some(old) => opctx.log.new(o!( + "original_target_id" => old.1.id.to_string(), + "original_time_created" => old.1.time_created.to_string(), + )), + }; + + // Retrieve the latest target blueprint + let result = + self.datastore.blueprint_target_get_current_full(opctx).await; + + // Decide what to do with the result + match (&mut self.last, result) { + (_, Err(error)) => { + // We failed to read the blueprint. There's nothing to do + // but log an error. We'll retry when we're activated again. + let message = format!("{:#}", error); + warn!( + &log, + "failed to read target blueprint"; + "error" => &message + ); + let e = + format!("failed to read target blueprint: {message}"); + json!({"error": e}) + } + (None, Ok(None)) => { + // We haven't found a blueprint yet. Do nothing. + json!({"status": "no target blueprint"}) + } + (Some(old), Ok(None)) => { + // We have transitioned from having a blueprint to not + // having one. This should not happen. + let message = format!( + "target blueprint with id {} was removed. There is no \ + longer any target blueprint", + old.1.id + ); + let old_id = old.1.id.to_string(); + self.last = None; + self.tx.send_replace(self.last.clone()); + error!(&log, "{message:?}"); + json!({ + "removed_target_id": old_id, + "status": "no target blueprint (removed)", + "error": message + }) + } + (None, Ok(Some((new_bp_target, new_blueprint)))) => { + // We've found a target blueprint for the first time. + // Save it and notify any watchers. + let target_id = new_blueprint.id.to_string(); + let time_created = new_blueprint.time_created.to_string(); + info!( + log, + "found new target blueprint (first find)"; + "target_id" => &target_id, + "time_created" => &time_created + ); + self.last = Some(Arc::new((new_bp_target, new_blueprint))); + self.tx.send_replace(self.last.clone()); + json!({ + "target_id": target_id, + "time_created": time_created, + "time_found": chrono::Utc::now().to_string(), + "status": "first target blueprint" + }) + } + (Some(old), Ok(Some((new_bp_target, new_blueprint)))) => { + let target_id = new_blueprint.id.to_string(); + let time_created = new_blueprint.time_created.to_string(); + if old.1.id != new_blueprint.id { + // The current target blueprint has been updated + info!( + log, + "found new target blueprint"; + "target_id" => &target_id, + "time_created" => &time_created + ); + self.last = + Some(Arc::new((new_bp_target, new_blueprint))); + self.tx.send_replace(self.last.clone()); + json!({ + "target_id": target_id, + "time_created": time_created, + "time_found": chrono::Utc::now().to_string(), + "status": "target blueprint updated" + }) + } else { + // The new target id matches the old target id + // + // Let's see if the blueprints hold the same contents. + // It should not be possible for the contents of a + // blueprint to change, but we check to catch possible + // bugs further up the stack. + if old.1 != new_blueprint { + let message = format!( + "blueprint for id {} changed. \ + Blueprints are supposed to be immutable.", + target_id + ); + error!(&log, "{}", message); + json!({ + "target_id": target_id, + "status": "target blueprint unchanged (error)", + "error": message + }) + } else { + // We found a new target blueprint that exactly matches + // the old target blueprint. This is the common case + // when we're activated by a timeout. 
+ debug!( + log, + "found latest target blueprint (unchanged)"; + "target_id" => &target_id, + "time_created" => &time_created.clone() + ); + json!({ + "target_id": target_id, + "time_created": time_created, + "status": "target blueprint unchanged" + }) + } + } + } + } + } + .boxed() + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::app::background::common::BackgroundTask; + use nexus_inventory::now_db_precision; + use nexus_test_utils_macros::nexus_test; + use nexus_types::deployment::{Blueprint, BlueprintTarget}; + use serde::Deserialize; + use std::collections::{BTreeMap, BTreeSet}; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + fn create_blueprint( + parent_blueprint_id: Option, + ) -> (BlueprintTarget, Blueprint) { + let id = Uuid::new_v4(); + ( + BlueprintTarget { + target_id: id, + enabled: true, + time_made_target: now_db_precision(), + }, + Blueprint { + id, + omicron_zones: BTreeMap::new(), + zones_in_service: BTreeSet::new(), + parent_blueprint_id, + time_created: now_db_precision(), + creator: "test".to_string(), + comment: "test blueprint".to_string(), + }, + ) + } + + #[derive(Deserialize)] + #[allow(unused)] + struct TargetUpdate { + pub target_id: Uuid, + pub time_created: chrono::DateTime, + pub time_found: Option>, + pub status: String, + } + + #[nexus_test(server = crate::Server)] + async fn test_load_blueprints(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let mut task = TargetBlueprintLoader::new(datastore.clone()); + let mut rx = task.watcher(); + + // We expect an appropriate status with no blueprint in the datastore + let value = task.activate(&opctx).await; + assert_eq!(json!({"status": "no target blueprint"}), value); + assert!(rx.borrow().is_none()); + + let (target, blueprint) = create_blueprint(None); + + // Inserting a blueprint, but not making it the target returns the same + // status + datastore.blueprint_insert(&opctx, &blueprint).await.unwrap(); + let value = task.activate(&opctx).await; + assert_eq!(json!({"status": "no target blueprint"}), value); + assert!(rx.borrow().is_none()); + + // Setting a target blueprint makes the loader see it and broadcast it + datastore.blueprint_target_set_current(&opctx, target).await.unwrap(); + let value = task.activate(&opctx).await; + let update = serde_json::from_value::(value).unwrap(); + assert_eq!(update.target_id, blueprint.id); + assert_eq!(update.status, "first target blueprint"); + let rx_update = rx.borrow_and_update().clone().unwrap(); + assert_eq!(rx_update.0, target); + assert_eq!(rx_update.1, blueprint); + + // Activation without changing the target blueprint results in no update + let value = task.activate(&opctx).await; + let update = serde_json::from_value::(value).unwrap(); + assert_eq!(update.target_id, blueprint.id); + assert_eq!(update.status, "target blueprint unchanged"); + assert_eq!(false, rx.has_changed().unwrap()); + + // Adding a new blueprint and updating the target triggers a change + let (new_target, new_blueprint) = create_blueprint(Some(blueprint.id)); + datastore.blueprint_insert(&opctx, &new_blueprint).await.unwrap(); + datastore + .blueprint_target_set_current(&opctx, new_target) + .await + .unwrap(); + let value = task.activate(&opctx).await; + let update = serde_json::from_value::(value).unwrap(); + assert_eq!(update.target_id, 
new_blueprint.id); + assert_eq!(update.status, "target blueprint updated"); + let rx_update = rx.borrow_and_update().clone().unwrap(); + assert_eq!(rx_update.0, new_target); + assert_eq!(rx_update.1, new_blueprint); + + // Activating again without changing the target blueprint results in + // no update + let value = task.activate(&opctx).await; + let update = serde_json::from_value::(value).unwrap(); + assert_eq!(update.target_id, new_blueprint.id); + assert_eq!(update.status, "target blueprint unchanged"); + assert_eq!(false, rx.has_changed().unwrap()); + } +} diff --git a/nexus/src/app/background/common.rs b/nexus/src/app/background/common.rs index 4fcce74714..f954a35639 100644 --- a/nexus/src/app/background/common.rs +++ b/nexus/src/app/background/common.rs @@ -408,7 +408,7 @@ impl TaskExec { start_time, start_instant, reason, - iteration: iteration, + iteration, }); }); diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 6eacb07dfa..95fe5c933e 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -5,6 +5,8 @@ //! Background task initialization use super::bfd; +use super::blueprint_execution; +use super::blueprint_load; use super::common; use super::dns_config; use super::dns_propagation; @@ -62,6 +64,12 @@ pub struct BackgroundTasks { /// task handle for the task that detects phantom disks pub task_phantom_disks: common::TaskHandle, + /// task handle for blueprint target loader + pub task_blueprint_loader: common::TaskHandle, + + /// task handle for blueprint execution background task + pub task_blueprint_executor: common::TaskHandle, + /// task handle for the service zone nat tracker pub task_service_zone_nat_tracker: common::TaskHandle, } @@ -192,6 +200,33 @@ impl BackgroundTasks { task }; + // Background task: blueprint loader + let blueprint_loader = + blueprint_load::TargetBlueprintLoader::new(datastore.clone()); + let rx_blueprint = blueprint_loader.watcher(); + let task_blueprint_loader = driver.register( + String::from("blueprint_loader"), + String::from("Loads the current target blueprint from the DB"), + config.blueprints.period_secs_load, + Box::new(blueprint_loader), + opctx.child(BTreeMap::new()), + vec![], + ); + + // Background task: blueprint executor + let blueprint_executor = blueprint_execution::BlueprintExecutor::new( + datastore.clone(), + rx_blueprint.clone(), + ); + let task_blueprint_executor = driver.register( + String::from("blueprint_executor"), + String::from("Executes the target blueprint"), + config.blueprints.period_secs_execute, + Box::new(blueprint_executor), + opctx.child(BTreeMap::new()), + vec![Box::new(rx_blueprint)], + ); + let task_service_zone_nat_tracker = { driver.register( "service_zone_nat_tracker".to_string(), @@ -220,6 +255,8 @@ impl BackgroundTasks { bfd_manager, task_inventory_collection, task_phantom_disks, + task_blueprint_loader, + task_blueprint_executor, task_service_zone_nat_tracker, } } diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index dc9eff7d79..2c5fa0ab3c 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -5,6 +5,8 @@ //! 
Background tasks mod bfd; +mod blueprint_execution; +mod blueprint_load; mod common; mod dns_config; mod dns_propagation; diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 65525557b3..c9ca4db73e 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -361,6 +361,7 @@ impl Nexus { authn::Context::internal_api(), Arc::clone(&db_datastore), ); + let background_tasks = background::BackgroundTasks::start( &background_ctx, Arc::clone(&db_datastore), diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index a795f57f4c..3571388747 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -100,6 +100,8 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +blueprints.period_secs_load = 100 +blueprints.period_secs_execute = 600 sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index 2eed205ddf..8fc2429169 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -48,6 +48,8 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +blueprints.period_secs_load = 10 +blueprints.period_secs_execute = 60 sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 53bdeaadd6..15f0a4ebe1 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -48,6 +48,8 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +blueprints.period_secs_load = 10 +blueprints.period_secs_execute = 60 sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] From 5208d216d63d51c32901db7607ce3466ad8ab087 Mon Sep 17 00:00:00 2001 From: Rain Date: Thu, 1 Feb 2024 09:35:52 -0800 Subject: [PATCH 76/91] [wicketd] replace snafu with thiserror (#4950) This is currently the only use of snafu in omicron (outside of transitive deps), and I figure it is simple enough that we can replace it with thiserror, reducing use of a direct dependency. This was the first time I'd encountered snafu so I read a bit about it. As far as I understand, the main benefit of snafu is that it pushes you heavily towards an error-per-module pattern. However, thiserror does permit that pattern as well, and in practice it is only a little more verbose than snafu to do right (`map_err` vs `context`, though `snafu` introduces new types that aren't in the source code like `IoSnafu` and `ParseSnafu`). 
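For a concrete picture, the resulting pattern looks roughly like this (a condensed sketch of the `wicketd/src/config.rs` change below, showing only the `Io` variant):

```rust
use camino::{Utf8Path, Utf8PathBuf};
use thiserror::Error;

#[derive(Debug, Error)]
pub enum ConfigError {
    #[error("Failed to read config file: {path}")]
    Io {
        // `#[source]` preserves the underlying error for error chains, much
        // as snafu's implicitly-recognized `source` field did.
        #[source]
        error: std::io::Error,
        path: Utf8PathBuf,
    },
}

fn read_config(path: &Utf8Path) -> Result<String, ConfigError> {
    // Where snafu offered `.context(IoSnafu { path })`, thiserror relies on a
    // plain `map_err` to attach the same context.
    std::fs::read_to_string(path)
        .map_err(|error| ConfigError::Io { error, path: path.to_owned() })
}
```

The explicit `map_err` is the one place this reads more verbose than snafu's `.context(...)`, which seems like an acceptable trade for dropping the dependency.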
--- Cargo.lock | 1 - Cargo.toml | 1 - wicketd/Cargo.toml | 1 - wicketd/src/config.rs | 36 +++++++++++++++++++++++++----------- 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 25fb63604e..50628dd1af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10302,7 +10302,6 @@ dependencies = [ "sled-hardware", "slog", "slog-dtrace", - "snafu", "subprocess", "tar", "thiserror", diff --git a/Cargo.toml b/Cargo.toml index f7c5b11aba..3d22af19a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -355,7 +355,6 @@ slog-envlogger = "2.2" slog-error-chain = { git = "https://github.com/oxidecomputer/slog-error-chain", branch = "main", features = ["derive"] } slog-term = "2.9" smf = "0.2" -snafu = "0.7" sp-sim = { path = "sp-sim" } sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } diff --git a/wicketd/Cargo.toml b/wicketd/Cargo.toml index 83e7bf33ca..26e54eb3bc 100644 --- a/wicketd/Cargo.toml +++ b/wicketd/Cargo.toml @@ -42,7 +42,6 @@ tokio-stream.workspace = true tokio-util.workspace = true tough.workspace = true trust-dns-resolver.workspace = true -snafu.workspace = true toml.workspace = true uuid.workspace = true diff --git a/wicketd/src/config.rs b/wicketd/src/config.rs index 613da904d8..dda0688c3d 100644 --- a/wicketd/src/config.rs +++ b/wicketd/src/config.rs @@ -4,11 +4,10 @@ //! Configuration related types used by wicketd +use camino::{Utf8Path, Utf8PathBuf}; use dropshot::ConfigLogging; use serde::{Deserialize, Serialize}; -use snafu::prelude::*; -use std::path::Path; -use std::path::PathBuf; +use thiserror::Error; #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Config { @@ -19,17 +18,32 @@ impl Config { /// Load a `Config` from the given TOML file /// /// This config object can be used to create a wicketd server. 
- pub fn from_file>(path: P) -> Result { + pub fn from_file>( + path: P, + ) -> Result { let path = path.as_ref(); - let data = std::fs::read_to_string(path).context(IoSnafu { path })?; - toml::from_str(&data).context(ParseSnafu { path }) + let data = std::fs::read_to_string(path).map_err(|error| { + ConfigError::Io { error, path: path.to_owned() } + })?; + toml::from_str(&data).map_err(|error| ConfigError::Parse { + error, + path: path.to_owned(), + }) } } -#[derive(Debug, Snafu)] +#[derive(Debug, Error)] pub enum ConfigError { - #[snafu(display("Failed to read config file: {}", path.display()))] - Io { source: std::io::Error, path: PathBuf }, - #[snafu(display("Failed to parse config file: {}", path.display()))] - Parse { source: toml::de::Error, path: PathBuf }, + #[error("Failed to read config file: {path}")] + Io { + #[source] + error: std::io::Error, + path: Utf8PathBuf, + }, + #[error("Failed to parse config file: {path}")] + Parse { + #[source] + error: toml::de::Error, + path: Utf8PathBuf, + }, } From 2cbc6b8b3de63f1e332859884f9bf4a2f2d2e1da Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:31:41 -0800 Subject: [PATCH 77/91] Update Rust crate itertools to 0.12.1 (#4926) --- Cargo.lock | 28 ++++++++++++++-------------- Cargo.toml | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 50628dd1af..1ef3bdfd3d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2743,7 +2743,7 @@ dependencies = [ "fixedbitset", "guppy-workspace-hack", "indexmap 2.2.2", - "itertools 0.12.0", + "itertools 0.12.1", "nested", "once_cell", "pathdiff", @@ -3435,7 +3435,7 @@ dependencies = [ "installinator-artifact-client", "installinator-common", "ipcc", - "itertools 0.12.0", + "itertools 0.12.1", "libc", "omicron-common", "omicron-test-utils", @@ -3658,9 +3658,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ "either", ] @@ -4296,7 +4296,7 @@ dependencies = [ "illumos-utils", "internal-dns", "ipnetwork", - "itertools 0.12.0", + "itertools 0.12.1", "macaddr", "newtype_derive", "nexus-db-model", @@ -4942,7 +4942,7 @@ dependencies = [ "illumos-utils", "internal-dns", "ipnetwork", - "itertools 0.12.0", + "itertools 0.12.1", "macaddr", "mg-admin-client", "mime_guess", @@ -5160,7 +5160,7 @@ dependencies = [ "installinator-common", "internal-dns", "ipnetwork", - "itertools 0.12.0", + "itertools 0.12.1", "key-manager", "libc", "macaddr", @@ -5687,7 +5687,7 @@ dependencies = [ "futures", "highway", "indexmap 2.2.2", - "itertools 0.12.0", + "itertools 0.12.1", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", @@ -6799,7 +6799,7 @@ dependencies = [ "cassowary", "crossterm", "indoc 2.0.3", - "itertools 0.12.0", + "itertools 0.12.1", "lru", "paste", "stability", @@ -6896,7 +6896,7 @@ dependencies = [ "chrono", "crossterm", "fd-lock 3.0.13", - "itertools 0.12.0", + "itertools 0.12.1", "nu-ansi-term", "serde", "strip-ansi-escapes", @@ -8433,7 +8433,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c" dependencies = [ - "itertools 0.12.0", + "itertools 0.12.1", "nom", "unicode_categories", ] @@ 
-9516,7 +9516,7 @@ dependencies = [ "futures", "hex", "hubtools", - "itertools 0.12.0", + "itertools 0.12.1", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", @@ -10178,7 +10178,7 @@ dependencies = [ "humantime", "indexmap 2.2.2", "indicatif", - "itertools 0.12.0", + "itertools 0.12.1", "omicron-common", "omicron-passwords", "omicron-workspace-hack", @@ -10284,7 +10284,7 @@ dependencies = [ "installinator-common", "internal-dns", "ipnetwork", - "itertools 0.12.0", + "itertools 0.12.1", "maplit", "omicron-certificates", "omicron-common", diff --git a/Cargo.toml b/Cargo.toml index 3d22af19a9..fcc9f4920d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -235,7 +235,7 @@ internal-dns = { path = "internal-dns" } ipcc = { path = "ipcc" } ipnet = "2.9" ipnetwork = { version = "0.20", features = ["schemars"] } -itertools = "0.12.0" +itertools = "0.12.1" key-manager = { path = "key-manager" } kstat-rs = "0.2.3" libc = "0.2.152" From f50e3ad5d1a492949050c3433ad9840202dc059c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:31:58 -0800 Subject: [PATCH 78/91] Update Rust crate libc to 0.2.153 (#4953) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1ef3bdfd3d..b41abce1f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3769,9 +3769,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.152" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "libdlpi-sys" diff --git a/Cargo.toml b/Cargo.toml index fcc9f4920d..b83c8a44bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -238,7 +238,7 @@ ipnetwork = { version = "0.20", features = ["schemars"] } itertools = "0.12.1" key-manager = { path = "key-manager" } kstat-rs = "0.2.3" -libc = "0.2.152" +libc = "0.2.153" linear-map = "1.2.0" macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 658514e042..be82bc5fda 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -66,7 +66,7 @@ ipnetwork = { version = "0.20.0", features = ["schemars"] } itertools = { version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.4.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.152", features = ["extra_traits"] } +libc = { version = "0.2.153", features = ["extra_traits"] } log = { version = "0.4.20", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.6.3" } @@ -174,7 +174,7 @@ ipnetwork = { version = "0.20.0", features = ["schemars"] } itertools = { version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.4.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.152", features = ["extra_traits"] } +libc = { version = "0.2.153", features = ["extra_traits"] } log = { version = "0.4.20", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.6.3" } From 
b88e966e1ad0d860c0291741a11b56fcb5cf817b Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 1 Feb 2024 11:31:05 -0800 Subject: [PATCH 79/91] planner should wait on NTP zones before adding more zones (#4924) --- nexus/deployment/src/planner.rs | 111 +++++++++++++++++++++++++++++--- nexus/src/app/deployment.rs | 27 +++++++- 2 files changed, 127 insertions(+), 11 deletions(-) diff --git a/nexus/deployment/src/planner.rs b/nexus/deployment/src/planner.rs index f228a7a150..0a8e1f0b81 100644 --- a/nexus/deployment/src/planner.rs +++ b/nexus/deployment/src/planner.rs @@ -11,12 +11,23 @@ use crate::blueprint_builder::Ensure; use crate::blueprint_builder::Error; use nexus_types::deployment::Blueprint; use nexus_types::deployment::Policy; +use nexus_types::inventory::Collection; use slog::{info, Logger}; pub struct Planner<'a> { log: Logger, policy: &'a Policy, blueprint: BlueprintBuilder<'a>, + // latest inventory collection + // + // We must be very careful when using this during planning. The real-world + // state may have changed since this inventory was collected. Planning + // choices should not depend on this still being totally accurate. + // + // If we do start depending on specific criteria (e.g., it contains + // information about all sleds that we expect), we should verify that up + // front and update callers to ensure that it's true. + inventory: &'a Collection, } impl<'a> Planner<'a> { @@ -25,10 +36,13 @@ impl<'a> Planner<'a> { parent_blueprint: &'a Blueprint, policy: &'a Policy, creator: &str, + // NOTE: Right now, we just assume that this is the latest inventory + // collection. See the comment on the corresponding field in `Planner`. + inventory: &'a Collection, ) -> Planner<'a> { let blueprint = BlueprintBuilder::new_based_on(parent_blueprint, policy, creator); - Planner { log, policy, blueprint } + Planner { log, policy, blueprint, inventory } } pub fn plan(mut self) -> Result { @@ -66,6 +80,41 @@ impl<'a> Planner<'a> { continue; } + // Now we've established that the current blueprint _says_ there's + // an NTP zone on this system. But we must wait for it to actually + // be there before we can proceed to add anything else. Otherwise, + // we may wind up trying to provision this zone at the same time as + // other zones, and Sled Agent will reject requests to provision + // other zones before the clock is synchronized. + // + // Note that it's always possible that the NTP zone was added after + // this inventory was collected (in which case we'll erroneously + // choose to bail out here, but we'll pick it up again next time + // we're invoked). It's conceivable that the NTP zone was removed + // after this inventory was collected (in which case we'd be making + // a wrong decision here). However, we don't ever do this today. + // If we were to do something like that (maybe as part of upgrading + // the NTP zone or switching between an internal NTP vs. boundary + // NTP zone), we'll need to be careful how we do it to avoid a + // problem here. + let has_ntp_inventory = self + .inventory + .omicron_zones + .get(&sled_id) + .map(|sled_zones| { + sled_zones.zones.zones.iter().any(|z| z.zone_type.is_ntp()) + }) + .unwrap_or(false); + if !has_ntp_inventory { + info!( + &self.log, + "parent blueprint contains NTP zone, but it's not in \ + inventory yet"; + "sled_id" => ?sled_id, + ); + continue; + } + // Every zpool on the sled should have a Crucible zone on it. 
let mut ncrucibles_added = 0; for zpool_name in &sled_info.zpools { @@ -106,9 +155,11 @@ mod test { use crate::blueprint_builder::test::example; use crate::blueprint_builder::test::policy_add_sled; use crate::blueprint_builder::BlueprintBuilder; + use nexus_inventory::now_db_precision; + use nexus_types::inventory::OmicronZoneType; + use nexus_types::inventory::OmicronZonesFound; use omicron_common::api::external::Generation; use omicron_test_utils::dev::test_setup_log; - use sled_agent_client::types::OmicronZoneType; /// Runs through a basic sequence of blueprints for adding a sled #[test] @@ -116,7 +167,7 @@ mod test { let logctx = test_setup_log("planner_basic_add_sled"); // Use our example inventory collection. - let (collection, mut policy) = example(); + let (mut collection, mut policy) = example(); // Build the initial blueprint. We don't bother verifying it here // because there's a separate test for that. @@ -135,6 +186,7 @@ mod test { &blueprint1, &policy, "no-op?", + &collection, ) .plan() .expect("failed to plan"); @@ -156,6 +208,7 @@ mod test { &blueprint2, &policy, "test: add NTP?", + &collection, ) .plan() .expect("failed to plan"); @@ -177,18 +230,55 @@ mod test { assert_eq!(diff.sleds_removed().count(), 0); assert_eq!(diff.sleds_changed().count(), 0); - // Check that the next step is to add Crucible zones + // Check that with no change in inventory, the planner makes no changes. + // It needs to wait for inventory to reflect the new NTP zone before + // proceeding. let blueprint4 = Planner::new_based_on( + logctx.log.clone(), + &blueprint3, + &policy, + "test: add nothing more", + &collection, + ) + .plan() + .expect("failed to plan"); + let diff = blueprint3.diff(&blueprint4); + println!("3 -> 4 (expected no changes):\n{}", diff); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + assert_eq!(diff.sleds_changed().count(), 0); + + // Now update the inventory to have the requested NTP zone. 
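+        // (A map `insert` returns `None` when the key was absent, so the
+        // assertion below also checks we aren't clobbering an existing
+        // entry for this sled.)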
+ assert!(collection + .omicron_zones + .insert( + new_sled_id, + OmicronZonesFound { + time_collected: now_db_precision(), + source: String::from("test suite"), + sled_id: new_sled_id, + zones: blueprint4 + .omicron_zones + .get(&new_sled_id) + .cloned() + .expect("blueprint should contain zones for new sled"), + } + ) + .is_none()); + + // Check that the next step is to add Crucible zones + let blueprint5 = Planner::new_based_on( logctx.log.clone(), &blueprint3, &policy, "test: add Crucible zones?", + &collection, ) .plan() .expect("failed to plan"); - let diff = blueprint3.diff(&blueprint4); - println!("3 -> 4 (expect Crucible zones):\n{}", diff); + let diff = blueprint3.diff(&blueprint5); + println!("3 -> 5 (expect Crucible zones):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); let sleds = diff.sleds_changed().collect::>(); @@ -210,17 +300,18 @@ mod test { } // Check that there are no more steps - let blueprint5 = Planner::new_based_on( + let blueprint6 = Planner::new_based_on( logctx.log.clone(), - &blueprint4, + &blueprint5, &policy, "test: no-op?", + &collection, ) .plan() .expect("failed to plan"); - let diff = blueprint4.diff(&blueprint5); - println!("4 -> 5 (expect no changes):\n{}", diff); + let diff = blueprint5.diff(&blueprint6); + println!("5 -> 6 (expect no changes):\n{}", diff); assert_eq!(diff.sleds_added().count(), 0); assert_eq!(diff.sleds_removed().count(), 0); assert_eq!(diff.sleds_changed().count(), 0); diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 70d6d242fb..65f8f4d028 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -17,12 +17,14 @@ use nexus_types::deployment::Policy; use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolName; use nexus_types::identity::Asset; +use nexus_types::inventory::Collection; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; +use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; @@ -41,6 +43,7 @@ const SQL_BATCH_SIZE: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) }; struct PlanningContext { policy: Policy, creator: String, + inventory: Option, } impl super::Nexus { @@ -171,7 +174,25 @@ impl super::Nexus { }) .collect(); - Ok(PlanningContext { creator, policy: Policy { sleds } }) + // The choice of which inventory collection to use here is not + // necessarily trivial. Inventory collections may be incomplete due to + // transient (or even persistent) errors. It's not yet clear what + // general criteria we'll want to use in picking a collection here. But + // as of this writing, this is only used for one specific case, which is + // to implement a gate that prevents the planner from provisioning + // non-NTP zones on a sled unless we know there's an NTP zone already on + // that sled. For that purpose, it's okay if this collection is + // incomplete due to a transient error -- that would just prevent + // forward progress in the planner until the next time we try this. + // (Critically, it won't cause the planner to do anything wrong.) 
+ let inventory = datastore + .inventory_get_latest_collection(opctx) + .await + .internal_context( + "fetching latest inventory collection for blueprint planner", + )?; + + Ok(PlanningContext { creator, policy: Policy { sleds }, inventory }) } async fn blueprint_add( @@ -222,11 +243,15 @@ impl super::Nexus { }; let planning_context = self.blueprint_planning_context(opctx).await?; + let inventory = planning_context.inventory.ok_or_else(|| { + Error::internal_error("no recent inventory collection found") + })?; let planner = Planner::new_based_on( opctx.log.clone(), &parent_blueprint, &planning_context.policy, &planning_context.creator, + &inventory, ); let blueprint = planner.plan().map_err(|error| { Error::internal_error(&format!( From 0531d6161ee2b670a0ab2a72f26727d4ec3cacaf Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 1 Feb 2024 12:00:43 -0800 Subject: [PATCH 80/91] Update how-to-run with new rack_network_config subnet (#4948) --- docs/how-to-run.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index c1f78a0521..6a0b8f79d5 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -298,7 +298,7 @@ routes = [{nexthop = "192.168.1.199", destination = "0.0.0.0/0"}] # Addresses associated with this port. # For softnpu, an address within the "infra" block above that will be used for # the softnpu uplink port. You can just pick the first address in that pool. -addresses = ["192.168.1.30/32"] +addresses = ["192.168.1.30/24"] # Name of the uplink port. This should always be "qsfp0" when using softnpu. port = "qsfp0" # The speed of this port. From 8e26331a7502cbbeca4f5f871e24cb318aded382 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Thu, 1 Feb 2024 14:46:19 -0800 Subject: [PATCH 81/91] Validate instance hostnames in the Nexus API (#4938) - Add the `Hostname` type to the external API, which checks that a string conforms to RFC 1035 (and others) describing valid Internet hostnames. Add some tests for that. - Use the new type everywhere in Nexus and its integration tests, though we still serialize to a String in the database and when passing the instance information to the sled-agent. The sled-agent does its own validation internally through OPTE. - Add a few regression tests for creating instances with known-bad hostnames. 
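As a minimal sketch of the intended behavior (the example strings are illustrative; only the `Hostname::from_str` API added below is assumed):

    use omicron_common::api::external::Hostname;
    use std::str::FromStr;

    fn main() {
        // Dot-separated sequences of letter/digit/hyphen labels are accepted.
        assert!(Hostname::from_str("db-01.example").is_ok());
        // Underscores, empty labels, and trailing dots are rejected, as are
        // labels that start or end with a hyphen.
        assert!(Hostname::from_str("bad_host").is_err());
        assert!(Hostname::from_str("fqdn.not.allowed.").is_err());
        assert!(Hostname::from_str("ends-with-").is_err());
    }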
--- common/Cargo.toml | 1 + common/src/api/external/mod.rs | 126 ++++++++++++++++- common/src/api/internal/nexus.rs | 7 +- end-to-end-tests/src/instance_launch.rs | 2 +- nexus/db-model/src/instance.rs | 7 +- nexus/db-queries/src/db/datastore/instance.rs | 6 +- .../db-queries/src/db/queries/external_ip.rs | 2 +- .../src/db/queries/network_interface.rs | 2 +- nexus/src/app/instance.rs | 19 ++- nexus/src/app/sagas/instance_create.rs | 2 +- nexus/src/app/sagas/instance_delete.rs | 2 +- nexus/src/app/sagas/instance_migrate.rs | 2 +- nexus/src/app/sagas/instance_start.rs | 2 +- nexus/src/app/sagas/snapshot_create.rs | 2 +- nexus/test-utils/src/resource_helpers.rs | 19 ++- nexus/tests/integration_tests/endpoints.rs | 2 +- nexus/tests/integration_tests/instances.rs | 127 +++++++++++++----- nexus/tests/integration_tests/projects.rs | 2 +- nexus/tests/integration_tests/quotas.rs | 2 +- nexus/tests/integration_tests/snapshots.rs | 2 +- .../integration_tests/subnet_allocation.rs | 2 +- nexus/types/src/external_api/params.rs | 4 +- openapi/nexus.json | 10 +- openapi/sled-agent.json | 14 +- sled-agent/src/instance.rs | 3 +- sled-agent/src/sim/sled_agent.rs | 2 +- 26 files changed, 308 insertions(+), 63 deletions(-) diff --git a/common/Cargo.toml b/common/Cargo.toml index 3941f5303e..ebb8c8c9b4 100644 --- a/common/Cargo.toml +++ b/common/Cargo.toml @@ -38,6 +38,7 @@ parse-display.workspace = true progenitor.workspace = true omicron-workspace-hack.workspace = true once_cell.workspace = true +regress.workspace = true [dev-dependencies] camino-tempfile.workspace = true diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index dc3537fbb2..cdc929d89b 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -696,6 +696,109 @@ impl TryFrom<i64> for Generation { } } +/// An RFC-1035-compliant hostname. +#[derive( + Clone, Debug, Deserialize, Display, Eq, PartialEq, SerializeDisplay, +)] +#[display("{0}")] +#[serde(try_from = "String", into = "String")] +pub struct Hostname(String); + +impl Hostname { + /// Return the hostname as a string slice. + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +// Regular expression for hostnames. +// +// Each name is a dot-separated sequence of labels. Each label is supposed to +// be an "LDH": letter, digit, or hyphen. Hostnames can consist of one label, or +// many, separated by a `.`. While _domain_ names are allowed to end in a `.`, +// making them fully-qualified, hostnames are not. +// +// Note that labels are allowed to contain a hyphen, but may not start or end +// with one. See RFC 952, "Lexical grammar" section. +// +// Note that we need to use a regex engine capable of lookbehind to support +// this, since we need to check that labels don't end with a `-`. +const HOSTNAME_REGEX: &str = r#"^([a-zA-Z0-9]+[a-zA-Z0-9\-]*(?<!-)\.)*[a-zA-Z0-9]+[a-zA-Z0-9\-]*(?<!-)$"#;
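// A quick illustration (the assertion strings are ours, not the patch's) of
// why lookbehind matters here, using the `regress` engine added as a
// dependency above:
//
//     let re = regress::Regex::new(HOSTNAME_REGEX).unwrap();
//     assert!(re.find("ok-label.host").is_some());
//     // The `(?<!-)` lookbehind rejects any label ending in a hyphen.
//     assert!(re.find("bad-.host").is_none());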
+ +// Maximum length of a hostname, in characters. +// +// RFC 1035 limits the encoded form of a name to 255 octets; that encoding +// prefixes each label with a length octet and terminates the name with the +// zero-length root label `\x00`. +const HOSTNAME_MAX_LEN: u32 = 253; + +impl FromStr for Hostname { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + anyhow::ensure!( + s.len() <= HOSTNAME_MAX_LEN as usize, + "Max hostname length is {HOSTNAME_MAX_LEN}" + ); + let re = regress::Regex::new(HOSTNAME_REGEX).unwrap(); + if re.find(s).is_some() { + Ok(Hostname(s.to_string())) + } else { + anyhow::bail!("Hostnames must comply with RFC 1035") + } + } +} + +impl TryFrom<&str> for Hostname { + type Error = <Hostname as FromStr>::Err; + + fn try_from(s: &str) -> Result<Self, Self::Error> { + s.parse() + } +} + +impl TryFrom<String> for Hostname { + type Error = <Hostname as FromStr>::Err; + + fn try_from(s: String) -> Result<Self, Self::Error> { + s.as_str().parse() + } +} + +// Custom implementation of JsonSchema for Hostname to ensure RFC-1035-style +// validation +impl JsonSchema for Hostname { + fn schema_name() -> String { + "Hostname".to_string() + } + + fn json_schema( + _: &mut schemars::gen::SchemaGenerator, + ) -> schemars::schema::Schema { + schemars::schema::Schema::Object(schemars::schema::SchemaObject { + metadata: Some(Box::new(schemars::schema::Metadata { + title: Some("An RFC-1035-compliant hostname".to_string()), + description: Some( + "A hostname identifies a host on a network, and \ + is usually a dot-delimited sequence of labels, \ + where each label contains only letters, digits, \ + or the hyphen. See RFCs 1035 and 952 for more details." + .to_string(), + ), + ..Default::default() + })), + instance_type: Some(schemars::schema::SingleOrVec::Single( + Box::new(schemars::schema::InstanceType::String), + )), + string: Some(Box::new(schemars::schema::StringValidation { + max_length: Some(HOSTNAME_MAX_LEN), + min_length: Some(1), + pattern: Some(HOSTNAME_REGEX.to_string()), + })), + ..Default::default() + }) + } +} + // General types used to implement API resources /// Identifies a type of API resource @@ -939,7 +1042,7 @@ pub struct Instance { /// memory allocated for this Instance pub memory: ByteCount, /// RFC1035-compliant hostname for the Instance. - pub hostname: String, // TODO-cleanup different type? + pub hostname: String, #[serde(flatten)] pub runtime: InstanceRuntimeState, @@ -2737,6 +2840,7 @@ mod test { VpcFirewallRuleUpdateParams, }; use crate::api::external::Error; + use crate::api::external::Hostname; use crate::api::external::ResourceType; use std::convert::TryFrom; use std::str::FromStr; @@ -3460,4 +3564,24 @@ mod test { let conv = mac.to_i64(); assert_eq!(original, conv); } + + #[test] + fn test_hostname_from_str() { + assert!(Hostname::from_str("name").is_ok()); + assert!(Hostname::from_str("a.good.name").is_ok()); + assert!(Hostname::from_str("another.very-good.name").is_ok()); + assert!(Hostname::from_str("0name").is_ok()); + assert!(Hostname::from_str("name0").is_ok()); + assert!(Hostname::from_str("0name0").is_ok()); + + assert!(Hostname::from_str("").is_err()); + assert!(Hostname::from_str("no_no").is_err()); + assert!(Hostname::from_str("no.fqdns.").is_err()); + assert!(Hostname::from_str("empty..label").is_err()); + assert!(Hostname::from_str("-hypen.cannot.start").is_err()); + assert!(Hostname::from_str("hypen.-cannot.start").is_err()); + assert!(Hostname::from_str("hypen.cannot.end-").is_err()); + assert!(Hostname::from_str("hyphen-cannot-end-").is_err()); + assert!(Hostname::from_str(&"too-long".repeat(100)).is_err()); + } } diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs index 780e60b1a2..3972e011cf 100644 --- a/common/src/api/internal/nexus.rs +++ b/common/src/api/internal/nexus.rs @@ -5,8 +5,8 @@ //!
APIs exposed by Nexus. use crate::api::external::{ - ByteCount, DiskState, Generation, InstanceCpuCount, InstanceState, IpNet, - SemverVersion, Vni, + ByteCount, DiskState, Generation, Hostname, InstanceCpuCount, + InstanceState, IpNet, SemverVersion, Vni, }; use chrono::{DateTime, Utc}; use parse_display::{Display, FromStr}; @@ -36,8 +36,7 @@ pub struct InstanceProperties { pub ncpus: InstanceCpuCount, pub memory: ByteCount, /// RFC1035-compliant hostname for the instance. - // TODO-cleanup different type? - pub hostname: String, + pub hostname: Hostname, } /// The dynamic runtime properties of an instance: its current VMM ID (if any), diff --git a/end-to-end-tests/src/instance_launch.rs b/end-to-end-tests/src/instance_launch.rs index c1da731c35..f27261e82d 100644 --- a/end-to-end-tests/src/instance_launch.rs +++ b/end-to-end-tests/src/instance_launch.rs @@ -64,7 +64,7 @@ async fn instance_launch() -> Result<()> { .body(InstanceCreate { name: generate_name("instance")?, description: String::new(), - hostname: "localshark".into(), // 🦈 + hostname: "localshark".parse().unwrap(), // 🦈 memory: ByteCount(1024 * 1024 * 1024), ncpus: InstanceCpuCount(2), disks: vec![InstanceDiskAttachment::Attach { diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index e10f8c2603..f7731ff903 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -46,7 +46,10 @@ pub struct Instance { pub memory: ByteCount, /// The instance's hostname. - // TODO-cleanup: Different type? + // TODO-cleanup: We use a validated wrapper type in the API, but not in + // the database. This is to handle existing names that do not pass the + // new validation. We should swap this for a SQL-serializable validated + // type. #[diesel(column_name = hostname)] pub hostname: String, @@ -81,7 +84,7 @@ impl Instance { user_data: params.user_data.clone(), ncpus: params.ncpus.into(), memory: params.memory.into(), - hostname: params.hostname.clone(), + hostname: params.hostname.to_string(), boot_on_fault: false, runtime_state, } diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index ca7efe32f7..acea7bb4e3 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -97,7 +97,11 @@ impl From<InstanceAndActiveVmm> for omicron_common::api::external::Instance { project_id: value.instance.project_id, ncpus: value.instance.ncpus.into(), memory: value.instance.memory.into(), - hostname: value.instance.hostname, + hostname: value + .instance + .hostname + .parse() + .expect("found invalid hostname in the database"), runtime: omicron_common::api::external::InstanceRuntimeState { run_state: *run_state.state(), time_run_state_updated, diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 392e669243..54595a8444 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -998,7 +998,7 @@ mod tests { identity: IdentityMetadataCreateParams { name: String::from(name).parse().unwrap(), description: format!("instance {}", name) }, ncpus: InstanceCpuCount(omicron_common::api::external::InstanceCpuCount(1)).into(), memory: ByteCount(omicron_common::api::external::ByteCount::from_gibibytes_u32(1)).into(), - hostname: "test".into(), + hostname: "test".parse().unwrap(), ssh_public_keys: None, user_data: vec![], network_interfaces: Default::default(), diff --git
a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index 3cfbead2f7..ae3e2c8ead 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -1738,7 +1738,7 @@ mod tests { }, ncpus: InstanceCpuCount(4), memory: ByteCount::from_gibibytes_u32(4), - hostname: "inst".to_string(), + hostname: "inst".parse().unwrap(), user_data: vec![], ssh_public_keys: Some(Vec::new()), network_interfaces: InstanceNetworkInterfaceAttachment::None, diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index eb78d4179c..4b52b597ba 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1010,6 +1010,23 @@ impl super::Nexus { ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, authz_instance).await?; + // Check that the hostname is valid. + // + // TODO-cleanup: This can be removed when we are confident that no + // instances exist prior to the addition of strict hostname validation + // in the API. + let Ok(hostname) = db_instance.hostname.parse() else { + let msg = format!( + "The instance hostname '{}' is no longer valid. \ + To access the data on its disks, this instance \ + must be deleted, and a new one created with the \ + relevant disks. The new hostname will be validated \ + at that time.", + db_instance.hostname, + ); + return Err(Error::invalid_request(&msg)); + }; + // Gather disk information and turn that into DiskRequests let disks = self .db_datastore @@ -1175,7 +1192,7 @@ impl super::Nexus { properties: InstanceProperties { ncpus: db_instance.ncpus.into(), memory: db_instance.memory.into(), - hostname: db_instance.hostname.clone(), + hostname, }, nics, source_nat, diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index ed1b23fe82..88dd0ae36e 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -1102,7 +1102,7 @@ pub mod test { }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("inst"), + hostname: "inst".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index 067e2d79ed..0e253913b0 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -235,7 +235,7 @@ mod test { }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("inst"), + hostname: "inst".parse().unwrap(), user_data: vec![], ssh_public_keys: Some(Vec::new()), network_interfaces: diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 5e91b8fed1..ff3ff66e78 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -568,7 +568,7 @@ mod tests { }, ncpus: InstanceCpuCount(2), memory: ByteCount::from_gibibytes_u32(2), - hostname: String::from(INSTANCE_NAME), + hostname: INSTANCE_NAME.parse().unwrap(), user_data: b"#cloud-config".to_vec(), ssh_public_keys: Some(Vec::new()), network_interfaces: diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index b4cc6f4cc6..b1d9506c31 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -747,7 +747,7 @@ mod test { }, ncpus: InstanceCpuCount(2), memory: ByteCount::from_gibibytes_u32(2), - 
hostname: String::from(INSTANCE_NAME), + hostname: INSTANCE_NAME.parse().unwrap(), user_data: b"#cloud-config".to_vec(), ssh_public_keys: Some(Vec::new()), network_interfaces: diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index d80b1b9029..e017ab377b 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -1940,7 +1940,7 @@ mod test { }, ncpus: InstanceCpuCount(2), memory: ByteCount::from_gibibytes_u32(1), - hostname: String::from("base_instance"), + hostname: "base-instance".parse().unwrap(), user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 254723d32b..764332c5bc 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -518,7 +518,7 @@ pub async fn create_instance_with( }, ncpus: InstanceCpuCount(4), memory: ByteCount::from_gibibytes_u32(1), - hostname: String::from("the_host"), + hostname: "the-host".parse().unwrap(), user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), @@ -532,6 +532,23 @@ .await } +/// Creates an instance, asserting a status code and returning the error. +/// +/// Note that this accepts any serializable body, which allows users to create +/// invalid inputs to test our parameter validation. +pub async fn create_instance_with_error<T>( + client: &ClientTestContext, + project_name: &str, + body: &T, + status: StatusCode, +) -> HttpErrorResponseBody +where + T: serde::Serialize, +{ + let url = format!("/v1/instances?project={project_name}"); + object_create_error(client, &url, body, status).await +} + pub async fn create_vpc( client: &ClientTestContext, project_name: &str, diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 38e248471b..cd04bb6018 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -421,7 +421,7 @@ pub static DEMO_INSTANCE_CREATE: Lazy<params::InstanceCreate> = }, ncpus: InstanceCpuCount(1), memory: ByteCount::from_gibibytes_u32(16), - hostname: String::from("demo-instance"), + hostname: "demo-instance".parse().unwrap(), user_data: vec![], ssh_public_keys: Some(Vec::new()), network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index e5d1c2f143..09f91a2288 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -74,7 +74,8 @@ use dropshot::{HttpErrorResponseBody, ResultsPage}; use nexus_test_utils::identity_eq; use nexus_test_utils::resource_helpers::{ - create_instance, create_instance_with, create_project, + create_instance, create_instance_with, create_instance_with_error, + create_project, }; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::shared::SiloRole; @@ -165,6 +166,64 @@ ); } +// Regression tests for https://github.com/oxidecomputer/omicron/issues/4923.
+#[nexus_test] +async fn test_cannot_create_instance_with_bad_hostname( + cptestctx: &ControlPlaneTestContext, +) { + test_create_instance_with_bad_hostname_impl(cptestctx, "bad_hostname") + .await; +} + +#[nexus_test] +async fn test_cannot_create_instance_with_empty_hostname( + cptestctx: &ControlPlaneTestContext, +) { + test_create_instance_with_bad_hostname_impl(cptestctx, "").await; +} + +async fn test_create_instance_with_bad_hostname_impl( + cptestctx: &ControlPlaneTestContext, + hostname: &str, +) { + let client = &cptestctx.external_client; + let _project = create_project_and_pool(client).await; + + // Create an instance, with what should be an invalid hostname. + // + // We'll do this by creating a _valid_ set of parameters, converting it to + // JSON, and then mucking with the hostname. + let instance_name = "happy-accident"; + let params = params::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: format!("instance {:?}", instance_name), + }, + ncpus: InstanceCpuCount(4), + memory: ByteCount::from_gibibytes_u32(1), + hostname: "the-host".parse().unwrap(), + user_data: + b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" + .to_vec(), + network_interfaces: Default::default(), + external_ips: vec![], + disks: vec![], + start: false, + ssh_public_keys: None, + }; + let mut body: serde_json::Value = + serde_json::from_str(&serde_json::to_string(&params).unwrap()).unwrap(); + body["hostname"] = hostname.into(); + let err = create_instance_with_error( + client, + PROJECT_NAME, + &body, + StatusCode::BAD_REQUEST, + ) + .await; + assert!(err.message.contains("Hostnames must comply with RFC 1035")); +} + #[nexus_test] async fn test_instance_access(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; @@ -231,7 +290,7 @@ // These particulars are hardcoded in create_instance(). assert_eq!(nfoundcpus, 4); assert_eq!(instance.memory.to_whole_gibibytes(), 1); - assert_eq!(instance.hostname, "the_host"); + assert_eq!(instance.hostname.as_str(), "the-host"); assert_eq!(instance.runtime.run_state, InstanceState::Starting); // Attempt to create a second instance with a conflicting name.
@@ -247,7 +306,7 @@ async fn test_instances_create_reboot_halt( }, ncpus: instance.ncpus, memory: instance.memory, - hostname: instance.hostname.clone(), + hostname: instance.hostname.parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: @@ -1220,7 +1279,7 @@ async fn test_instances_create_stopped_start( }, ncpus: InstanceCpuCount(4), memory: ByteCount::from_gibibytes_u32(1), - hostname: String::from("the_host"), + hostname: "the-host".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: @@ -1388,7 +1447,7 @@ async fn test_instance_using_image_from_other_project_fails( }, ncpus: InstanceCpuCount(4), memory: ByteCount::from_gibibytes_u32(1), - hostname: "stolen".into(), + hostname: "stolen".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: @@ -1463,7 +1522,7 @@ async fn test_instance_create_saga_removes_instance_database_record( }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("inst"), + hostname: "inst".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: interface_params.clone(), @@ -1491,7 +1550,7 @@ async fn test_instance_create_saga_removes_instance_database_record( }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("inst2"), + hostname: "inst2".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: interface_params, @@ -1580,7 +1639,7 @@ async fn test_instance_with_single_explicit_ip_address( }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("nic-test"), + hostname: "nic-test".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: interface_params, @@ -1694,7 +1753,7 @@ async fn test_instance_with_new_custom_network_interfaces( }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("nic-test"), + hostname: "nic-test".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: interface_params, @@ -1808,7 +1867,7 @@ async fn test_instance_create_delete_network_interface( }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("nic-test"), + hostname: "nic-test".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::None, @@ -2049,7 +2108,7 @@ async fn test_instance_update_network_interfaces( }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("nic-test"), + hostname: "nic-test".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::None, @@ -2442,7 +2501,7 @@ async fn test_instance_with_multiple_nics_unwinds_completely( }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("nic-test"), + hostname: "nic-test".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: interface_params, @@ -2508,7 +2567,7 @@ async fn test_attach_one_disk_to_instance(cptestctx: &ControlPlaneTestContext) { }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("nfs"), + hostname: "nfs".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: 
params::InstanceNetworkInterfaceAttachment::Default, @@ -2568,7 +2627,7 @@ async fn test_instance_create_attach_disks( }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(3), - hostname: String::from("nfs"), + hostname: "nfs".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -2665,7 +2724,7 @@ async fn test_instance_create_attach_disks_undo( }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("nfs"), + hostname: "nfs".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -2750,7 +2809,7 @@ async fn test_attach_eight_disks_to_instance( }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("nfs"), + hostname: "nfs".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -2831,7 +2890,7 @@ async fn test_cannot_attach_nine_disks_to_instance( }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("nfs"), + hostname: "nfs".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -2926,7 +2985,7 @@ async fn test_cannot_attach_faulted_disks(cptestctx: &ControlPlaneTestContext) { }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("nfs"), + hostname: "nfs".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -3010,7 +3069,7 @@ async fn test_disks_detached_when_instance_destroyed( }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("nfs"), + hostname: "nfs".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -3101,7 +3160,7 @@ async fn test_disks_detached_when_instance_destroyed( }, ncpus: InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("nfsv2"), + hostname: "nfsv2".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -3161,7 +3220,7 @@ async fn test_instances_memory_rejected_less_than_min_memory_size( }, ncpus: InstanceCpuCount(1), memory: ByteCount::from(MIN_MEMORY_BYTES_PER_INSTANCE / 2), - hostname: String::from("inst"), + hostname: "inst".parse().unwrap(), user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), @@ -3211,7 +3270,7 @@ async fn test_instances_memory_not_divisible_by_min_memory_size( }, ncpus: InstanceCpuCount(1), memory: ByteCount::from(1024 * 1024 * 1024 + 300), - hostname: String::from("inst"), + hostname: "inst".parse().unwrap(), user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), @@ -3261,7 +3320,7 @@ async fn test_instances_memory_greater_than_max_size( ncpus: InstanceCpuCount(1), memory: ByteCount::try_from(MAX_MEMORY_BYTES_PER_INSTANCE + (1 << 30)) .unwrap(), - hostname: String::from("inst"), + hostname: "inst".parse().unwrap(), user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), @@ -3347,7 +3406,7 @@ async fn 
test_instance_create_with_ssh_keys( // By default should transfer all profile keys ssh_public_keys: None, start: false, - hostname: instance_name.to_string(), + hostname: instance_name.parse().unwrap(), user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], @@ -3393,7 +3452,7 @@ async fn test_instance_create_with_ssh_keys( // Should only transfer the first key ssh_public_keys: Some(vec![user_keys[0].identity.name.clone().into()]), start: false, - hostname: instance_name.to_string(), + hostname: instance_name.parse().unwrap(), user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], @@ -3438,7 +3497,7 @@ async fn test_instance_create_with_ssh_keys( // Should transfer no keys ssh_public_keys: Some(vec![]), start: false, - hostname: instance_name.to_string(), + hostname: instance_name.parse().unwrap(), user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![], @@ -3556,7 +3615,7 @@ async fn test_cannot_provision_instance_beyond_cpu_capacity( }, ncpus, memory: ByteCount::from_gibibytes_u32(1), - hostname: config.0.to_string(), + hostname: config.0.parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: @@ -3610,7 +3669,7 @@ async fn test_cannot_provision_instance_beyond_cpu_limit( }, ncpus: too_many_cpus, memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("test"), + hostname: "test".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, @@ -3662,7 +3721,7 @@ async fn test_cannot_provision_instance_beyond_ram_capacity( }, ncpus: InstanceCpuCount::try_from(i64::from(1)).unwrap(), memory: ByteCount::try_from(config.1).unwrap(), - hostname: config.0.to_string(), + hostname: config.0.parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: @@ -3913,7 +3972,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( }, ncpus: InstanceCpuCount(4), memory: ByteCount::from_gibibytes_u32(1), - hostname: String::from("the_host"), + hostname: "the-host".parse().unwrap(), user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { @@ -3978,7 +4037,7 @@ async fn test_instance_ephemeral_ip_from_orphan_pool( }, ncpus: InstanceCpuCount(4), memory: ByteCount::from_gibibytes_u32(1), - hostname: String::from("the_host"), + hostname: "the-host".parse().unwrap(), user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { @@ -4039,7 +4098,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error( }, ncpus: InstanceCpuCount(4), memory: ByteCount::from_gibibytes_u32(1), - hostname: String::from("the_host"), + hostname: "the-host".parse().unwrap(), user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { @@ -4172,7 +4231,7 @@ async fn test_instance_allow_only_one_ephemeral_ip( }, ncpus: InstanceCpuCount(4), memory: ByteCount::from_gibibytes_u32(1), - hostname: String::from("the_host"), + hostname: "the-host".parse().unwrap(), user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), @@ -4300,7 +4359,7 @@ async fn test_instance_create_in_silo(cptestctx: &ControlPlaneTestContext) { }, ncpus: 
InstanceCpuCount::try_from(2).unwrap(), memory: ByteCount::from_gibibytes_u32(4), - hostname: String::from("inst"), + hostname: "inst".parse().unwrap(), user_data: vec![], ssh_public_keys: None, network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, diff --git a/nexus/tests/integration_tests/projects.rs b/nexus/tests/integration_tests/projects.rs index a89f2508ac..e13aca0cd0 100644 --- a/nexus/tests/integration_tests/projects.rs +++ b/nexus/tests/integration_tests/projects.rs @@ -155,7 +155,7 @@ async fn test_project_deletion_with_instance( }, ncpus: InstanceCpuCount(4), memory: ByteCount::from_gibibytes_u32(1), - hostname: String::from("the_host"), + hostname: "the-host".parse().unwrap(), user_data: b"none".to_vec(), ssh_public_keys: Some(Vec::new()), network_interfaces: diff --git a/nexus/tests/integration_tests/quotas.rs b/nexus/tests/integration_tests/quotas.rs index c0422d0030..4e3335d04c 100644 --- a/nexus/tests/integration_tests/quotas.rs +++ b/nexus/tests/integration_tests/quotas.rs @@ -78,7 +78,7 @@ impl ResourceAllocator { }, ncpus: InstanceCpuCount(cpus), memory: ByteCount::from_gibibytes_u32(memory), - hostname: "host".to_string(), + hostname: "host".parse().unwrap(), user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), ssh_public_keys: Some(Vec::new()), diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index 9a2ee3d310..3731a80668 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -119,7 +119,7 @@ async fn test_snapshot_basic(cptestctx: &ControlPlaneTestContext) { }, ncpus: InstanceCpuCount(2), memory: ByteCount::from_gibibytes_u32(1), - hostname: String::from("base_instance"), + hostname: "base-instance".parse().unwrap(), user_data: b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" .to_vec(), diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index 3c9e18817f..3362d5a4ac 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ -56,7 +56,7 @@ async fn create_instance_expect_failure( }, ncpus: InstanceCpuCount(1), memory: ByteCount::from_gibibytes_u32(1), - hostname: name.to_string(), + hostname: name.parse().unwrap(), user_data: vec![], ssh_public_keys: Some(Vec::new()), network_interfaces, diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index bda6a876ee..6cb878084d 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -9,7 +9,7 @@ use crate::external_api::shared; use base64::Engine; use chrono::{DateTime, Utc}; use omicron_common::api::external::{ - AddressLotKind, ByteCount, IdentityMetadataCreateParams, + AddressLotKind, ByteCount, Hostname, IdentityMetadataCreateParams, IdentityMetadataUpdateParams, InstanceCpuCount, IpNet, Ipv4Net, Ipv6Net, Name, NameOrId, PaginationOrder, RouteDestination, RouteTarget, SemverVersion, @@ -1004,7 +1004,7 @@ pub struct InstanceCreate { pub identity: IdentityMetadataCreateParams, pub ncpus: InstanceCpuCount, pub memory: ByteCount, - pub hostname: String, // TODO-cleanup different type? + pub hostname: Hostname, /// User data for instance initialization systems (such as cloud-init). 
/// Must be a Base64-encoded string, as specified in RFC 4648 § 4 (+ and / diff --git a/openapi/nexus.json b/openapi/nexus.json index 98073c8625..7aedd1b523 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -12221,6 +12221,14 @@ "start_time" ] }, + "Hostname": { + "title": "An RFC-1035-compliant hostname", + "description": "A hostname identifies a host on a network, and is usually a dot-delimited sequence of labels, where each label contains only letters, digits, or the hyphen. See RFCs 1035 and 952 for more details.", + "type": "string", + "pattern": "^([a-zA-Z0-9]+[a-zA-Z0-9\\-]*(?<!-)\\.)*[a-zA-Z0-9]+[a-zA-Z0-9\\-]*(?<!-)$", + "minLength": 1, + "maxLength": 253 + }, Date: Thu, 1 Feb 2024 18:14:47 -0500 Subject: [PATCH 82/91] Ensure we can get valid Baseboards from sim SPs (#4957) --- dev-tools/omdb/tests/successes.out | 4 +- .../tests/output/collector_basic.txt | 44 +++++++++---------- .../tests/output/collector_errors.txt | 44 +++++++++---------- .../output/collector_sled_agent_errors.txt | 44 +++++++++---------- sp-sim/src/gimlet.rs | 3 +- 5 files changed, 70 insertions(+), 69 deletions(-) diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index dc77ade735..416b669068 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -131,8 +131,8 @@ SPs FOUND THROUGH IGNITION SERVICE PROCESSOR STATES TYPE SLOT MODEL SERIAL REV HUBRIS PWR ROT_ACTIVE - Sled 0 FAKE_SIM_GIMLET SimGimlet00 0 0000000000000000 A2 slot A - Sled 1 FAKE_SIM_GIMLET SimGimlet01 0 0000000000000000 A2 slot A + Sled 0 i86pc SimGimlet00 0 0000000000000000 A2 slot A + Sled 1 i86pc SimGimlet01 0 0000000000000000 A2 slot A Switch 0 FAKE_SIM_SIDECAR SimSidecar0 0 0000000000000000 A2 slot A Switch 1 FAKE_SIM_SIDECAR SimSidecar1 0 0000000000000000 A2 slot A diff --git a/nexus/inventory/tests/output/collector_basic.txt b/nexus/inventory/tests/output/collector_basic.txt index e59e19967a..4a05f09e1c 100644 --- a/nexus/inventory/tests/output/collector_basic.txt +++ b/nexus/inventory/tests/output/collector_basic.txt @@ -1,8 +1,8 @@ baseboards: - part "FAKE_SIM_GIMLET" serial "SimGimlet00" - part "FAKE_SIM_GIMLET" serial "SimGimlet01" part "FAKE_SIM_SIDECAR" serial "SimSidecar0" part "FAKE_SIM_SIDECAR" serial "SimSidecar1" + part "i86pc" serial "SimGimlet00" + part "i86pc" serial "SimGimlet01" part "sim-gimlet" serial "sim-03265caf-da7d-46c7-b1c2-39fa90ce5c65" part "sim-gimlet" serial "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" @@ -23,52 +23,52 @@ rot pages: data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" SPs: - baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00" - baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01" baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0" baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1" + baseboard part "i86pc" serial "SimGimlet00" + baseboard part "i86pc" serial "SimGimlet01" RoTs: - baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00" - baseboard part
"FAKE_SIM_GIMLET" serial "SimGimlet01" baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0" baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1" + baseboard part "i86pc" serial "SimGimlet00" + baseboard part "i86pc" serial "SimGimlet01" cabooses found: - SpSlot0 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimGimletSp" - SpSlot0 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletSp" SpSlot0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarSp" SpSlot0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarSp" - SpSlot1 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimGimletSp" - SpSlot1 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletSp" + SpSlot0 baseboard part "i86pc" serial "SimGimlet00": board "SimGimletSp" + SpSlot0 baseboard part "i86pc" serial "SimGimlet01": board "SimGimletSp" SpSlot1 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarSp" SpSlot1 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarSp" - RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimRot" - RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimRot" + SpSlot1 baseboard part "i86pc" serial "SimGimlet00": board "SimGimletSp" + SpSlot1 baseboard part "i86pc" serial "SimGimlet01": board "SimGimletSp" RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" - RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimRot" - RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimRot" + RotSlotA baseboard part "i86pc" serial "SimGimlet00": board "SimRot" + RotSlotA baseboard part "i86pc" serial "SimGimlet01": board "SimRot" RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" + RotSlotB baseboard part "i86pc" serial "SimGimlet00": board "SimRot" + RotSlotB baseboard part "i86pc" serial "SimGimlet01": board "SimRot" rot pages found: - Cmpa baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - Cmpa baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 
"Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" Cmpa baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" Cmpa baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaActive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaActive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 
"Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "i86pc" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "i86pc" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaActive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaActive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 
"c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaInactive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaInactive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "i86pc" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "i86pc" serial "SimGimlet01": data_base64 
"Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaInactive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaInactive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaScratch baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaScratch baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 
"Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "i86pc" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "i86pc" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 
"c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "i86pc" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "i86pc" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" sled agents found: sled 03265caf-da7d-46c7-b1c2-39fa90ce5c65 (Gimlet) diff --git a/nexus/inventory/tests/output/collector_errors.txt b/nexus/inventory/tests/output/collector_errors.txt index c39d6b249a..20e9bb301e 100644 --- a/nexus/inventory/tests/output/collector_errors.txt +++ b/nexus/inventory/tests/output/collector_errors.txt @@ -1,8 +1,8 @@ baseboards: - part "FAKE_SIM_GIMLET" serial "SimGimlet00" - part "FAKE_SIM_GIMLET" serial "SimGimlet01" part "FAKE_SIM_SIDECAR" serial "SimSidecar0" part "FAKE_SIM_SIDECAR" serial "SimSidecar1" + part "i86pc" serial "SimGimlet00" + part "i86pc" serial "SimGimlet01" cabooses: board "SimGimletSp" name "SimGimlet" version "0.0.1" git_commit "ffffffff" @@ -21,52 +21,52 @@ rot pages: data_base64 
"c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" SPs: - baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00" - baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01" baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0" baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1" + baseboard part "i86pc" serial "SimGimlet00" + baseboard part "i86pc" serial "SimGimlet01" RoTs: - baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00" - baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01" baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0" baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1" + baseboard part "i86pc" serial "SimGimlet00" + baseboard part "i86pc" serial "SimGimlet01" cabooses found: - SpSlot0 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimGimletSp" - SpSlot0 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletSp" SpSlot0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarSp" SpSlot0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarSp" - SpSlot1 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimGimletSp" - SpSlot1 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletSp" + SpSlot0 baseboard part "i86pc" serial "SimGimlet00": board "SimGimletSp" + SpSlot0 baseboard part "i86pc" serial "SimGimlet01": board "SimGimletSp" SpSlot1 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarSp" SpSlot1 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarSp" - RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimRot" - RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimRot" + SpSlot1 baseboard part "i86pc" serial "SimGimlet00": board "SimGimletSp" + SpSlot1 baseboard part "i86pc" serial "SimGimlet01": board "SimGimletSp" RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" - RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimRot" - RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimRot" + RotSlotA baseboard part "i86pc" serial "SimGimlet00": board "SimRot" + RotSlotA baseboard part "i86pc" serial "SimGimlet01": board "SimRot" RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" + RotSlotB baseboard part "i86pc" serial "SimGimlet00": board "SimRot" + RotSlotB baseboard part "i86pc" serial "SimGimlet01": board "SimRot" rot pages found: - Cmpa baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - Cmpa baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" Cmpa baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" Cmpa baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaActive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaActive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "i86pc" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "i86pc" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaActive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 
"c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaActive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaInactive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaInactive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "i86pc" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "i86pc" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaInactive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaInactive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaScratch baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaScratch baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "i86pc" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "i86pc" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 
"c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "i86pc" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "i86pc" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" sled agents found: diff --git a/nexus/inventory/tests/output/collector_sled_agent_errors.txt b/nexus/inventory/tests/output/collector_sled_agent_errors.txt index 9ebf2cece9..aaa31fd1bb 100644 --- a/nexus/inventory/tests/output/collector_sled_agent_errors.txt +++ b/nexus/inventory/tests/output/collector_sled_agent_errors.txt @@ -1,8 +1,8 @@ baseboards: - part "FAKE_SIM_GIMLET" serial "SimGimlet00" - part "FAKE_SIM_GIMLET" serial "SimGimlet01" part "FAKE_SIM_SIDECAR" serial "SimSidecar0" part "FAKE_SIM_SIDECAR" serial "SimSidecar1" + part "i86pc" serial "SimGimlet00" + part 
"i86pc" serial "SimGimlet01" part "sim-gimlet" serial "sim-9cb9b78f-5614-440c-b66d-e8e81fab69b0" cabooses: @@ -22,52 +22,52 @@ rot pages: data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" SPs: - baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00" - baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01" baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0" baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1" + baseboard part "i86pc" serial "SimGimlet00" + baseboard part "i86pc" serial "SimGimlet01" RoTs: - baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00" - baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01" baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0" baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1" + baseboard part "i86pc" serial "SimGimlet00" + baseboard part "i86pc" serial "SimGimlet01" cabooses found: - SpSlot0 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimGimletSp" - SpSlot0 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletSp" SpSlot0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarSp" SpSlot0 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarSp" - SpSlot1 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimGimletSp" - SpSlot1 baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimGimletSp" + SpSlot0 baseboard part "i86pc" serial "SimGimlet00": board "SimGimletSp" + SpSlot0 baseboard part "i86pc" serial "SimGimlet01": board "SimGimletSp" SpSlot1 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimSidecarSp" SpSlot1 baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimSidecarSp" - RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimRot" - RotSlotA baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimRot" + SpSlot1 baseboard part "i86pc" serial "SimGimlet00": board "SimGimletSp" + SpSlot1 baseboard part "i86pc" serial "SimGimlet01": board "SimGimletSp" RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" RotSlotA baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" - RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": board "SimRot" - RotSlotB baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": board "SimRot" + RotSlotA baseboard part "i86pc" serial "SimGimlet00": board "SimRot" + RotSlotA baseboard part "i86pc" serial "SimGimlet01": board "SimRot" RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": board "SimRot" RotSlotB baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": board "SimRot" + RotSlotB baseboard part "i86pc" serial "SimGimlet00": board "SimRot" + RotSlotB baseboard part "i86pc" serial "SimGimlet01": board "SimRot" rot pages found: - Cmpa baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - Cmpa baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" Cmpa baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" Cmpa baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jbXBhAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaActive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaActive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "i86pc" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + Cmpa baseboard part "i86pc" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNtcGEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaActive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 
"c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaActive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLWFjdGl2ZQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaInactive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaInactive baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "i86pc" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaActive baseboard part "i86pc" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaInactive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 "c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaInactive baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLWluYWN0aXZlAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaScratch baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet00": data_base64 
"Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" - CfpaScratch baseboard part "FAKE_SIM_GIMLET" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "i86pc" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaInactive baseboard part "i86pc" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtaW5hY3RpdmUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar0": data_base64 
"c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" CfpaScratch baseboard part "FAKE_SIM_SIDECAR" serial "SimSidecar1": data_base64 "c2lkZWNhci1jZnBhLXNjcmF0Y2gAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "i86pc" serial "SimGimlet00": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" + CfpaScratch baseboard part "i86pc" serial "SimGimlet01": data_base64 "Z2ltbGV0LWNmcGEtc2NyYXRjaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=" sled agents found: sled 9cb9b78f-5614-440c-b66d-e8e81fab69b0 (Gimlet) diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs index 5a7949a288..0c109c1bd7 100644 --- a/sp-sim/src/gimlet.rs +++ b/sp-sim/src/gimlet.rs @@ -683,7 +683,8 @@ impl Handler { } fn sp_state_impl(&self) -> SpStateV2 { - const FAKE_GIMLET_MODEL: &[u8] = b"FAKE_SIM_GIMLET"; + // Make the Baseboard a PC so that our testbeds work as expected. 
+ const FAKE_GIMLET_MODEL: &[u8] = b"i86pc"; let mut model = [0; 32]; model[..FAKE_GIMLET_MODEL.len()].copy_from_slice(FAKE_GIMLET_MODEL); From 6132e1393d2587606631beab04e1445fbaf745ce Mon Sep 17 00:00:00 2001 From: David Crespo Date: Thu, 1 Feb 2024 20:41:28 -0600 Subject: [PATCH 83/91] Exclude non-discoverable silos from IP pool silos list (#4962) They're still linkable, just like they're still GETable by name or ID. They just don't show up in lists. Closes https://github.com/oxidecomputer/omicron/issues/4955 --- nexus/db-model/src/schema.rs | 7 +- nexus/db-queries/src/db/datastore/ip_pool.rs | 4 ++ nexus/tests/integration_tests/ip_pools.rs | 74 +++++++++++++++----- 3 files changed, 67 insertions(+), 18 deletions(-) diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index cfe7daa27e..736442282c 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1511,7 +1511,12 @@ table! { } } -allow_tables_to_appear_in_same_query!(ip_pool_range, ip_pool, ip_pool_resource); +allow_tables_to_appear_in_same_query!( + ip_pool_range, + ip_pool, + ip_pool_resource, + silo +); joinable!(ip_pool_range -> ip_pool (ip_pool_id)); joinable!(ip_pool_resource -> ip_pool (ip_pool_id)); diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs index d316d1adb7..4634fda9ee 100644 --- a/nexus/db-queries/src/db/datastore/ip_pool.rs +++ b/nexus/db-queries/src/db/datastore/ip_pool.rs @@ -330,6 +330,7 @@ impl DataStore { ) -> ListResultVec { use db::schema::ip_pool; use db::schema::ip_pool_resource; + use db::schema::silo; paginated( ip_pool_resource::table, @@ -337,8 +338,11 @@ impl DataStore { pagparams, ) .inner_join(ip_pool::table) + .inner_join(silo::table.on(silo::id.eq(ip_pool_resource::resource_id))) .filter(ip_pool::id.eq(authz_pool.id())) .filter(ip_pool::time_deleted.is_null()) + .filter(silo::time_deleted.is_null()) + .filter(silo::discoverable.eq(true)) .select(IpPoolResource::as_select()) .load_async::( &*self.pool_connection_authorized(opctx).await?, diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs index 4f88fa787b..f1d8825d0e 100644 --- a/nexus/tests/integration_tests/ip_pools.rs +++ b/nexus/tests/integration_tests/ip_pools.rs @@ -438,8 +438,13 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { let assocs_p0 = silos_for_pool(client, "p0").await; assert_eq!(assocs_p0.items.len(), 0); - let silo_name = cptestctx.silo_name.as_str(); - let silo_pools = pools_for_silo(client, silo_name).await; + // we need to use a discoverable silo because non-discoverable silos, while + // linkable, are filtered out of the list of linked silos for a pool. the + // test silo at cptestctx.silo_name is non-discoverable. 
+ let silo = + create_silo(&client, "my-silo", true, SiloIdentityMode::SamlJit).await; + + let silo_pools = pools_for_silo(client, silo.name().as_str()).await; assert_eq!(silo_pools.len(), 0); // expect 404 on association if the specified silo doesn't exist @@ -466,9 +471,10 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { assert_eq!(error.message, not_found); // associate by name with silo that exists - let silo = NameOrId::Name(cptestctx.silo_name.clone()); - let params = - params::IpPoolLinkSilo { silo: silo.clone(), is_default: false }; + let params = params::IpPoolLinkSilo { + silo: NameOrId::Name(silo.name().clone()), + is_default: false, + }; let _: IpPoolSiloLink = object_create(client, "/v1/system/ip-pools/p0/silos", ¶ms).await; @@ -483,7 +489,7 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { assert_eq!(error.error_code.unwrap(), "ObjectAlreadyExists"); // get silo ID so we can test association by ID as well - let silo_url = format!("/v1/system/silos/{}", cptestctx.silo_name); + let silo_url = format!("/v1/system/silos/{}", silo.name()); let silo_id = object_get::(client, &silo_url).await.identity.id; let assocs_p0 = silos_for_pool(client, "p0").await; @@ -495,7 +501,7 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { assert_eq!(assocs_p0.items.len(), 1); assert_eq!(assocs_p0.items[0], silo_link); - let silo_pools = pools_for_silo(client, silo_name).await; + let silo_pools = pools_for_silo(client, silo.name().as_str()).await; assert_eq!(silo_pools.len(), 1); assert_eq!(silo_pools[0].identity.id, p0.identity.id); assert_eq!(silo_pools[0].is_default, false); @@ -519,7 +525,7 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { } ); - let silo_pools = pools_for_silo(client, silo_name).await; + let silo_pools = pools_for_silo(client, silo.name().as_str()).await; assert_eq!(silo_pools.len(), 2); assert_eq!(silo_pools[0].id(), p0.id()); assert_eq!(silo_pools[0].is_default, false); @@ -539,14 +545,14 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { assert_eq!(error.error_code.unwrap(), "ObjectAlreadyExists"); // unlink p1 from silo (doesn't matter that it's a default) - let url = format!("/v1/system/ip-pools/p1/silos/{}", cptestctx.silo_name); + let url = format!("/v1/system/ip-pools/p1/silos/{}", silo.name().as_str()); object_delete(client, &url).await; let silos_p1 = silos_for_pool(client, "p1").await; assert_eq!(silos_p1.items.len(), 0); // after unlinking p1, only p0 is left - let silo_pools = pools_for_silo(client, silo_name).await; + let silo_pools = pools_for_silo(client, silo.name().as_str()).await; assert_eq!(silo_pools.len(), 1); assert_eq!(silo_pools[0].identity.id, p0.identity.id); assert_eq!(silo_pools[0].is_default, false); @@ -555,6 +561,35 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { object_delete(client, "/v1/system/ip-pools/p1").await; } +/// Non-discoverable silos can be linked to a pool, but they do not show up +/// in the list of silos for that pool, just as they do not show up in the +/// top-level list of silos +#[nexus_test] +async fn test_ip_pool_silo_list_only_discoverable( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + create_pool(client, "p0").await; + + // there should be no linked silos + let silos_p0 = silos_for_pool(client, "p0").await; + assert_eq!(silos_p0.items.len(), 0); + + let silo_disc = + create_silo(&client, "silo-disc", true, SiloIdentityMode::SamlJit) + 
.await; + link_ip_pool(client, "p0", &silo_disc.id(), false).await; + + let silo_non_disc = + create_silo(&client, "silo-non-disc", false, SiloIdentityMode::SamlJit) + .await; + link_ip_pool(client, "p0", &silo_non_disc.id(), false).await; + + let silos_p0 = silos_for_pool(client, "p0").await; + assert_eq!(silos_p0.items.len(), 1); + assert_eq!(silos_p0.items[0].silo_id, silo_disc.id()); +} + #[nexus_test] async fn test_ip_pool_update_default(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; @@ -569,10 +604,15 @@ async fn test_ip_pool_update_default(cptestctx: &ControlPlaneTestContext) { let silos_p1 = silos_for_pool(client, "p1").await; assert_eq!(silos_p1.items.len(), 0); + // we need to use a discoverable silo because non-discoverable silos, while + // linkable, are filtered out of the list of linked silos for a pool. the + // test silo at cptestctx.silo_name is non-discoverable. + let silo = + create_silo(&client, "my-silo", true, SiloIdentityMode::SamlJit).await; + // put 404s if link doesn't exist yet let params = IpPoolSiloUpdate { is_default: true }; - let p0_silo_url = - format!("/v1/system/ip-pools/p0/silos/{}", cptestctx.silo_name); + let p0_silo_url = format!("/v1/system/ip-pools/p0/silos/{}", silo.name()); let error = object_put_error(client, &p0_silo_url, ¶ms, StatusCode::NOT_FOUND) .await; @@ -582,9 +622,10 @@ async fn test_ip_pool_update_default(cptestctx: &ControlPlaneTestContext) { ); // associate both pools with the test silo - let silo = NameOrId::Name(cptestctx.silo_name.clone()); - let params = - params::IpPoolLinkSilo { silo: silo.clone(), is_default: false }; + let params = params::IpPoolLinkSilo { + silo: NameOrId::Name(silo.name().clone()), + is_default: false, + }; let _: IpPoolSiloLink = object_create(client, "/v1/system/ip-pools/p0/silos", ¶ms).await; let _: IpPoolSiloLink = @@ -620,8 +661,7 @@ async fn test_ip_pool_update_default(cptestctx: &ControlPlaneTestContext) { // set p1 default let params = IpPoolSiloUpdate { is_default: true }; - let p1_silo_url = - format!("/v1/system/ip-pools/p1/silos/{}", cptestctx.silo_name); + let p1_silo_url = format!("/v1/system/ip-pools/p1/silos/{}", silo.name()); let _: IpPoolSiloLink = object_put(client, &p1_silo_url, ¶ms).await; // p1 is now default From 4e901691c4e3dadef06533e3c2f8794dfb5f31f7 Mon Sep 17 00:00:00 2001 From: Rain Date: Thu, 1 Feb 2024 20:00:40 -0800 Subject: [PATCH 84/91] [nexus-macros] convert dump tests to snapshot tests (#4965) Currently, these "dump" tests are mainly for debugging. However, the output of these macros is expected to be stable, so we can turn them into snapshot tests. (Ordinarily I'd use cargo-insta for snapshot tests, but we use expectorate which works fine.) 
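As a sketch of the pattern (the names below are illustrative, not taken from this change): an expectorate snapshot test renders its output to a string and compares it against a checked-in file, which is regenerated by re-running the test with the EXPECTORATE=overwrite environment variable set.

    use expectorate::assert_contents;

    // Hypothetical producer of the text under test; in this change it is
    // the pretty-printed output of the authz/db macros.
    fn render_output() -> String {
        "generated code\n".to_string()
    }

    #[test]
    fn test_snapshot() {
        // Fails with a diff if outputs/example.txt does not match; run with
        // EXPECTORATE=overwrite to accept intended changes.
        assert_contents("outputs/example.txt", &render_output());
    }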
--- Cargo.lock | 2 + Cargo.toml | 2 +- nexus/authz-macros/Cargo.toml | 1 + nexus/authz-macros/outputs/instance.txt | 79 ++++ nexus/authz-macros/outputs/organization.txt | 75 ++++ nexus/authz-macros/src/lib.rs | 13 +- nexus/db-macros/Cargo.toml | 1 + nexus/db-macros/outputs/project.txt | 393 ++++++++++++++++++++ nexus/db-macros/outputs/silo_user.txt | 183 +++++++++ nexus/db-macros/outputs/update_artifact.txt | 209 +++++++++++ nexus/db-macros/src/lookup.rs | 26 +- 11 files changed, 967 insertions(+), 17 deletions(-) create mode 100644 nexus/authz-macros/outputs/instance.txt create mode 100644 nexus/authz-macros/outputs/organization.txt create mode 100644 nexus/db-macros/outputs/project.txt create mode 100644 nexus/db-macros/outputs/silo_user.txt create mode 100644 nexus/db-macros/outputs/update_artifact.txt diff --git a/Cargo.lock b/Cargo.lock index b41abce1f4..a54d03e52a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -352,6 +352,7 @@ dependencies = [ name = "authz-macros" version = "0.1.0" dependencies = [ + "expectorate", "heck 0.4.1", "omicron-workspace-hack", "prettyplease", @@ -1511,6 +1512,7 @@ dependencies = [ name = "db-macros" version = "0.1.0" dependencies = [ + "expectorate", "heck 0.4.1", "omicron-workspace-hack", "prettyplease", diff --git a/Cargo.toml b/Cargo.toml index b83c8a44bb..600fbf185c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -301,7 +301,7 @@ postgres-protocol = "0.6.6" predicates = "3.1.0" pretty_assertions = "1.4.0" pretty-hex = "0.4.1" -prettyplease = "0.2.16" +prettyplease = { version = "0.2.16", features = ["verbatim"] } proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } diff --git a/nexus/authz-macros/Cargo.toml b/nexus/authz-macros/Cargo.toml index 816100eb58..e9bdaf4708 100644 --- a/nexus/authz-macros/Cargo.toml +++ b/nexus/authz-macros/Cargo.toml @@ -17,4 +17,5 @@ syn.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] +expectorate.workspace = true prettyplease.workspace = true diff --git a/nexus/authz-macros/outputs/instance.txt b/nexus/authz-macros/outputs/instance.txt new file mode 100644 index 0000000000..abebf2f742 --- /dev/null +++ b/nexus/authz-macros/outputs/instance.txt @@ -0,0 +1,79 @@ +///`authz` type for a resource of type InstanceUsed to uniquely identify a resource of type Instance across renames, moves, etc., and to do authorization checks (see [`crate::context::OpContext::authorize()`]). See [`crate::authz`] module-level documentation for more information. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Instance { + parent: Project, + key: (String, String), + lookup_type: LookupType, +} +impl Instance { + /// Makes a new `authz` struct for this resource with the given + /// `parent`, unique key `key`, looked up as described by + /// `lookup_type` + pub fn new( + parent: Project, + key: SomeCompositeId, + lookup_type: LookupType, + ) -> Instance { + Instance { + parent, + key: key.into(), + lookup_type, + } + } + /// A version of `new` that takes the primary key type directly. + /// This is only different from [`Self::new`] if this resource + /// uses a different input key type. 
+ pub fn with_primary_key( + parent: Project, + key: (String, String), + lookup_type: LookupType, + ) -> Instance { + Instance { + parent, + key, + lookup_type, + } + } + pub fn id(&self) -> (String, String) { + self.key.clone().into() + } + /// Describes how to register this type with Oso + pub(super) fn init() -> Init { + use oso::PolarClass; + Init { + polar_snippet: "\n resource Instance {\n permissions = [\n \"list_children\",\n \"modify\",\n \"read\",\n \"create_child\",\n ];\n\n relations = { containing_project: Project };\n \"list_children\" if \"viewer\" on \"containing_project\";\n \"read\" if \"viewer\" on \"containing_project\";\n \"modify\" if \"collaborator\" on \"containing_project\";\n \"create_child\" if \"collaborator\" on \"containing_project\";\n }\n\n has_relation(parent: Project, \"containing_project\", child: Instance)\n if child.project = parent;\n ", + polar_class: Instance::get_polar_class(), + } + } +} +impl Eq for Instance {} +impl PartialEq for Instance { + fn eq(&self, other: &Self) -> bool { + self.key == other.key + } +} +impl oso::PolarClass for Instance { + fn get_polar_class_builder() -> oso::ClassBuilder { + oso::Class::builder() + .with_equality_check() + .add_method( + "has_role", + |r: &Instance, actor: AuthenticatedActor, role: String| { false }, + ) + .add_attribute_getter("project", |r: &Instance| r.parent.clone()) + } +} +impl ApiResource for Instance { + fn parent(&self) -> Option<&dyn AuthorizedResource> { + Some(&self.parent) + } + fn resource_type(&self) -> ResourceType { + ResourceType::Instance + } + fn lookup_type(&self) -> &LookupType { + &self.lookup_type + } + fn as_resource_with_roles(&self) -> Option<&dyn ApiResourceWithRoles> { + None + } +} diff --git a/nexus/authz-macros/outputs/organization.txt b/nexus/authz-macros/outputs/organization.txt new file mode 100644 index 0000000000..e6215a07aa --- /dev/null +++ b/nexus/authz-macros/outputs/organization.txt @@ -0,0 +1,75 @@ +///`authz` type for a resource of type OrganizationUsed to uniquely identify a resource of type Organization across renames, moves, etc., and to do authorization checks (see [`crate::context::OpContext::authorize()`]). See [`crate::authz`] module-level documentation for more information. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Organization { + parent: Fleet, + key: Uuid, + lookup_type: LookupType, +} +impl Organization { + /// Makes a new `authz` struct for this resource with the given + /// `parent`, unique key `key`, looked up as described by + /// `lookup_type` + pub fn new(parent: Fleet, key: Uuid, lookup_type: LookupType) -> Organization { + Organization { + parent, + key: key.into(), + lookup_type, + } + } + /// A version of `new` that takes the primary key type directly. + /// This is only different from [`Self::new`] if this resource + /// uses a different input key type. 
+ pub fn with_primary_key( + parent: Fleet, + key: Uuid, + lookup_type: LookupType, + ) -> Organization { + Organization { + parent, + key, + lookup_type, + } + } + pub fn id(&self) -> Uuid { + self.key.clone().into() + } + /// Describes how to register this type with Oso + pub(super) fn init() -> Init { + use oso::PolarClass; + Init { + polar_snippet: "", + polar_class: Organization::get_polar_class(), + } + } +} +impl Eq for Organization {} +impl PartialEq for Organization { + fn eq(&self, other: &Self) -> bool { + self.key == other.key + } +} +impl oso::PolarClass for Organization { + fn get_polar_class_builder() -> oso::ClassBuilder { + oso::Class::builder() + .with_equality_check() + .add_method( + "has_role", + |r: &Organization, actor: AuthenticatedActor, role: String| { false }, + ) + .add_attribute_getter("fleet", |r: &Organization| r.parent.clone()) + } +} +impl ApiResource for Organization { + fn parent(&self) -> Option<&dyn AuthorizedResource> { + Some(&self.parent) + } + fn resource_type(&self) -> ResourceType { + ResourceType::Organization + } + fn lookup_type(&self) -> &LookupType { + &self.lookup_type + } + fn as_resource_with_roles(&self) -> Option<&dyn ApiResourceWithRoles> { + None + } +} diff --git a/nexus/authz-macros/src/lib.rs b/nexus/authz-macros/src/lib.rs index 3d6f265fea..648ae6d952 100644 --- a/nexus/authz-macros/src/lib.rs +++ b/nexus/authz-macros/src/lib.rs @@ -460,12 +460,17 @@ fn do_authz_resource( }) } -// See the test for lookup_resource. #[cfg(test)] mod tests { use super::*; + use expectorate::assert_contents; + + /// Ensures that generated code is as expected. + /// + /// For more information, see `test_lookup_snapshots` in + /// nexus/db-macros/src/lookup.rs. #[test] - fn test_authz_dump() { + fn test_authz_snapshots() { let output = do_authz_resource(quote! { name = "Organization", parent = "Fleet", @@ -474,7 +479,7 @@ mod tests { polar_snippet = Custom, }) .unwrap(); - println!("{}", pretty_format(output)); + assert_contents("outputs/organization.txt", &pretty_format(output)); let output = do_authz_resource(quote! { name = "Instance", @@ -487,7 +492,7 @@ mod tests { polar_snippet = InProject, }) .unwrap(); - println!("{}", pretty_format(output)); + assert_contents("outputs/instance.txt", &pretty_format(output)); } fn pretty_format(input: TokenStream) -> String { diff --git a/nexus/db-macros/Cargo.toml b/nexus/db-macros/Cargo.toml index 64398b266c..46e5d9a5d6 100644 --- a/nexus/db-macros/Cargo.toml +++ b/nexus/db-macros/Cargo.toml @@ -18,4 +18,5 @@ syn = { workspace = true, features = ["extra-traits"] } omicron-workspace-hack.workspace = true [dev-dependencies] +expectorate.workspace = true prettyplease.workspace = true diff --git a/nexus/db-macros/outputs/project.txt b/nexus/db-macros/outputs/project.txt new file mode 100644 index 0000000000..9f4b5cfaa2 --- /dev/null +++ b/nexus/db-macros/outputs/project.txt @@ -0,0 +1,393 @@ +///Selects a resource of type Project (or any of its children, using the functions on this struct) for lookup or fetch +pub enum Project<'a> { + /// An error occurred while selecting the resource + /// + /// This error will be returned by any lookup/fetch attempts. 
+ Error(Root<'a>, Error), + /// We're looking for a resource with the given name in the given + /// parent collection + Name(Silo<'a>, &'a Name), + /// Same as [`Self::Name`], but the name is owned rather than borrowed + OwnedName(Silo<'a>, Name), + /// We're looking for a resource with the given primary key + /// + /// This has no parent container -- a by-id lookup is always global + PrimaryKey(Root<'a>, Uuid), +} +impl<'a> Project<'a> { + ///Select a resource of type Disk within this Project, identified by its name + pub fn disk_name<'b, 'c>(self, name: &'b Name) -> Disk<'c> + where + 'a: 'c, + 'b: 'c, + { + Disk::Name(self, name) + } + ///Select a resource of type Disk within this Project, identified by its name + pub fn disk_name_owned<'c>(self, name: Name) -> Disk<'c> + where + 'a: 'c, + { + Disk::OwnedName(self, name) + } + ///Select a resource of type Instance within this Project, identified by its name + pub fn instance_name<'b, 'c>(self, name: &'b Name) -> Instance<'c> + where + 'a: 'c, + 'b: 'c, + { + Instance::Name(self, name) + } + ///Select a resource of type Instance within this Project, identified by its name + pub fn instance_name_owned<'c>(self, name: Name) -> Instance<'c> + where + 'a: 'c, + { + Instance::OwnedName(self, name) + } + /// Fetch the record corresponding to the selected resource + /// + /// This is equivalent to `fetch_for(authz::Action::Read)`. + pub async fn fetch( + &self, + ) -> LookupResult<(authz::Silo, authz::Project, nexus_db_model::Project)> { + self.fetch_for(authz::Action::Read).await + } + /// Turn the Result of [`fetch`] into a Result, E>. + pub async fn optional_fetch( + &self, + ) -> LookupResult> { + self.optional_fetch_for(authz::Action::Read).await + } + /// Fetch the record corresponding to the selected resource and + /// check whether the caller is allowed to do the specified `action` + /// + /// The return value is a tuple that also includes the `authz` + /// objects for all resources along the path to this one (i.e., all + /// parent resources) and the authz object for this resource itself. + /// These objects are useful for identifying those resources by + /// id, for doing other authz checks, or for looking up related + /// objects. + pub async fn fetch_for( + &self, + action: authz::Action, + ) -> LookupResult<(authz::Silo, authz::Project, nexus_db_model::Project)> { + let lookup = self.lookup_root(); + let opctx = &lookup.opctx; + let datastore = &lookup.datastore; + match &self { + Project::Error(_, error) => Err(error.clone()), + Project::Name(parent, &ref name) | Project::OwnedName(parent, ref name) => { + let (authz_silo,) = parent.lookup().await?; + let (authz_project, db_row) = Self::fetch_by_name_for( + opctx, + datastore, + &authz_silo, + name, + action, + ) + .await?; + Ok((authz_silo, authz_project, db_row)) + } + Project::PrimaryKey(_, v0) => { + Self::fetch_by_id_for(opctx, datastore, v0, action).await + } + } + .and_then(|input| { + let (ref authz_silo, .., ref authz_project, ref _db_row) = &input; + Self::silo_check(opctx, authz_silo, authz_project)?; + Ok(input) + }) + } + /// Turn the Result of [`fetch_for`] into a Result, E>. 
+ pub async fn optional_fetch_for( + &self, + action: authz::Action, + ) -> LookupResult> { + let result = self.fetch_for(action).await; + match result { + Err(Error::ObjectNotFound { type_name: _, lookup_type: _ }) => Ok(None), + _ => Ok(Some(result?)), + } + } + /// Fetch an `authz` object for the selected resource and check + /// whether the caller is allowed to do the specified `action` + /// + /// The return value is a tuple that also includes the `authz` + /// objects for all resources along the path to this one (i.e., all + /// parent resources) and the authz object for this resource itself. + /// These objects are useful for identifying those resources by + /// id, for doing other authz checks, or for looking up related + /// objects. + pub async fn lookup_for( + &self, + action: authz::Action, + ) -> LookupResult<(authz::Silo, authz::Project)> { + let lookup = self.lookup_root(); + let opctx = &lookup.opctx; + let (authz_silo, authz_project) = self.lookup().await?; + opctx.authorize(action, &authz_project).await?; + Ok((authz_silo, authz_project)) + .and_then(|input| { + let (ref authz_silo, .., ref authz_project) = &input; + Self::silo_check(opctx, authz_silo, authz_project)?; + Ok(input) + }) + } + /// Turn the Result of [`lookup_for`] into a Result, E>. + pub async fn optional_lookup_for( + &self, + action: authz::Action, + ) -> LookupResult> { + let result = self.lookup_for(action).await; + match result { + Err(Error::ObjectNotFound { type_name: _, lookup_type: _ }) => Ok(None), + _ => Ok(Some(result?)), + } + } + /// Fetch the "authz" objects for the selected resource and all its + /// parents + /// + /// This function does not check whether the caller has permission + /// to read this information. That's why it's not `pub`. Outside + /// this module, you want `lookup_for(authz::Action)`. + async fn lookup(&self) -> LookupResult<(authz::Silo, authz::Project)> { + let lookup = self.lookup_root(); + let opctx = &lookup.opctx; + let datastore = &lookup.datastore; + match &self { + Project::Error(_, error) => Err(error.clone()), + Project::Name(parent, &ref name) | Project::OwnedName(parent, ref name) => { + let (authz_silo,) = parent.lookup().await?; + let (authz_project, _) = Self::lookup_by_name_no_authz( + opctx, + datastore, + &authz_silo, + name, + ) + .await?; + Ok((authz_silo, authz_project)) + } + Project::PrimaryKey(_, v0) => { + let (authz_silo, authz_project, _) = Self::lookup_by_id_no_authz( + opctx, + datastore, + v0, + ) + .await?; + Ok((authz_silo, authz_project)) + } + } + } + /// Build the `authz` object for this resource + fn make_authz( + authz_parent: &authz::Silo, + db_row: &nexus_db_model::Project, + lookup_type: LookupType, + ) -> authz::Project { + authz::Project::with_primary_key(authz_parent.clone(), db_row.id(), lookup_type) + } + /// Getting the [`LookupPath`] for this lookup + /// + /// This is used when we actually query the database. At that + /// point, we need the `OpContext` and `DataStore` that are being + /// used for this lookup. + fn lookup_root(&self) -> &LookupPath<'a> { + match &self { + Project::Error(root, ..) => root.lookup_root(), + Project::Name(parent, _) | Project::OwnedName(parent, _) => { + parent.lookup_root() + } + Project::PrimaryKey(root, ..) => root.lookup_root(), + } + } + /// For a "siloed" resource (i.e., one that's nested under "Silo" in + /// the resource hierarchy), check whether a given resource's Silo + /// (given by `authz_silo`) matches the Silo of the actor doing the + /// fetch/lookup (given by `opctx`). 
+ /// + /// This check should not be strictly necessary. We should never + /// wind up hitting the error conditions here. That's because in + /// order to reach this check, we must have done a successful authz + /// check. That check should have failed because there's no way to + /// grant users access to resources in other Silos. So why do this + /// check at all? As a belt-and-suspenders way to make sure we + /// never return objects to a user that are from a different Silo + /// than the one they're attached to. But what do we do if the + /// check fails? We definitely want to know about it so that we can + /// determine if there's an authz bug here, and if so, fix it. + /// That's why we log this at "error" level. We also override the + /// lookup return value with a suitable error indicating the + /// resource does not exist or the caller did not supply + /// credentials, just as if they didn't have access to the object. + fn silo_check( + opctx: &OpContext, + authz_silo: &authz::Silo, + authz_project: &authz::Project, + ) -> Result<(), Error> { + let log = &opctx.log; + let actor_silo_id = match opctx + .authn + .silo_or_builtin() + .internal_context("siloed resource check") + { + Ok(Some(silo)) => silo.id(), + Ok(None) => { + trace!( + log, + "successful lookup of siloed resource {:?} \ + using built-in user", + "Project", + ); + return Ok(()); + } + Err(error) => { + error!( + log, + "unexpected successful lookup of siloed resource \ + {:?} with no actor in OpContext", + "Project", + ); + return Err(error); + } + }; + let resource_silo_id = authz_silo.id(); + if resource_silo_id != actor_silo_id { + use crate::authz::ApiResource; + error!( + log, + "unexpected successful lookup of siloed resource \ + {:?} in a different Silo from current actor (resource \ + Silo {}, actor Silo {})", + "Project", resource_silo_id, actor_silo_id, + ); + Err(authz_project.not_found()) + } else { + Ok(()) + } + } + /// Fetch the database row for a resource by doing a lookup by + /// name, possibly within a collection + /// + /// This function checks whether the caller has permissions to + /// read the requested data. However, it's not intended to be + /// used outside this module. See `fetch_for(authz::Action)`. + async fn fetch_by_name_for( + opctx: &OpContext, + datastore: &DataStore, + authz_silo: &authz::Silo, + name: &Name, + action: authz::Action, + ) -> LookupResult<(authz::Project, nexus_db_model::Project)> { + let (authz_project, db_row) = Self::lookup_by_name_no_authz( + opctx, + datastore, + authz_silo, + name, + ) + .await?; + opctx.authorize(action, &authz_project).await?; + Ok((authz_project, db_row)) + } + /// Lowest-level function for looking up a resource in the + /// database by name, possibly within a collection + /// + /// This function does not check whether the caller has + /// permission to read this information. That's why it's not + /// `pub`. Outside this module, you want `fetch()` or + /// `lookup_for(authz::Action)`. + async fn lookup_by_name_no_authz( + opctx: &OpContext, + datastore: &DataStore, + authz_silo: &authz::Silo, + name: &Name, + ) -> LookupResult<(authz::Project, nexus_db_model::Project)> { + use db::schema::project::dsl; + dsl::project + .filter(dsl::time_deleted.is_null()) + .filter(dsl::name.eq(name.clone())) + .filter(dsl::silo_id.eq(authz_silo.id())) + .select(nexus_db_model::Project::as_select()) + .get_result_async(&*datastore.pool_connection_authorized(opctx).await?) 
+ .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Project, + LookupType::ByName(name.as_str().to_string()), + ), + ) + }) + .map(|db_row| { + ( + Self::make_authz( + authz_silo, + &db_row, + LookupType::ByName(name.as_str().to_string()), + ), + db_row, + ) + }) + } + /// Fetch the database row for a resource by doing a lookup by id + /// + /// This function checks whether the caller has permissions to read + /// the requested data. However, it's not intended to be used + /// outside this module. See `fetch_for(authz::Action)`. + async fn fetch_by_id_for( + opctx: &OpContext, + datastore: &DataStore, + v0: &Uuid, + action: authz::Action, + ) -> LookupResult<(authz::Silo, authz::Project, nexus_db_model::Project)> { + let (authz_silo, authz_project, db_row) = Self::lookup_by_id_no_authz( + opctx, + datastore, + v0, + ) + .await?; + opctx.authorize(action, &authz_project).await?; + Ok((authz_silo, authz_project, db_row)) + } + /// Lowest-level function for looking up a resource in the database + /// by id + /// + /// This function does not check whether the caller has permission + /// to read this information. That's why it's not `pub`. Outside + /// this module, you want `fetch()` or `lookup_for(authz::Action)`. + async fn lookup_by_id_no_authz( + opctx: &OpContext, + datastore: &DataStore, + v0: &Uuid, + ) -> LookupResult<(authz::Silo, authz::Project, nexus_db_model::Project)> { + use db::schema::project::dsl; + let db_row = dsl::project + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(v0.clone())) + .select(nexus_db_model::Project::as_select()) + .get_result_async(&*datastore.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Project, + LookupType::ById(v0.clone()), + ), + ) + })?; + let (authz_silo, _) = Silo::lookup_by_id_no_authz( + opctx, + datastore, + &db_row.silo_id, + ) + .await?; + let authz_project = Self::make_authz( + &authz_silo, + &db_row, + LookupType::ById(v0.clone()), + ); + Ok((authz_silo, authz_project, db_row)) + } +} diff --git a/nexus/db-macros/outputs/silo_user.txt b/nexus/db-macros/outputs/silo_user.txt new file mode 100644 index 0000000000..2c9568ff54 --- /dev/null +++ b/nexus/db-macros/outputs/silo_user.txt @@ -0,0 +1,183 @@ +///Selects a resource of type SiloUser (or any of its children, using the functions on this struct) for lookup or fetch +pub enum SiloUser<'a> { + /// An error occurred while selecting the resource + /// + /// This error will be returned by any lookup/fetch attempts. + Error(Root<'a>, Error), + /// We're looking for a resource with the given primary key + /// + /// This has no parent container -- a by-id lookup is always global + PrimaryKey(Root<'a>, Uuid), +} +impl<'a> SiloUser<'a> { + /// Fetch the record corresponding to the selected resource + /// + /// This is equivalent to `fetch_for(authz::Action::Read)`. + pub async fn fetch( + &self, + ) -> LookupResult<(authz::SiloUser, nexus_db_model::SiloUser)> { + self.fetch_for(authz::Action::Read).await + } + /// Turn the Result of [`fetch`] into a Result, E>. 
+ pub async fn optional_fetch( + &self, + ) -> LookupResult> { + self.optional_fetch_for(authz::Action::Read).await + } + /// Fetch the record corresponding to the selected resource and + /// check whether the caller is allowed to do the specified `action` + /// + /// The return value is a tuple that also includes the `authz` + /// objects for all resources along the path to this one (i.e., all + /// parent resources) and the authz object for this resource itself. + /// These objects are useful for identifying those resources by + /// id, for doing other authz checks, or for looking up related + /// objects. + pub async fn fetch_for( + &self, + action: authz::Action, + ) -> LookupResult<(authz::SiloUser, nexus_db_model::SiloUser)> { + let lookup = self.lookup_root(); + let opctx = &lookup.opctx; + let datastore = &lookup.datastore; + match &self { + SiloUser::Error(_, error) => Err(error.clone()), + SiloUser::PrimaryKey(_, v0) => { + Self::fetch_by_id_for(opctx, datastore, v0, action).await + } + } + } + /// Turn the Result of [`fetch_for`] into a Result, E>. + pub async fn optional_fetch_for( + &self, + action: authz::Action, + ) -> LookupResult> { + let result = self.fetch_for(action).await; + match result { + Err(Error::ObjectNotFound { type_name: _, lookup_type: _ }) => Ok(None), + _ => Ok(Some(result?)), + } + } + /// Fetch an `authz` object for the selected resource and check + /// whether the caller is allowed to do the specified `action` + /// + /// The return value is a tuple that also includes the `authz` + /// objects for all resources along the path to this one (i.e., all + /// parent resources) and the authz object for this resource itself. + /// These objects are useful for identifying those resources by + /// id, for doing other authz checks, or for looking up related + /// objects. + pub async fn lookup_for( + &self, + action: authz::Action, + ) -> LookupResult<(authz::SiloUser,)> { + let lookup = self.lookup_root(); + let opctx = &lookup.opctx; + let (authz_silo_user,) = self.lookup().await?; + opctx.authorize(action, &authz_silo_user).await?; + Ok((authz_silo_user,)) + } + /// Turn the Result of [`lookup_for`] into a Result, E>. + pub async fn optional_lookup_for( + &self, + action: authz::Action, + ) -> LookupResult> { + let result = self.lookup_for(action).await; + match result { + Err(Error::ObjectNotFound { type_name: _, lookup_type: _ }) => Ok(None), + _ => Ok(Some(result?)), + } + } + /// Fetch the "authz" objects for the selected resource and all its + /// parents + /// + /// This function does not check whether the caller has permission + /// to read this information. That's why it's not `pub`. Outside + /// this module, you want `lookup_for(authz::Action)`. + async fn lookup(&self) -> LookupResult<(authz::SiloUser,)> { + let lookup = self.lookup_root(); + let opctx = &lookup.opctx; + let datastore = &lookup.datastore; + match &self { + SiloUser::Error(_, error) => Err(error.clone()), + SiloUser::PrimaryKey(_, v0) => { + let (authz_silo_user, _) = Self::lookup_by_id_no_authz( + opctx, + datastore, + v0, + ) + .await?; + Ok((authz_silo_user,)) + } + } + } + /// Build the `authz` object for this resource + fn make_authz( + authz_parent: &authz::Fleet, + db_row: &nexus_db_model::SiloUser, + lookup_type: LookupType, + ) -> authz::SiloUser { + authz::SiloUser::with_primary_key(authz_parent.clone(), db_row.id(), lookup_type) + } + /// Getting the [`LookupPath`] for this lookup + /// + /// This is used when we actually query the database. 
At that + /// point, we need the `OpContext` and `DataStore` that are being + /// used for this lookup. + fn lookup_root(&self) -> &LookupPath<'a> { + match &self { + SiloUser::Error(root, ..) => root.lookup_root(), + SiloUser::PrimaryKey(root, ..) => root.lookup_root(), + } + } + /// Fetch the database row for a resource by doing a lookup by id + /// + /// This function checks whether the caller has permissions to read + /// the requested data. However, it's not intended to be used + /// outside this module. See `fetch_for(authz::Action)`. + async fn fetch_by_id_for( + opctx: &OpContext, + datastore: &DataStore, + v0: &Uuid, + action: authz::Action, + ) -> LookupResult<(authz::SiloUser, nexus_db_model::SiloUser)> { + let (authz_silo_user, db_row) = Self::lookup_by_id_no_authz(opctx, datastore, v0) + .await?; + opctx.authorize(action, &authz_silo_user).await?; + Ok((authz_silo_user, db_row)) + } + /// Lowest-level function for looking up a resource in the database + /// by id + /// + /// This function does not check whether the caller has permission + /// to read this information. That's why it's not `pub`. Outside + /// this module, you want `fetch()` or `lookup_for(authz::Action)`. + async fn lookup_by_id_no_authz( + opctx: &OpContext, + datastore: &DataStore, + v0: &Uuid, + ) -> LookupResult<(authz::SiloUser, nexus_db_model::SiloUser)> { + use db::schema::silo_user::dsl; + let db_row = dsl::silo_user + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(v0.clone())) + .select(nexus_db_model::SiloUser::as_select()) + .get_result_async(&*datastore.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::SiloUser, + LookupType::ById(v0.clone()), + ), + ) + })?; + let authz_silo_user = Self::make_authz( + &&authz::FLEET, + &db_row, + LookupType::ById(v0.clone()), + ); + Ok((authz_silo_user, db_row)) + } +} diff --git a/nexus/db-macros/outputs/update_artifact.txt b/nexus/db-macros/outputs/update_artifact.txt new file mode 100644 index 0000000000..c5b67a4cb9 --- /dev/null +++ b/nexus/db-macros/outputs/update_artifact.txt @@ -0,0 +1,209 @@ +///Selects a resource of type UpdateArtifact (or any of its children, using the functions on this struct) for lookup or fetch +pub enum UpdateArtifact<'a> { + /// An error occurred while selecting the resource + /// + /// This error will be returned by any lookup/fetch attempts. + Error(Root<'a>, Error), + /// We're looking for a resource with the given primary key + /// + /// This has no parent container -- a by-id lookup is always global + PrimaryKey(Root<'a>, String, i64, KnownArtifactKind), +} +impl<'a> UpdateArtifact<'a> { + /// Fetch the record corresponding to the selected resource + /// + /// This is equivalent to `fetch_for(authz::Action::Read)`. + pub async fn fetch( + &self, + ) -> LookupResult<(authz::UpdateArtifact, nexus_db_model::UpdateArtifact)> { + self.fetch_for(authz::Action::Read).await + } + /// Turn the Result of [`fetch`] into a Result, E>. + pub async fn optional_fetch( + &self, + ) -> LookupResult> { + self.optional_fetch_for(authz::Action::Read).await + } + /// Fetch the record corresponding to the selected resource and + /// check whether the caller is allowed to do the specified `action` + /// + /// The return value is a tuple that also includes the `authz` + /// objects for all resources along the path to this one (i.e., all + /// parent resources) and the authz object for this resource itself. 
+ /// These objects are useful for identifying those resources by + /// id, for doing other authz checks, or for looking up related + /// objects. + pub async fn fetch_for( + &self, + action: authz::Action, + ) -> LookupResult<(authz::UpdateArtifact, nexus_db_model::UpdateArtifact)> { + let lookup = self.lookup_root(); + let opctx = &lookup.opctx; + let datastore = &lookup.datastore; + match &self { + UpdateArtifact::Error(_, error) => Err(error.clone()), + UpdateArtifact::PrimaryKey(_, v0, v1, v2) => { + Self::fetch_by_id_for(opctx, datastore, v0, v1, v2, action).await + } + } + } + /// Turn the Result of [`fetch_for`] into a Result, E>. + pub async fn optional_fetch_for( + &self, + action: authz::Action, + ) -> LookupResult> { + let result = self.fetch_for(action).await; + match result { + Err(Error::ObjectNotFound { type_name: _, lookup_type: _ }) => Ok(None), + _ => Ok(Some(result?)), + } + } + /// Fetch an `authz` object for the selected resource and check + /// whether the caller is allowed to do the specified `action` + /// + /// The return value is a tuple that also includes the `authz` + /// objects for all resources along the path to this one (i.e., all + /// parent resources) and the authz object for this resource itself. + /// These objects are useful for identifying those resources by + /// id, for doing other authz checks, or for looking up related + /// objects. + pub async fn lookup_for( + &self, + action: authz::Action, + ) -> LookupResult<(authz::UpdateArtifact,)> { + let lookup = self.lookup_root(); + let opctx = &lookup.opctx; + let (authz_update_artifact,) = self.lookup().await?; + opctx.authorize(action, &authz_update_artifact).await?; + Ok((authz_update_artifact,)) + } + /// Turn the Result of [`lookup_for`] into a Result, E>. + pub async fn optional_lookup_for( + &self, + action: authz::Action, + ) -> LookupResult> { + let result = self.lookup_for(action).await; + match result { + Err(Error::ObjectNotFound { type_name: _, lookup_type: _ }) => Ok(None), + _ => Ok(Some(result?)), + } + } + /// Fetch the "authz" objects for the selected resource and all its + /// parents + /// + /// This function does not check whether the caller has permission + /// to read this information. That's why it's not `pub`. Outside + /// this module, you want `lookup_for(authz::Action)`. + async fn lookup(&self) -> LookupResult<(authz::UpdateArtifact,)> { + let lookup = self.lookup_root(); + let opctx = &lookup.opctx; + let datastore = &lookup.datastore; + match &self { + UpdateArtifact::Error(_, error) => Err(error.clone()), + UpdateArtifact::PrimaryKey(_, v0, v1, v2) => { + let (authz_update_artifact, _) = Self::lookup_by_id_no_authz( + opctx, + datastore, + v0, + v1, + v2, + ) + .await?; + Ok((authz_update_artifact,)) + } + } + } + /// Build the `authz` object for this resource + fn make_authz( + authz_parent: &authz::Fleet, + db_row: &nexus_db_model::UpdateArtifact, + lookup_type: LookupType, + ) -> authz::UpdateArtifact { + authz::UpdateArtifact::with_primary_key( + authz_parent.clone(), + db_row.id(), + lookup_type, + ) + } + /// Getting the [`LookupPath`] for this lookup + /// + /// This is used when we actually query the database. At that + /// point, we need the `OpContext` and `DataStore` that are being + /// used for this lookup. + fn lookup_root(&self) -> &LookupPath<'a> { + match &self { + UpdateArtifact::Error(root, ..) => root.lookup_root(), + UpdateArtifact::PrimaryKey(root, ..) 
=> root.lookup_root(), + } + } + /// Fetch the database row for a resource by doing a lookup by id + /// + /// This function checks whether the caller has permissions to read + /// the requested data. However, it's not intended to be used + /// outside this module. See `fetch_for(authz::Action)`. + async fn fetch_by_id_for( + opctx: &OpContext, + datastore: &DataStore, + v0: &String, + v1: &i64, + v2: &KnownArtifactKind, + action: authz::Action, + ) -> LookupResult<(authz::UpdateArtifact, nexus_db_model::UpdateArtifact)> { + let (authz_update_artifact, db_row) = Self::lookup_by_id_no_authz( + opctx, + datastore, + v0, + v1, + v2, + ) + .await?; + opctx.authorize(action, &authz_update_artifact).await?; + Ok((authz_update_artifact, db_row)) + } + /// Lowest-level function for looking up a resource in the database + /// by id + /// + /// This function does not check whether the caller has permission + /// to read this information. That's why it's not `pub`. Outside + /// this module, you want `fetch()` or `lookup_for(authz::Action)`. + async fn lookup_by_id_no_authz( + opctx: &OpContext, + datastore: &DataStore, + v0: &String, + v1: &i64, + v2: &KnownArtifactKind, + ) -> LookupResult<(authz::UpdateArtifact, nexus_db_model::UpdateArtifact)> { + use db::schema::update_artifact::dsl; + let db_row = dsl::update_artifact + .filter(dsl::name.eq(v0.clone())) + .filter(dsl::version.eq(v1.clone())) + .filter(dsl::kind.eq(v2.clone())) + .select(nexus_db_model::UpdateArtifact::as_select()) + .get_result_async(&*datastore.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::UpdateArtifact, + LookupType::ByCompositeId( + format!( + "name = {:?}, version = {:?}, kind = {:?}", + v0, + v1, + v2, + ), + ), + ), + ) + })?; + let authz_update_artifact = Self::make_authz( + &&authz::FLEET, + &db_row, + LookupType::ByCompositeId( + format!("name = {:?}, version = {:?}, kind = {:?}", v0, v1, v2), + ), + ); + Ok((authz_update_artifact, db_row)) + } +} diff --git a/nexus/db-macros/src/lookup.rs b/nexus/db-macros/src/lookup.rs index c7906c7bf0..c04c373ccb 100644 --- a/nexus/db-macros/src/lookup.rs +++ b/nexus/db-macros/src/lookup.rs @@ -913,22 +913,24 @@ fn generate_database_functions(config: &Config) -> TokenStream { } } -// This isn't so much a test (although it does make sure we don't panic on some -// basic cases). This is a way to dump the output of the macro for some common -// inputs. This is invaluable for debugging. If there's a bug where the macro -// generates syntactically invalid Rust, `cargo expand` will often not print the -// macro's output. Instead, you can paste the output of this test into -// lookup.rs, replacing the call to the macro, then reformat the file, and then -// build it in order to see the compiler error in context. #[cfg(test)] mod test { use super::lookup_resource; + use expectorate::assert_contents; use proc_macro2::TokenStream; use quote::quote; + /// Ensure that generated code is as expected. + /// + /// This is both a test, and a way to dump the output of the macro for some + /// common inputs. This is invaluable for debugging. If there's a bug + /// where the macro generates syntactically invalid Rust, `cargo expand` + /// will often not print the macro's output. Instead, you can paste the + /// output of this test into lookup.rs, replacing the call to the macro, + /// then reformat the file, and then build it in order to see the compiler + /// error in context. 
#[test]
-    #[ignore]
-    fn test_lookup_dump() {
+    fn test_lookup_snapshots() {
         let output = lookup_resource(quote! {
             name = "Project",
             ancestors = ["Silo"],
@@ -938,7 +940,7 @@ mod test {
             primary_key_columns = [ { column_name = "id", rust_type = Uuid } ]
         })
         .unwrap();
-        println!("{}", pretty_format(output));
+        assert_contents("outputs/project.txt", &pretty_format(output));

         let output = lookup_resource(quote! {
             name = "SiloUser",
@@ -949,7 +951,7 @@ mod test {
             primary_key_columns = [ { column_name = "id", rust_type = Uuid } ]
         })
         .unwrap();
-        println!("{}", pretty_format(output));
+        assert_contents("outputs/silo_user.txt", &pretty_format(output));

         let output = lookup_resource(quote! {
             name = "UpdateArtifact",
@@ -964,7 +966,7 @@
             ]
         })
         .unwrap();
-        println!("{}", pretty_format(output));
+        assert_contents("outputs/update_artifact.txt", &pretty_format(output));
     }

     fn pretty_format(input: TokenStream) -> String {

From 966a87ea79ab8b8f263cae4d64ec33b5da504231 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Fri, 2 Feb 2024 08:24:00 +0000
Subject: [PATCH 85/91] Update taiki-e/install-action digest to f0940d2 (#4966)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`7dcb31b` -> `f0940d2`](https://togithub.com/taiki-e/install-action/compare/7dcb31b...f0940d2) |

---

### Configuration

📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles.

🚦 **Automerge**: Enabled.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update again.

---

 - [ ] If you want to rebase/retry this PR, check this box

---

This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate).
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 4eec09c455..25cef73b23 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@7dcb31b8033f96afe112f0df83dcb01f9969d23b # v2 + uses: taiki-e/install-action@f0940d272d95b8f48df48b1fc7ead81b95bca8b6 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From ed2e63b85b6843899ba271da3a6883f4079132e4 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 2 Feb 2024 10:32:51 -0800 Subject: [PATCH 86/91] move blueprint execution to its own package (#4963) --- Cargo.lock | 26 ++ Cargo.toml | 3 + nexus/Cargo.toml | 1 + nexus/blueprint-execution/Cargo.toml | 34 ++ nexus/blueprint-execution/build.rs | 10 + nexus/blueprint-execution/src/lib.rs | 34 ++ .../blueprint-execution/src/omicron_zones.rs | 346 ++++++++++++++++++ .../tests/config.test.toml | 1 + .../src/app/background/blueprint_execution.rs | 274 ++++---------- 9 files changed, 530 insertions(+), 199 deletions(-) create mode 100644 nexus/blueprint-execution/Cargo.toml create mode 100644 nexus/blueprint-execution/build.rs create mode 100644 nexus/blueprint-execution/src/lib.rs create mode 100644 nexus/blueprint-execution/src/omicron_zones.rs create mode 120000 nexus/blueprint-execution/tests/config.test.toml diff --git a/Cargo.lock b/Cargo.lock index a54d03e52a..0ab396bf4d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4213,6 +4213,31 @@ dependencies = [ "rustc_version 0.1.7", ] +[[package]] +name = "nexus-blueprint-execution" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "futures", + "httptest", + "nexus-db-model", + "nexus-db-queries", + "nexus-test-utils", + "nexus-test-utils-macros", + "nexus-types", + "omicron-common", + "omicron-nexus", + "omicron-rpaths", + "omicron-workspace-hack", + "pq-sys", + "reqwest", + "sled-agent-client", + "slog", + "tokio", + "uuid", +] + [[package]] name = "nexus-client" version = "0.1.0" @@ -4948,6 +4973,7 @@ dependencies = [ "macaddr", "mg-admin-client", "mime_guess", + "nexus-blueprint-execution", "nexus-db-model", "nexus-db-queries", "nexus-defaults", diff --git a/Cargo.toml b/Cargo.toml index 600fbf185c..ea679498ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,7 @@ members = [ "key-manager", "nexus", "nexus/authz-macros", + "nexus/blueprint-execution", "nexus/db-macros", "nexus/db-model", "nexus/db-queries", @@ -113,6 +114,7 @@ default-members = [ "key-manager", "nexus", "nexus/authz-macros", + "nexus/blueprint-execution", "nexus/db-macros", "nexus/db-model", "nexus/db-queries", @@ -246,6 +248,7 @@ mime_guess = "2.0.4" mockall = "0.12" newtype_derive = "0.1.6" mg-admin-client = { path = "clients/mg-admin-client" } +nexus-blueprint-execution = { path = "nexus/blueprint-execution" } nexus-client = { path = "clients/nexus-client" } nexus-db-model = { path = "nexus/db-model" } nexus-db-queries = { path = "nexus/db-queries" } diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 87703cce77..6e9f2f135d 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -77,6 +77,7 @@ tough.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true +nexus-blueprint-execution.workspace = true nexus-defaults.workspace = true nexus-db-model.workspace = true 
nexus-db-queries.workspace = true diff --git a/nexus/blueprint-execution/Cargo.toml b/nexus/blueprint-execution/Cargo.toml new file mode 100644 index 0000000000..11d8003599 --- /dev/null +++ b/nexus/blueprint-execution/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "nexus-blueprint-execution" +version = "0.1.0" +edition = "2021" + +[build-dependencies] +omicron-rpaths.workspace = true + +[dependencies] +anyhow.workspace = true +futures.workspace = true +nexus-db-queries.workspace = true +nexus-types.workspace = true +reqwest.workspace = true +sled-agent-client.workspace = true +slog.workspace = true +uuid.workspace = true + +# See omicron-rpaths for more about the "pq-sys" dependency. This is needed +# because we use the database in the test suite, though it doesn't appear to +# work to put the pq-sys dependency only in dev-dependencies. +pq-sys = "*" + +omicron-workspace-hack.workspace = true + +[dev-dependencies] +chrono.workspace = true +httptest.workspace = true +nexus-db-model.workspace = true +nexus-test-utils.workspace = true +nexus-test-utils-macros.workspace = true +omicron-common.workspace = true +omicron-nexus.workspace = true +tokio.workspace = true diff --git a/nexus/blueprint-execution/build.rs b/nexus/blueprint-execution/build.rs new file mode 100644 index 0000000000..1ba9acd41c --- /dev/null +++ b/nexus/blueprint-execution/build.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// See omicron-rpaths for documentation. +// NOTE: This file MUST be kept in sync with the other build.rs files in this +// repository. +fn main() { + omicron_rpaths::configure_default_omicron_rpaths(); +} diff --git a/nexus/blueprint-execution/src/lib.rs b/nexus/blueprint-execution/src/lib.rs new file mode 100644 index 0000000000..f7bfd7d30c --- /dev/null +++ b/nexus/blueprint-execution/src/lib.rs @@ -0,0 +1,34 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Execution of Nexus blueprints +//! +//! See `nexus_deployment` crate-level docs for background. + +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::Blueprint; +use slog::o; + +mod omicron_zones; + +/// Make one attempt to realize the given blueprint, meaning to take actions to +/// alter the real system to match the blueprint +/// +/// The assumption is that callers are running this periodically or in a loop to +/// deal with transient errors or changes in the underlying system state. +pub async fn realize_blueprint( + opctx: &OpContext, + datastore: &DataStore, + blueprint: &Blueprint, +) -> Result<(), Vec> { + let log = opctx.log.new(o!("comment" => blueprint.comment.clone())); + omicron_zones::deploy_zones( + &log, + opctx, + datastore, + &blueprint.omicron_zones, + ) + .await +} diff --git a/nexus/blueprint-execution/src/omicron_zones.rs b/nexus/blueprint-execution/src/omicron_zones.rs new file mode 100644 index 0000000000..f3e81d283d --- /dev/null +++ b/nexus/blueprint-execution/src/omicron_zones.rs @@ -0,0 +1,346 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Manages deployment of Omicron zones to Sled Agents
+
+use anyhow::Context;
+use futures::stream;
+use futures::StreamExt;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::lookup::LookupPath;
+use nexus_db_queries::db::DataStore;
+use nexus_types::deployment::OmicronZonesConfig;
+use sled_agent_client::Client as SledAgentClient;
+use slog::info;
+use slog::warn;
+use slog::Logger;
+use std::collections::BTreeMap;
+use uuid::Uuid;
+
+/// Idempotently ensure that the specified Omicron zones are deployed to the
+/// corresponding sleds
+pub(crate) async fn deploy_zones(
+    log: &Logger,
+    opctx: &OpContext,
+    datastore: &DataStore,
+    zones: &BTreeMap<Uuid, OmicronZonesConfig>,
+) -> Result<(), Vec<anyhow::Error>> {
+    let errors: Vec<_> = stream::iter(zones)
+        .filter_map(|(sled_id, config)| async move {
+            let client = match sled_client(opctx, datastore, *sled_id).await {
+                Ok(client) => client,
+                Err(err) => {
+                    warn!(log, "{err:#}");
+                    return Some(err);
+                }
+            };
+            let result =
+                client.omicron_zones_put(&config).await.with_context(|| {
+                    format!("Failed to put {config:#?} to sled {sled_id}")
+                });
+
+            match result {
+                Err(error) => {
+                    warn!(log, "{error:#}");
+                    Some(error)
+                }
+                Ok(_) => {
+                    info!(
+                        log,
+                        "Successfully deployed zones for sled agent";
+                        "sled_id" => %sled_id,
+                        "generation" => %config.generation,
+                    );
+                    None
+                }
+            }
+        })
+        .collect()
+        .await;
+
+    if errors.is_empty() {
+        Ok(())
+    } else {
+        Err(errors)
+    }
+}
+
+// This is a modified copy of the functionality from `nexus/src/app/sled.rs`.
+// There's no good way to access this functionality right now since it is a
+// method on the `Nexus` type. We want to have a more constrained type we can
+// pass into background tasks for this type of functionality, but for now we
+// just copy the functionality.
+async fn sled_client(
+    opctx: &OpContext,
+    datastore: &DataStore,
+    sled_id: Uuid,
+) -> Result<SledAgentClient, anyhow::Error> {
+    let (.., sled) = LookupPath::new(opctx, datastore)
+        .sled_id(sled_id)
+        .fetch()
+        .await
+        .with_context(|| {
+            format!(
+                "Failed to create sled_agent::Client for sled_id: {}",
+                sled_id
+            )
+        })?;
+    let dur = std::time::Duration::from_secs(60);
+    let client = reqwest::ClientBuilder::new()
+        .connect_timeout(dur)
+        .timeout(dur)
+        .build()
+        .unwrap();
+    Ok(SledAgentClient::new_with_client(
+        &format!("http://{}", sled.address()),
+        client,
+        opctx.log.clone(),
+    ))
+}
+
+#[cfg(test)]
+mod test {
+    use super::deploy_zones;
+    use httptest::matchers::{all_of, json_decoded, request};
+    use httptest::responders::status_code;
+    use httptest::Expectation;
+    use nexus_db_model::{
+        ByteCount, SledBaseboard, SledSystemHardware, SledUpdate,
+    };
+    use nexus_db_queries::context::OpContext;
+    use nexus_test_utils_macros::nexus_test;
+    use nexus_types::deployment::OmicronZonesConfig;
+    use nexus_types::deployment::{Blueprint, BlueprintTarget};
+    use nexus_types::inventory::{
+        OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType,
+    };
+    use omicron_common::api::external::Generation;
+    use std::collections::BTreeMap;
+    use std::collections::BTreeSet;
+    use std::net::SocketAddr;
+    use std::sync::Arc;
+    use uuid::Uuid;
+
+    type ControlPlaneTestContext =
+        nexus_test_utils::ControlPlaneTestContext<omicron_nexus::Server>;
+
+    fn create_blueprint(
+        omicron_zones: BTreeMap<Uuid, OmicronZonesConfig>,
+    ) -> (BlueprintTarget, Blueprint) {
+        let id = Uuid::new_v4();
+        (
+            BlueprintTarget {
+                target_id: id,
+                enabled: true,
+                time_made_target: chrono::Utc::now(),
+            },
+            Blueprint {
+                id,
+                omicron_zones,
+                zones_in_service: BTreeSet::new(),
+                parent_blueprint_id: None,
+                time_created: chrono::Utc::now(),
+
creator: "test".to_string(), + comment: "test blueprint".to_string(), + }, + ) + } + + #[nexus_test] + async fn test_deploy_omicron_zones(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + let log = opctx.log.clone(); + + // Get a success result back when the blueprint has an empty set of + // zones. + let blueprint = Arc::new(create_blueprint(BTreeMap::new())); + deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) + .await + .expect("failed to deploy no zones"); + + // Create some fake sled-agent servers to respond to zone puts and add + // sleds to CRDB. + let mut s1 = httptest::Server::run(); + let mut s2 = httptest::Server::run(); + let sled_id1 = Uuid::new_v4(); + let sled_id2 = Uuid::new_v4(); + let rack_id = Uuid::new_v4(); + for (i, (sled_id, server)) in + [(sled_id1, &s1), (sled_id2, &s2)].iter().enumerate() + { + let SocketAddr::V6(addr) = server.addr() else { + panic!("Expected Ipv6 address. Got {}", server.addr()); + }; + let update = SledUpdate::new( + *sled_id, + addr, + SledBaseboard { + serial_number: i.to_string(), + part_number: "test".into(), + revision: 1, + }, + SledSystemHardware { + is_scrimlet: false, + usable_hardware_threads: 4, + usable_physical_ram: ByteCount(1000.into()), + reservoir_size: ByteCount(999.into()), + }, + rack_id, + ); + datastore + .sled_upsert(update) + .await + .expect("Failed to insert sled to db"); + } + + // The particular dataset doesn't matter for this test. + // We re-use the same one to not obfuscate things + let dataset = OmicronZoneDataset { + pool_name: format!("oxp_{}", Uuid::new_v4()).parse().unwrap(), + }; + + let generation = Generation::new(); + + // Zones are updated in a particular order, but each request contains + // the full set of zones that must be running. + // See `rack_setup::service::ServiceInner::run` for more details. + let mut zones = OmicronZonesConfig { + generation, + zones: vec![OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: OmicronZoneType::InternalDns { + dataset, + dns_address: "oh-hello-internal-dns".into(), + gz_address: "::1".parse().unwrap(), + gz_address_index: 0, + http_address: "some-ipv6-address".into(), + }, + }], + }; + + // Create a blueprint with only the `InternalDns` zone for both servers + // We reuse the same `OmicronZonesConfig` because the details don't + // matter for this test. + let blueprint = Arc::new(create_blueprint(BTreeMap::from([ + (sled_id1, zones.clone()), + (sled_id2, zones.clone()), + ]))); + + // Set expectations for the initial requests sent to the fake + // sled-agents. + for s in [&mut s1, &mut s2] { + s.expect( + Expectation::matching(all_of![ + request::method_path("PUT", "/omicron-zones",), + // Our generation number should be 1 and there should + // be only a single zone. + request::body(json_decoded(|c: &OmicronZonesConfig| { + c.generation == 1u32.into() && c.zones.len() == 1 + })) + ]) + .respond_with(status_code(204)), + ); + } + + // Execute it. + deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) + .await + .expect("failed to deploy initial zones"); + + s1.verify_and_clear(); + s2.verify_and_clear(); + + // Do it again. This should trigger the same request. 
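+        // (httptest note: `verify_and_clear()` asserts that every
+        // expectation registered above was satisfied and then drops those
+        // expectations, so each round of this test must install a fresh
+        // set before calling `deploy_zones` again.)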
+ for s in [&mut s1, &mut s2] { + s.expect( + Expectation::matching(request::method_path( + "PUT", + "/omicron-zones", + )) + .respond_with(status_code(204)), + ); + } + deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) + .await + .expect("failed to deploy same zones"); + s1.verify_and_clear(); + s2.verify_and_clear(); + + // Take another lap, but this time, have one server fail the request and + // try again. + s1.expect( + Expectation::matching(request::method_path( + "PUT", + "/omicron-zones", + )) + .respond_with(status_code(204)), + ); + s2.expect( + Expectation::matching(request::method_path( + "PUT", + "/omicron-zones", + )) + .respond_with(status_code(500)), + ); + + let errors = + deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) + .await + .expect_err("unexpectedly succeeded in deploying zones"); + + println!("{:?}", errors); + assert_eq!(errors.len(), 1); + assert!(errors[0] + .to_string() + .starts_with("Failed to put OmicronZonesConfig")); + s1.verify_and_clear(); + s2.verify_and_clear(); + + // Add an `InternalNtp` zone for our next update + zones.generation = generation.next(); + zones.zones.push(OmicronZoneConfig { + id: Uuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: OmicronZoneType::InternalNtp { + address: "::1".into(), + dns_servers: vec!["::1".parse().unwrap()], + domain: None, + ntp_servers: vec!["some-ntp-server-addr".into()], + }, + }); + + let blueprint = Arc::new(create_blueprint(BTreeMap::from([ + (sled_id1, zones.clone()), + (sled_id2, zones.clone()), + ]))); + + // Set our new expectations + for s in [&mut s1, &mut s2] { + s.expect( + Expectation::matching(all_of![ + request::method_path("PUT", "/omicron-zones",), + // Our generation number should be bumped and there should + // be two zones. + request::body(json_decoded(|c: &OmicronZonesConfig| { + c.generation == 2u32.into() && c.zones.len() == 2 + })) + ]) + .respond_with(status_code(204)), + ); + } + + // Activate the task + deploy_zones(&log, &opctx, &datastore, &blueprint.1.omicron_zones) + .await + .expect("failed to deploy last round of zones"); + s1.verify_and_clear(); + s2.verify_and_clear(); + } +} diff --git a/nexus/blueprint-execution/tests/config.test.toml b/nexus/blueprint-execution/tests/config.test.toml new file mode 120000 index 0000000000..52f00171fd --- /dev/null +++ b/nexus/blueprint-execution/tests/config.test.toml @@ -0,0 +1 @@ +../../tests/config.test.toml \ No newline at end of file diff --git a/nexus/src/app/background/blueprint_execution.rs b/nexus/src/app/background/blueprint_execution.rs index 8d6ea8d8ce..84d4cef212 100644 --- a/nexus/src/app/background/blueprint_execution.rs +++ b/nexus/src/app/background/blueprint_execution.rs @@ -5,22 +5,14 @@ //! Background task for realizing a plan blueprint use super::common::BackgroundTask; -use anyhow::Context; use futures::future::BoxFuture; -use futures::stream; use futures::FutureExt; -use futures::StreamExt; use nexus_db_queries::context::OpContext; -use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::DataStore; -use nexus_types::deployment::{Blueprint, BlueprintTarget, OmicronZonesConfig}; +use nexus_types::deployment::{Blueprint, BlueprintTarget}; use serde_json::json; -use sled_agent_client::Client as SledAgentClient; -use slog::Logger; -use std::collections::BTreeMap; use std::sync::Arc; use tokio::sync::watch; -use uuid::Uuid; /// Background task that takes a [`Blueprint`] and realizes the change to /// the state of the system based on the `Blueprint`. 
@@ -38,96 +30,6 @@ impl BlueprintExecutor { ) -> BlueprintExecutor { BlueprintExecutor { datastore, rx_blueprint } } - - // This is a modified copy of the functionality from `nexus/src/app/sled.rs`. - // There's no good way to access this functionality right now since it is a - // method on the `Nexus` type. We want to have a more constrained type we can - // pass into background tasks for this type of functionality, but for now we - // just copy the functionality. - async fn sled_client( - &self, - opctx: &OpContext, - sled_id: &Uuid, - ) -> Result { - let (.., sled) = LookupPath::new(opctx, &self.datastore) - .sled_id(*sled_id) - .fetch() - .await - .with_context(|| { - format!( - "Failed to create sled_agent::Client for sled_id: {}", - sled_id - ) - })?; - let dur = std::time::Duration::from_secs(60); - let client = reqwest::ClientBuilder::new() - .connect_timeout(dur) - .timeout(dur) - .build() - .unwrap(); - Ok(SledAgentClient::new_with_client( - &format!("http://{}", sled.address()), - client, - opctx.log.clone(), - )) - } - - async fn realize_blueprint( - &self, - opctx: &OpContext, - blueprint: &Blueprint, - ) -> Result<(), Vec> { - let log = opctx.log.new(o!("comment" => blueprint.comment.clone())); - self.deploy_zones(&log, opctx, &blueprint.omicron_zones).await - } - - async fn deploy_zones( - &self, - log: &Logger, - opctx: &OpContext, - zones: &BTreeMap, - ) -> Result<(), Vec> { - let errors: Vec<_> = stream::iter(zones.clone()) - .filter_map(|(sled_id, config)| async move { - let client = match self.sled_client(&opctx, &sled_id).await { - Ok(client) => client, - Err(err) => { - warn!(log, "{err:#}"); - return Some(err); - } - }; - let result = client - .omicron_zones_put(&config) - .await - .with_context(|| { - format!("Failed to put {config:#?} to sled {sled_id}") - }); - - match result { - Err(error) => { - warn!(log, "{error:#}"); - Some(error) - } - Ok(_) => { - info!( - log, - "Successfully deployed zones for sled agent"; - "sled_id" => %sled_id, - "generation" => config.generation.to_string() - ); - None - } - } - }) - .collect() - .await; - - if errors.is_empty() { - Ok(()) - } else { - Err(errors) - } - } } impl BackgroundTask for BlueprintExecutor { @@ -159,7 +61,12 @@ impl BackgroundTask for BlueprintExecutor { }); } - let result = self.realize_blueprint(opctx, blueprint).await; + let result = nexus_blueprint_execution::realize_blueprint( + opctx, + &self.datastore, + blueprint, + ) + .await; // Return the result as a `serde_json::Value` match result { @@ -179,24 +86,33 @@ impl BackgroundTask for BlueprintExecutor { .boxed() } } + #[cfg(test)] mod test { - use super::*; + use super::BlueprintExecutor; use crate::app::background::common::BackgroundTask; - use httptest::matchers::{all_of, json_decoded, request}; + use httptest::matchers::{all_of, request}; use httptest::responders::status_code; use httptest::Expectation; use nexus_db_model::{ ByteCount, SledBaseboard, SledSystemHardware, SledUpdate, }; + use nexus_db_queries::context::OpContext; use nexus_test_utils_macros::nexus_test; + use nexus_types::deployment::OmicronZonesConfig; + use nexus_types::deployment::{Blueprint, BlueprintTarget}; use nexus_types::inventory::{ OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, }; use omicron_common::api::external::Generation; use serde::Deserialize; + use serde_json::json; + use std::collections::BTreeMap; use std::collections::BTreeSet; use std::net::SocketAddr; + use std::sync::Arc; + use tokio::sync::watch; + use uuid::Uuid; type ControlPlaneTestContext = 
nexus_test_utils::ControlPlaneTestContext; @@ -225,6 +141,7 @@ mod test { #[nexus_test(server = crate::Server)] async fn test_deploy_omicron_zones(cptestctx: &ControlPlaneTestContext) { + // Set up the test. let nexus = &cptestctx.server.apictx().nexus; let datastore = nexus.datastore(); let opctx = OpContext::for_tests( @@ -232,19 +149,6 @@ mod test { datastore.clone(), ); - let (blueprint_tx, blueprint_rx) = watch::channel(None); - let mut task = BlueprintExecutor::new(datastore.clone(), blueprint_rx); - - // With no blueprint we should fail with an appropriate message. - let value = task.activate(&opctx).await; - assert_eq!(value, json!({"error": "no blueprint"})); - - // Get a success (empty) result back when the blueprint has an empty set of zones - let blueprint = Arc::new(create_blueprint(BTreeMap::new())); - blueprint_tx.send(Some(blueprint)).unwrap(); - let value = task.activate(&opctx).await; - assert_eq!(value, json!({})); - // Create some fake sled-agent servers to respond to zone puts and add // sleds to CRDB. let mut s1 = httptest::Server::run(); @@ -280,24 +184,40 @@ mod test { .expect("Failed to insert sled to db"); } - // The particular dataset doesn't matter for this test. - // We re-use the same one to not obfuscate things - let dataset = OmicronZoneDataset { - pool_name: format!("oxp_{}", Uuid::new_v4()).parse().unwrap(), - }; + let (blueprint_tx, blueprint_rx) = watch::channel(None); + let mut task = BlueprintExecutor::new(datastore.clone(), blueprint_rx); - let generation = Generation::new(); + // Now we're ready. + // + // With no target blueprint, the task should fail with an appropriate + // message. + let value = task.activate(&opctx).await; + assert_eq!(value, json!({"error": "no blueprint"})); - // Zones are updated in a particular order, but each request contains - // the full set of zones that must be running. - // See `rack_setup::service::ServiceInner::run` for more details. - let mut zones = OmicronZonesConfig { - generation, + // With a target blueprint having no zones, the task should trivially + // complete and report a successful (empty) summary. + let blueprint = Arc::new(create_blueprint(BTreeMap::new())); + blueprint_tx.send(Some(blueprint)).unwrap(); + let value = task.activate(&opctx).await; + println!("activating with no zones: {:?}", value); + assert_eq!(value, json!({})); + + // Create a non-empty blueprint describing two servers and verify that + // the task correctly winds up making requests to both of them and + // reporting success. We reuse the same `OmicronZonesConfig` in + // constructing the blueprint because the details don't matter for this + // test. + let zones = OmicronZonesConfig { + generation: Generation::new(), zones: vec![OmicronZoneConfig { id: Uuid::new_v4(), underlay_address: "::1".parse().unwrap(), zone_type: OmicronZoneType::InternalDns { - dataset, + dataset: OmicronZoneDataset { + pool_name: format!("oxp_{}", Uuid::new_v4()) + .parse() + .unwrap(), + }, dns_address: "oh-hello-internal-dns".into(), gz_address: "::1".parse().unwrap(), gz_address_index: 0, @@ -306,55 +226,53 @@ mod test { }], }; - // Create a blueprint with only the `InternalDns` zone for both servers - // We reuse the same `OmicronZonesConfig` because the details don't - // matter for this test. 
- let blueprint = Arc::new(create_blueprint(BTreeMap::from([ + let mut blueprint = create_blueprint(BTreeMap::from([ (sled_id1, zones.clone()), (sled_id2, zones.clone()), - ]))); + ])); - // Send the blueprint with the first set of zones to the task - blueprint_tx.send(Some(blueprint)).unwrap(); + blueprint_tx.send(Some(Arc::new(blueprint.clone()))).unwrap(); - // Check that the initial requests were sent to the fake sled-agents + // Make sure that requests get made to the sled agent. This is not a + // careful check of exactly what gets sent. For that, see the tests in + // nexus-blueprint-execution. for s in [&mut s1, &mut s2] { s.expect( - Expectation::matching(all_of![ - request::method_path("PUT", "/omicron-zones",), - // Our generation number should be 1 and there should - // be only a single zone. - request::body(json_decoded(|c: &OmicronZonesConfig| { - c.generation == 1u32.into() && c.zones.len() == 1 - })) - ]) + Expectation::matching(all_of![request::method_path( + "PUT", + "/omicron-zones" + ),]) .respond_with(status_code(204)), ); } // Activate the task to trigger zone configuration on the sled-agents let value = task.activate(&opctx).await; + println!("activating two sled agents: {:?}", value); assert_eq!(value, json!({})); s1.verify_and_clear(); s2.verify_and_clear(); - // Do it again. This should trigger the same request. - for s in [&mut s1, &mut s2] { - s.expect( - Expectation::matching(request::method_path( - "PUT", - "/omicron-zones", - )) - .respond_with(status_code(204)), - ); - } + // Now, disable the target and make sure that we _don't_ invoke the sled + // agent. It's enough to just not set expectations. + blueprint.0.enabled = false; + blueprint_tx.send(Some(Arc::new(blueprint.clone()))).unwrap(); let value = task.activate(&opctx).await; - assert_eq!(value, json!({})); + println!("when disabled: {:?}", value); + assert_eq!( + value, + json!({ + "error": "blueprint disabled", + "target_id": blueprint.1.id.to_string() + }) + ); s1.verify_and_clear(); s2.verify_and_clear(); - // Take another lap, but this time, have one server fail the request and - // try again. + // Do it all again, but configure one of the servers to fail so we can + // verify the task's returned summary of what happened. + blueprint.0.enabled = true; + blueprint_tx.send(Some(Arc::new(blueprint))).unwrap(); s1.expect( Expectation::matching(request::method_path( "PUT", @@ -370,14 +288,13 @@ mod test { .respond_with(status_code(500)), ); - // Define a type we can use to pick stuff out of error objects. 
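+        // Used to deserialize the errors back out of the task's JSON
+        // summary.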
#[derive(Deserialize)] struct ErrorResult { errors: Vec, } let value = task.activate(&opctx).await; - println!("{:?}", value); + println!("after failure: {:?}", value); let result: ErrorResult = serde_json::from_value(value).unwrap(); assert_eq!(result.errors.len(), 1); assert!( @@ -385,46 +302,5 @@ mod test { ); s1.verify_and_clear(); s2.verify_and_clear(); - - // Add an `InternalNtp` zone for our next update - zones.generation = generation.next(); - zones.zones.push(OmicronZoneConfig { - id: Uuid::new_v4(), - underlay_address: "::1".parse().unwrap(), - zone_type: OmicronZoneType::InternalNtp { - address: "::1".into(), - dns_servers: vec!["::1".parse().unwrap()], - domain: None, - ntp_servers: vec!["some-ntp-server-addr".into()], - }, - }); - - // Update our watch channel - let blueprint = Arc::new(create_blueprint(BTreeMap::from([ - (sled_id1, zones.clone()), - (sled_id2, zones.clone()), - ]))); - blueprint_tx.send(Some(blueprint)).unwrap(); - - // Set our new expectations - for s in [&mut s1, &mut s2] { - s.expect( - Expectation::matching(all_of![ - request::method_path("PUT", "/omicron-zones",), - // Our generation number should be bumped and there should - // be two zones. - request::body(json_decoded(|c: &OmicronZonesConfig| { - c.generation == 2u32.into() && c.zones.len() == 2 - })) - ]) - .respond_with(status_code(204)), - ); - } - - // Activate the task - let value = task.activate(&opctx).await; - assert_eq!(value, json!({})); - s1.verify_and_clear(); - s2.verify_and_clear(); } } From f54e772b019a892afcc7832aec31e5e10170c2ae Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 2 Feb 2024 11:09:14 -0800 Subject: [PATCH 87/91] Update Rust crate toml_edit to 0.21.1 (#4969) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 14 +++++++------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0ab396bf4d..ad203e6f85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5380,7 +5380,7 @@ dependencies = [ "toml 0.7.8", "toml_datetime", "toml_edit 0.19.15", - "toml_edit 0.21.0", + "toml_edit 0.21.1", "tracing", "trust-dns-proto", "unicode-bidi", @@ -7846,9 +7846,9 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.4" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12022b835073e5b11e90a14f86838ceb1c8fb0325b72416845c487ac0fa95e80" +checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" dependencies = [ "serde", ] @@ -9241,7 +9241,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit 0.21.0", + "toml_edit 0.21.1", ] [[package]] @@ -9268,9 +9268,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.21.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34d383cd00a163b4a5b85053df514d45bc330f6de7737edfe0a93311d1eaa03" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" dependencies = [ "indexmap 2.2.2", "serde", @@ -10229,7 +10229,7 @@ dependencies = [ "tokio", "tokio-util", "toml 0.8.8", - "toml_edit 0.21.0", + "toml_edit 0.21.1", "tui-tree-widget", "unicode-width", "update-engine", diff --git a/Cargo.toml b/Cargo.toml index ea679498ce..22972f938a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -390,7 +390,7 @@ tokio-stream = "0.1.14" tokio-tungstenite = "0.20" tokio-util = { version = 
"0.7.10", features = ["io", "io-util"] } toml = "0.8.8" -toml_edit = "0.21.0" +toml_edit = "0.21.1" tough = { version = "0.16.0", features = [ "http" ] } trust-dns-client = "0.22" trust-dns-proto = "0.22" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index be82bc5fda..b7177e58ac 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -108,7 +108,7 @@ tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serd tokio-stream = { version = "0.1.14", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } toml = { version = "0.7.8" } -toml_edit-647d43efb71741da = { package = "toml_edit", version = "0.21.0", features = ["serde"] } +toml_edit-647d43efb71741da = { package = "toml_edit", version = "0.21.1", features = ["serde"] } tracing = { version = "0.1.37", features = ["log"] } trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } @@ -217,7 +217,7 @@ tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serd tokio-stream = { version = "0.1.14", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } toml = { version = "0.7.8" } -toml_edit-647d43efb71741da = { package = "toml_edit", version = "0.21.0", features = ["serde"] } +toml_edit-647d43efb71741da = { package = "toml_edit", version = "0.21.1", features = ["serde"] } tracing = { version = "0.1.37", features = ["log"] } trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } From 8622ddecdc0ac66e0be7635dfea8aebaaf7f1a9a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 2 Feb 2024 11:09:57 -0800 Subject: [PATCH 88/91] Update Rust crate bb8 to 0.8.3 (#4925) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ad203e6f85..df72270b7b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -427,9 +427,9 @@ dependencies = [ [[package]] name = "bb8" -version = "0.8.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98b4b0f25f18bcdc3ac72bdb486ed0acf7e185221fd4dc985bc15db5800b0ba2" +checksum = "df7c2093d15d6a1d33b1f972e1c5ea3177748742b97a5f392aa83a65262c6780" dependencies = [ "async-trait", "futures-channel", diff --git a/Cargo.toml b/Cargo.toml index 22972f938a..19d1a9456a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -160,7 +160,7 @@ atomicwrites = "0.4.3" authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } base64 = "0.21.7" -bb8 = "0.8.1" +bb8 = "0.8.3" bcs = "0.1.6" bincode = "1.3.3" bootstore = { path = "bootstore" } From 0619af98c8fcd183b4f85e2681f77bf347e3a158 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Fri, 2 Feb 2024 14:10:09 -0600 Subject: [PATCH 89/91] Bump web console (no default pool error) (#4971) https://github.com/oxidecomputer/console/compare/1a4f5d81...5bee1f5b * [5bee1f5b](https://github.com/oxidecomputer/console/commit/5bee1f5b) oxidecomputer/console#1936 * [16b5b32a](https://github.com/oxidecomputer/console/commit/16b5b32a) oxidecomputer/console#1935 * [2aa720d2](https://github.com/oxidecomputer/console/commit/2aa720d2) bump API and pull in ipv4/ipv6 zod validation change --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version 
b/tools/console_version index 197d3014a2..e81fb1c03e 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="1a4f5d81af09f238dc094e56ad24dd0aa4fd46a0" -SHA2="97e1ea69a7f2a798c05f1d8e12f7de9bf32d11f70d99a7cbc05dabda1c5b7ce4" +COMMIT="5bee1f5b468b17b579a0dc0c5aee6a3056146b79" +SHA2="66e73112327db5f80e32d6865cf150320ae508985cef421459107713287ed4b8" From 62b2f0bf48a8851f55fd3e8b2477cf308c6bc1df Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 2 Feb 2024 13:37:07 -0800 Subject: [PATCH 90/91] [buildomat] Capture per-crate build timing as build-timings.json (#4960) Part of https://github.com/oxidecomputer/omicron/issues/4471 Captures the per-crate build timing information in a JSON file --- .github/buildomat/build-and-test.sh | 23 +++++++++++++++---- .../buildomat/jobs/build-and-test-helios.sh | 1 + .../buildomat/jobs/build-and-test-linux.sh | 1 + 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index 5cf086b1a3..30abd02f90 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -20,9 +20,13 @@ curl -sSfL --retry 10 https://get.nexte.st/"$NEXTEST_VERSION"/"$1" | gunzip | ta # we can check later whether we left detritus around. # TEST_TMPDIR='/var/tmp/omicron_tmp' -echo "tests will store output in $TEST_TMPDIR" >&2 +echo "tests will store ephemeral output in $TEST_TMPDIR" >&2 mkdir "$TEST_TMPDIR" +OUTPUT_DIR='/work' +echo "tests will store non-ephemeral output in $OUTPUT_DIR" >&2 +mkdir -p "$OUTPUT_DIR" + # # Set up our PATH for the test suite. # @@ -50,18 +54,27 @@ ptime -m bash ./tools/install_builder_prerequisites.sh -y # banner build export RUSTFLAGS="-D warnings" +export RUSTDOCFLAGS="-D warnings" # When running on illumos we need to pass an additional runpath that is # usually configured via ".cargo/config" but the `RUSTFLAGS` env variable # takes precedence. This path contains oxide specific libraries such as # libipcc. if [[ $target_os == "illumos" ]]; then - RUSTFLAGS="-D warnings -C link-arg=-R/usr/platform/oxide/lib/amd64" + RUSTFLAGS="$RUSTFLAGS -C link-arg=-R/usr/platform/oxide/lib/amd64" fi -export RUSTDOCFLAGS="-D warnings" -export TMPDIR=$TEST_TMPDIR +export TMPDIR="$TEST_TMPDIR" export RUST_BACKTRACE=1 +# We're building once, so there's no need to incur the overhead of an incremental build. export CARGO_INCREMENTAL=0 -ptime -m cargo test --locked --verbose --no-run +# This allows us to build with unstable options, which gives us access to some +# timing information. +# +# If we remove "--timings=json" below, this would no longer be needed. +export RUSTC_BOOTSTRAP=1 + +# Build all the packages and tests, and keep track of how long each took to build. +# We report build progress to stderr, and the "--timings=json" output goes to stdout. 
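+#
+# Each line of that file is one JSON message from cargo. As an illustration
+# only (nothing in this job depends on it, and the exact message shape is
+# cargo's unstable format), per-crate durations could later be pulled out
+# with jq:
+#
+#   jq -r 'select(.reason == "timing-info") | "\(.target.name) \(.duration)"' \
+#     "$OUTPUT_DIR/crate-build-timings.json"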
+ptime -m cargo build -Z unstable-options --timings=json --workspace --tests --locked --verbose 1> "$OUTPUT_DIR/crate-build-timings.json"
 
 #
 # We apply our own timeout to ensure that we get a normal failure on timeout
diff --git a/.github/buildomat/jobs/build-and-test-helios.sh b/.github/buildomat/jobs/build-and-test-helios.sh
index 2c7a1f884d..cfcbb61475 100755
--- a/.github/buildomat/jobs/build-and-test-helios.sh
+++ b/.github/buildomat/jobs/build-and-test-helios.sh
@@ -5,6 +5,7 @@
 #: target = "helios-2.0"
 #: rust_toolchain = "1.72.1"
 #: output_rules = [
+#:	"%/work/*",
 #:	"%/var/tmp/omicron_tmp/*",
 #:	"!/var/tmp/omicron_tmp/crdb-base*",
 #:	"!/var/tmp/omicron_tmp/rustc*",
diff --git a/.github/buildomat/jobs/build-and-test-linux.sh b/.github/buildomat/jobs/build-and-test-linux.sh
index 4f4ebc1d8a..22332ce65c 100755
--- a/.github/buildomat/jobs/build-and-test-linux.sh
+++ b/.github/buildomat/jobs/build-and-test-linux.sh
@@ -5,6 +5,7 @@
 #: target = "ubuntu-22.04"
 #: rust_toolchain = "1.72.1"
 #: output_rules = [
+#:	"%/work/*",
 #:	"%/var/tmp/omicron_tmp/*",
 #:	"!/var/tmp/omicron_tmp/crdb-base*",
 #:	"!/var/tmp/omicron_tmp/rustc*",

From eac7d3d854b32ed4c5bbf62b1cdb03d4bd030f69 Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Fri, 2 Feb 2024 14:23:28 -0800
Subject: [PATCH 91/91] Integrate Package cache (#4940)

This PR pulls in the changes introduced in
https://github.com/oxidecomputer/omicron-package/pull/60, which have been
published as a breaking release of omicron-package. These changes include
caching, a non-TUI logfile, and camino paths.

The cache is enabled by default; it can optionally be disabled with:

```bash
omicron-package package --disable-cache
```
---
 Cargo.lock                                  |  39 ++-
 Cargo.toml                                  |   5 +-
 dev-tools/thing-flinger/Cargo.toml          |   1 +
 .../thing-flinger/src/bin/thing-flinger.rs  | 164 +++++-------
 package/Cargo.toml                          |   2 +
 package/src/bin/omicron-package.rs          | 236 ++++++++++--------
 package/src/dot.rs                          |  12 +-
 package/src/lib.rs                          |  23 +-
 workspace-hack/Cargo.toml                   |   2 -
 9 files changed, 257 insertions(+), 227 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index df72270b7b..ce31b18823 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -569,6 +569,21 @@ dependencies = [
  "constant_time_eq 0.2.6",
 ]
 
+[[package]]
+name = "blake3"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87"
+dependencies = [
+ "arrayref",
+ "arrayvec",
+ "cc",
+ "cfg-if",
+ "constant_time_eq 0.3.0",
+ "memmap2",
+ "rayon",
+]
+
 [[package]]
 name = "block-buffer"
 version = "0.10.4"
@@ -4027,6 +4042,15 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "memmap2"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "memoffset"
 version = "0.7.1"
@@ -4844,6 +4868,7 @@ name = "omicron-deploy"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "camino",
  "clap 4.4.3",
  "crossbeam",
  "omicron-package",
@@ -5099,6 +5124,7 @@ name = "omicron-package"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "camino",
  "clap 4.4.3",
  "expectorate",
  "futures",
@@ -5116,6 +5142,7 @@ dependencies = [
  "sled-hardware",
  "slog",
  "slog-async",
+ "slog-bunyan",
  "slog-term",
  "smf",
  "strum",
@@ -5290,7 +5317,6 @@ dependencies = [
  "bstr 1.6.0",
  "byteorder",
  "bytes",
- "camino",
  "chrono",
  "cipher",
  "clap 4.4.3",
@@ -5396,15 +5422,19 @@ dependencies = [
 
 [[package]]
 name =
"omicron-zone-package" -version = "0.10.1" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdfd257b7067e7a6aa9fba896a89b0f625bac7660213bb830db36e543bd3cdb8" +checksum = "e75ad9eb79bb6a1ec78d2eecf36c67fffcf56264d779c456c6e2cd4b257ee9fe" dependencies = [ "anyhow", "async-trait", + "blake3", + "camino", + "camino-tempfile", "chrono", "filetime", "flate2", + "futures", "futures-util", "hex", "reqwest", @@ -5412,8 +5442,9 @@ dependencies = [ "semver 1.0.21", "serde", "serde_derive", + "serde_json", + "slog", "tar", - "tempfile", "thiserror", "tokio", "toml 0.7.8", diff --git a/Cargo.toml b/Cargo.toml index 19d1a9456a..b8379adbd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -168,7 +168,7 @@ bootstrap-agent-client = { path = "clients/bootstrap-agent-client" } buf-list = { version = "1.0.3", features = ["tokio1"] } byteorder = "1.5.0" bytes = "1.5.0" -camino = "1.1" +camino = { version = "1.1", features = ["serde1"] } camino-tempfile = "1.1.1" cancel-safe-futures = "0.1.5" chacha20poly1305 = "0.10.1" @@ -272,7 +272,7 @@ omicron-package = { path = "package" } omicron-rpaths = { path = "rpaths" } omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } -omicron-zone-package = "0.10.1" +omicron-zone-package = "0.11.0" oxide-client = { path = "clients/oxide-client" } oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "1d29ef60a18179babfb44f0f7a3c2fe71034a2c1", features = [ "api", "std" ] } once_cell = "1.19.0" @@ -353,6 +353,7 @@ sled-hardware = { path = "sled-hardware" } sled-storage = { path = "sled-storage" } slog = { version = "2.7", features = [ "dynamic-keys", "max_level_trace", "release_max_level_debug" ] } slog-async = "2.8" +slog-bunyan = "2.5" slog-dtrace = "0.3" slog-envlogger = "2.2" slog-error-chain = { git = "https://github.com/oxidecomputer/slog-error-chain", branch = "main", features = ["derive"] } diff --git a/dev-tools/thing-flinger/Cargo.toml b/dev-tools/thing-flinger/Cargo.toml index 2acbaf5659..a427685871 100644 --- a/dev-tools/thing-flinger/Cargo.toml +++ b/dev-tools/thing-flinger/Cargo.toml @@ -7,6 +7,7 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true +camino.workspace = true clap.workspace = true crossbeam.workspace = true omicron-package.workspace = true diff --git a/dev-tools/thing-flinger/src/bin/thing-flinger.rs b/dev-tools/thing-flinger/src/bin/thing-flinger.rs index a13d5cfef7..43b137790d 100644 --- a/dev-tools/thing-flinger/src/bin/thing-flinger.rs +++ b/dev-tools/thing-flinger/src/bin/thing-flinger.rs @@ -6,8 +6,8 @@ use omicron_package::{parse, BuildCommand, DeployCommand}; +use camino::{Utf8Path, Utf8PathBuf}; use std::collections::{BTreeMap, BTreeSet}; -use std::path::{Path, PathBuf}; use std::process::Command; use anyhow::{Context, Result}; @@ -20,7 +20,7 @@ use thiserror::Error; #[derive(Deserialize, Debug)] struct Builder { server: String, - omicron_path: PathBuf, + omicron_path: Utf8PathBuf, } // A server on which an omicron package is deployed. 
@@ -33,19 +33,19 @@ struct Server { #[derive(Deserialize, Debug)] struct Deployment { rss_server: String, - staging_dir: PathBuf, + staging_dir: Utf8PathBuf, servers: BTreeSet, } #[derive(Debug, Deserialize)] struct Config { - omicron_path: PathBuf, + omicron_path: Utf8PathBuf, builder: Builder, servers: BTreeMap, deployment: Deployment, #[serde(default)] - rss_config_path: Option, + rss_config_path: Option, #[serde(default)] debug: bool, @@ -129,7 +129,7 @@ struct Args { help = "Path to deployment manifest toml file", action )] - config: PathBuf, + config: Utf8PathBuf, #[clap( short, @@ -140,7 +140,7 @@ struct Args { /// The output directory, where artifacts should be built and staged #[clap(long = "artifacts", default_value = "out/")] - artifact_dir: PathBuf, + artifact_dir: Utf8PathBuf, #[clap(subcommand)] subcommand: SubCommand, @@ -152,11 +152,6 @@ enum FlingError { #[error("Servers not listed in configuration: {0:?}")] InvalidServers(Vec), - /// The parameter should be the name of the argument that could not be - /// properly converted to a string. - #[error("{0} is not valid UTF-8")] - BadString(String), - /// Failed to rsync omicron to build host #[error("Failed to sync {src} with {dst}")] FailedSync { src: String, dst: String }, @@ -238,20 +233,14 @@ fn do_sync(config: &Config) -> Result<()> { // trailing slash. let src = format!( "{}/", - config - .omicron_path - .canonicalize() - .with_context(|| format!( - "could not canonicalize {}", - config.omicron_path.display() - ))? - .to_string_lossy() + config.omicron_path.canonicalize_utf8().with_context(|| format!( + "could not canonicalize {}", + config.omicron_path + ))? ); let dst = format!( "{}@{}:{}", - builder.username, - builder.addr, - config.builder.omicron_path.to_str().unwrap() + builder.username, builder.addr, config.builder.omicron_path ); println!("Synchronizing source files to: {}", dst); @@ -301,9 +290,7 @@ fn copy_to_deployment_staging_dir( || { let dst = format!( "{}@{}:{}", - server.username, - server.addr, - config.deployment.staging_dir.to_str().unwrap() + server.username, server.addr, config.deployment.staging_dir ); let mut cmd = partial_cmd(); cmd.arg(&dst); @@ -330,14 +317,10 @@ fn rsync_config_needed_for_tools(config: &Config) -> Result<()> { // the `./` here is load-bearing; it interacts with `--relative` to tell // rsync to create `smf/sled-agent` but none of its parents "{}/./smf/sled-agent/", - config - .omicron_path - .canonicalize() - .with_context(|| format!( - "could not canonicalize {}", - config.omicron_path.display() - ))? - .to_string_lossy() + config.omicron_path.canonicalize_utf8().with_context(|| format!( + "could not canonicalize {}", + config.omicron_path + ))? ); copy_to_deployment_staging_dir(config, src, "Copy smf/sled-agent dir") @@ -351,14 +334,10 @@ fn rsync_tools_dir_to_deployment_servers(config: &Config) -> Result<()> { // the `./` here is load-bearing; it interacts with `--relative` to tell // rsync to create `tools` but none of its parents "{}/./tools/", - config - .omicron_path - .canonicalize() - .with_context(|| format!( - "could not canonicalize {}", - config.omicron_path.display() - ))? - .to_string_lossy() + config.omicron_path.canonicalize_utf8().with_context(|| format!( + "could not canonicalize {}", + config.omicron_path + ))? 
); copy_to_deployment_staging_dir(config, src, "Copy tools dir") } @@ -405,7 +384,7 @@ fn do_install_prereqs(config: &Config) -> Result<()> { let cmd = format!( "cd {} && mkdir -p out && pfexec ./tools/{}", - root_path.display(), + root_path.clone(), script ); println!( @@ -426,7 +405,7 @@ fn create_external_tls_cert_on_builder(config: &Config) -> Result<()> { let builder = &config.servers[&config.builder.server]; let cmd = format!( "cd {} && ./tools/create_self_signed_cert.sh", - config.builder.omicron_path.to_string_lossy() + config.builder.omicron_path, ); ssh_exec(&builder, &cmd, SshStrategy::NoForward) } @@ -434,7 +413,7 @@ fn create_external_tls_cert_on_builder(config: &Config) -> Result<()> { fn create_virtual_hardware_on_deployment_servers(config: &Config) { let cmd = format!( "cd {} && pfexec ./tools/create_virtual_hardware.sh", - config.deployment.staging_dir.display() + config.deployment.staging_dir ); let fns = config.deployment_servers().map(|server| { || { @@ -464,7 +443,7 @@ fn do_build_minimal(config: &Config) -> Result<()> { let server = &config.servers[&config.builder.server]; let cmd = format!( "cd {} && cargo build {} -p {} -p {}", - config.builder.omicron_path.to_string_lossy(), + config.builder.omicron_path, config.release_arg(), "omicron-package", "omicron-deploy" @@ -472,11 +451,8 @@ fn do_build_minimal(config: &Config) -> Result<()> { ssh_exec(&server, &cmd, SshStrategy::NoForward) } -fn do_package(config: &Config, artifact_dir: PathBuf) -> Result<()> { +fn do_package(config: &Config, artifact_dir: Utf8PathBuf) -> Result<()> { let builder = &config.servers[&config.builder.server]; - let artifact_dir = artifact_dir - .to_str() - .ok_or_else(|| FlingError::BadString("artifact_dir".to_string()))?; // We use a bash login shell to get a proper environment, so we have a path to // postgres, and $DEP_PQ_LIBDIRS is filled in. 
This is required for building @@ -487,9 +463,9 @@ fn do_package(config: &Config, artifact_dir: PathBuf) -> Result<()> { "bash -lc \ 'cd {} && \ cargo run {} --bin omicron-package -- package --out {}'", - config.builder.omicron_path.to_string_lossy(), + config.builder.omicron_path, config.release_arg(), - &artifact_dir, + artifact_dir, ); ssh_exec(&builder, &cmd, SshStrategy::NoForward) @@ -506,7 +482,7 @@ fn do_check(config: &Config) -> Result<()> { "bash -lc \ 'cd {} && \ cargo run {} --bin omicron-package -- check'", - config.builder.omicron_path.to_string_lossy(), + config.builder.omicron_path, config.release_arg(), ); @@ -521,7 +497,7 @@ fn do_uninstall(config: &Config) -> Result<()> { // Run `omicron-package uninstall` on the deployment server let cmd = format!( "cd {} && pfexec ./omicron-package uninstall", - config.deployment.staging_dir.to_string_lossy(), + config.deployment.staging_dir, ); println!("$ {}", cmd); ssh_exec(&server, &cmd, SshStrategy::Forward)?; @@ -531,10 +507,10 @@ fn do_uninstall(config: &Config) -> Result<()> { fn do_clean( config: &Config, - artifact_dir: PathBuf, - install_dir: PathBuf, + artifact_dir: Utf8PathBuf, + install_dir: Utf8PathBuf, ) -> Result<()> { - let mut deployment_src = PathBuf::from(&config.deployment.staging_dir); + let mut deployment_src = Utf8PathBuf::from(&config.deployment.staging_dir); deployment_src.push(&artifact_dir); let builder = &config.servers[&config.builder.server]; for server in config.deployment_servers() { @@ -543,9 +519,7 @@ fn do_clean( // Run `omicron-package uninstall` on the deployment server let cmd = format!( "cd {} && pfexec ./omicron-package clean --in {} --out {}", - config.deployment.staging_dir.to_string_lossy(), - deployment_src.to_string_lossy(), - install_dir.to_string_lossy() + config.deployment.staging_dir, deployment_src, install_dir, ); println!("$ {}", cmd); ssh_exec(&server, &cmd, SshStrategy::Forward)?; @@ -586,12 +560,14 @@ where .unwrap(); } -fn do_install(config: &Config, artifact_dir: &Path, install_dir: &Path) { +fn do_install( + config: &Config, + artifact_dir: &Utf8Path, + install_dir: &Utf8Path, +) { let builder = &config.servers[&config.builder.server]; - let mut pkg_dir = PathBuf::from(&config.builder.omicron_path); + let mut pkg_dir = Utf8PathBuf::from(&config.builder.omicron_path); pkg_dir.push(artifact_dir); - let pkg_dir = pkg_dir.to_string_lossy(); - let pkg_dir = &pkg_dir; let fns = config.deployment.servers.iter().map(|server_name| { (server_name, || { @@ -599,7 +575,7 @@ fn do_install(config: &Config, artifact_dir: &Path, install_dir: &Path) { config, &artifact_dir, &install_dir, - &pkg_dir, + pkg_dir.as_str(), builder, server_name, ) @@ -611,7 +587,7 @@ fn do_install(config: &Config, artifact_dir: &Path, install_dir: &Path) { fn do_overlay(config: &Config) -> Result<()> { let builder = &config.servers[&config.builder.server]; - let mut root_path = PathBuf::from(&config.builder.omicron_path); + let mut root_path = Utf8PathBuf::from(&config.builder.omicron_path); // TODO: This needs to match the artifact_dir in `package` root_path.push("out/overlay"); @@ -649,7 +625,7 @@ fn do_overlay(config: &Config) -> Result<()> { fn overlay_rss_config( builder: &Server, config: &Config, - rss_server_dir: &Path, + rss_server_dir: &Utf8Path, ) -> Result<()> { // Sync `config-rss.toml` to the directory for the RSS server on the // builder. 
@@ -660,9 +636,7 @@ fn overlay_rss_config( }; let dst = format!( "{}@{}:{}/config-rss.toml", - builder.username, - builder.addr, - rss_server_dir.display() + builder.username, builder.addr, rss_server_dir ); let mut cmd = rsync_common(); @@ -671,11 +645,7 @@ fn overlay_rss_config( let status = cmd.status().context(format!("Failed to run command: ({:?})", cmd))?; if !status.success() { - return Err(FlingError::FailedSync { - src: src.to_string_lossy().to_string(), - dst, - } - .into()); + return Err(FlingError::FailedSync { src: src.to_string(), dst }.into()); } Ok(()) @@ -683,8 +653,8 @@ fn overlay_rss_config( fn single_server_install( config: &Config, - artifact_dir: &Path, - install_dir: &Path, + artifact_dir: &Utf8Path, + install_dir: &Utf8Path, pkg_dir: &str, builder: &Server, server_name: &str, @@ -757,7 +727,7 @@ fn copy_package_artifacts_to_staging( pkg_dir, destination.username, destination.addr, - config.deployment.staging_dir.to_string_lossy() + config.deployment.staging_dir ); println!("$ {}", cmd); ssh_exec(builder, &cmd, SshStrategy::Forward) @@ -768,17 +738,17 @@ fn copy_omicron_package_binary_to_staging( builder: &Server, destination: &Server, ) -> Result<()> { - let mut bin_path = PathBuf::from(&config.builder.omicron_path); + let mut bin_path = Utf8PathBuf::from(&config.builder.omicron_path); bin_path.push(format!( "target/{}/omicron-package", if config.debug { "debug" } else { "release" } )); let cmd = format!( "rsync -avz {} {}@{}:{}", - bin_path.to_string_lossy(), + bin_path, destination.username, destination.addr, - config.deployment.staging_dir.to_string_lossy() + config.deployment.staging_dir ); println!("$ {}", cmd); ssh_exec(builder, &cmd, SshStrategy::Forward) @@ -789,14 +759,14 @@ fn copy_package_manifest_to_staging( builder: &Server, destination: &Server, ) -> Result<()> { - let mut path = PathBuf::from(&config.builder.omicron_path); + let mut path = Utf8PathBuf::from(&config.builder.omicron_path); path.push("package-manifest.toml"); let cmd = format!( "rsync {} {}@{}:{}", - path.to_string_lossy(), + path, destination.username, destination.addr, - config.deployment.staging_dir.to_string_lossy() + config.deployment.staging_dir ); println!("$ {}", cmd); ssh_exec(builder, &cmd, SshStrategy::Forward) @@ -805,13 +775,12 @@ fn copy_package_manifest_to_staging( fn run_omicron_package_activate_from_staging( config: &Config, destination: &Server, - install_dir: &Path, + install_dir: &Utf8Path, ) -> Result<()> { // Run `omicron-package activate` on the deployment server let cmd = format!( "cd {} && pfexec ./omicron-package activate --out {}", - config.deployment.staging_dir.to_string_lossy(), - install_dir.to_string_lossy(), + config.deployment.staging_dir, install_dir, ); println!("$ {}", cmd); @@ -821,18 +790,16 @@ fn run_omicron_package_activate_from_staging( fn run_omicron_package_unpack_from_staging( config: &Config, destination: &Server, - artifact_dir: &Path, - install_dir: &Path, + artifact_dir: &Utf8Path, + install_dir: &Utf8Path, ) -> Result<()> { - let mut deployment_src = PathBuf::from(&config.deployment.staging_dir); + let mut deployment_src = Utf8PathBuf::from(&config.deployment.staging_dir); deployment_src.push(&artifact_dir); // Run `omicron-package unpack` on the deployment server let cmd = format!( "cd {} && pfexec ./omicron-package unpack --in {} --out {}", - config.deployment.staging_dir.to_string_lossy(), - deployment_src.to_string_lossy(), - install_dir.to_string_lossy(), + config.deployment.staging_dir, deployment_src, install_dir, ); println!("$ 
{}", cmd); @@ -852,7 +819,7 @@ fn copy_overlay_files_to_staging( destination_name, destination.username, destination.addr, - config.deployment.staging_dir.to_string_lossy() + config.deployment.staging_dir ); println!("$ {}", cmd); ssh_exec(builder, &cmd, SshStrategy::Forward) @@ -861,12 +828,11 @@ fn copy_overlay_files_to_staging( fn install_overlay_files_from_staging( config: &Config, destination: &Server, - install_dir: &Path, + install_dir: &Utf8Path, ) -> Result<()> { let cmd = format!( "pfexec cp -r {}/overlay/* {}", - config.deployment.staging_dir.to_string_lossy(), - install_dir.to_string_lossy() + config.deployment.staging_dir, install_dir ); println!("$ {}", cmd); ssh_exec(&destination, &cmd, SshStrategy::NoForward) @@ -925,7 +891,7 @@ fn validate_servers( } fn validate_absolute_path( - path: &Path, + path: &Utf8Path, field: &'static str, ) -> Result<(), FlingError> { if path.is_absolute() || path.starts_with("$HOME") { @@ -970,7 +936,7 @@ fn main() -> Result<()> { SubCommand::Builder(BuildCommand::Target { .. }) => { todo!("Setting target not supported through thing-flinger") } - SubCommand::Builder(BuildCommand::Package) => { + SubCommand::Builder(BuildCommand::Package { .. }) => { do_package(&config, args.artifact_dir)?; } SubCommand::Builder(BuildCommand::Stamp { .. }) => { diff --git a/package/Cargo.toml b/package/Cargo.toml index 0dc86ceb8c..8067473aa0 100644 --- a/package/Cargo.toml +++ b/package/Cargo.toml @@ -7,6 +7,7 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true +camino.workspace = true clap.workspace = true futures.workspace = true hex.workspace = true @@ -22,6 +23,7 @@ serde.workspace = true sled-hardware.workspace = true slog.workspace = true slog-async.workspace = true +slog-bunyan.workspace = true slog-term.workspace = true smf.workspace = true strum.workspace = true diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index 59c5c6ffe6..3b8bd24918 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -5,6 +5,7 @@ //! Utility for bundling target binaries as tarfiles. 
use anyhow::{anyhow, bail, Context, Result}; +use camino::{Utf8Path, Utf8PathBuf}; use clap::{Parser, Subcommand}; use futures::stream::{self, StreamExt, TryStreamExt}; use illumos_utils::{zfs, zone}; @@ -26,7 +27,6 @@ use slog::{info, warn}; use std::env; use std::fs::create_dir_all; use std::io::Write; -use std::path::{Path, PathBuf}; use std::str::FromStr; use std::sync::Arc; use swrite::{swrite, SWrite}; @@ -60,7 +60,7 @@ struct Args { help = "Path to package manifest toml file", action )] - manifest: PathBuf, + manifest: Utf8PathBuf, #[clap( short, @@ -72,7 +72,7 @@ struct Args { /// The output directory, where artifacts should be built and staged #[clap(long = "artifacts", default_value = "out/")] - artifact_dir: PathBuf, + artifact_dir: Utf8PathBuf, #[clap( short, @@ -203,13 +203,13 @@ async fn do_dot(config: &Config) -> Result<()> { const ACTIVE: &str = "active"; async fn do_target( - artifact_dir: &Path, + artifact_dir: &Utf8Path, name: &str, subcommand: &TargetCommand, ) -> Result<()> { let target_dir = artifact_dir.join("target"); tokio::fs::create_dir_all(&target_dir).await.with_context(|| { - format!("failed to create directory {}", target_dir.display()) + format!("failed to create directory {}", target_dir) })?; match subcommand { TargetCommand::Create { image, machine, switch, rack_topology } => { @@ -224,7 +224,7 @@ async fn do_target( tokio::fs::write(&path, Target::from(target).to_string()) .await .with_context(|| { - format!("failed to write target to {}", path.display()) + format!("failed to write target to {}", path) })?; replace_active_link(&name, &target_dir).await?; @@ -233,7 +233,7 @@ async fn do_target( } TargetCommand::List => { let active = tokio::fs::read_link(target_dir.join(ACTIVE)).await?; - let active = active.to_string_lossy(); + let active = Utf8PathBuf::try_from(active)?; for entry in walkdir::WalkDir::new(&target_dir) .max_depth(1) .sort_by_file_name() @@ -268,9 +268,9 @@ async fn do_target( } async fn get_single_target( - target_dir: impl AsRef, + target_dir: impl AsRef, name: &str, -) -> Result { +) -> Result { if name == ACTIVE { bail!( "The name '{name}' is reserved, please try another (e.g. 'default')\n\ @@ -282,29 +282,25 @@ async fn get_single_target( } async fn replace_active_link( - src: impl AsRef, - target_dir: impl AsRef, + src: impl AsRef, + target_dir: impl AsRef, ) -> Result<()> { let src = src.as_ref(); let target_dir = target_dir.as_ref(); let dst = target_dir.join(ACTIVE); if !target_dir.join(src).exists() { - bail!("Target file {} does not exist", src.display()); + bail!("Target file {} does not exist", src); } let _ = tokio::fs::remove_file(&dst).await; tokio::fs::symlink(src, &dst).await.with_context(|| { - format!( - "failed creating symlink to {} at {}", - src.display(), - dst.display() - ) + format!("failed creating symlink to {} at {}", src, dst) })?; Ok(()) } // Calculates the SHA256 digest for a file. 
-async fn get_sha256_digest(path: &PathBuf) -> Result { +async fn get_sha256_digest(path: &Utf8PathBuf) -> Result { let mut reader = BufReader::new( tokio::fs::File::open(&path) .await @@ -333,21 +329,21 @@ async fn download_prebuilt( repo: &str, commit: &str, expected_digest: &Vec, - path: &Path, + path: &Utf8Path, ) -> Result<()> { progress.set_message("downloading prebuilt".into()); let url = format!( "https://buildomat.eng.oxide.computer/public/file/oxidecomputer/{}/image/{}/{}", repo, commit, - path.file_name().unwrap().to_string_lossy(), + path.file_name().unwrap(), ); let response = reqwest::Client::new() .get(&url) .send() .await .with_context(|| format!("failed to get {url}"))?; - progress.set_length( + progress.increment_total( response .content_length() .ok_or_else(|| anyhow!("Missing Content Length"))?, @@ -367,7 +363,7 @@ async fn download_prebuilt( .await .with_context(|| format!("failed writing {path:?}"))?; // Record progress in the UI - progress.increment(chunk.len().try_into().unwrap()); + progress.increment_completed(chunk.len().try_into().unwrap()); } let digest = context.finish(); @@ -382,16 +378,16 @@ async fn download_prebuilt( } // Ensures a package exists, either by creating it or downloading it. -async fn get_package( +async fn ensure_package( config: &Config, - target: &Target, ui: &Arc, package_name: &String, package: &Package, - output_directory: &Path, + output_directory: &Utf8Path, + disable_cache: bool, ) -> Result<()> { - let total_work = package.get_total_work_for_target(&target)?; - let progress = ui.add_package(package_name.to_string(), total_work); + let target = &config.target; + let progress = ui.add_package(package_name.to_string()); match &package.source { PackageSource::Prebuilt { repo, commit, sha256 } => { let expected_digest = hex::decode(&sha256)?; @@ -435,19 +431,26 @@ async fn get_package( } } PackageSource::Manual => { + progress.set_message("confirming manual package".into()); let path = package.get_output_path(package_name, &output_directory); if !path.exists() { bail!( "The package for {} (expected at {}) does not exist.", package_name, - path.to_string_lossy(), + path, ); } } PackageSource::Local { .. } | PackageSource::Composite { .. 
} => { - progress.set_message("bundle package".into()); + progress.set_message("building package".into()); + + let build_config = omicron_zone_package::package::BuildConfig { + target, + progress: &progress, + cache_disabled: disable_cache, + }; package - .create_with_progress_for_target(&progress, &target, package_name, &output_directory) + .create(package_name, &output_directory, &build_config) .await .with_context(|| { let msg = format!("failed to create {package_name} in {output_directory:?}"); @@ -463,11 +466,15 @@ async fn get_package( Ok(()) } -async fn do_package(config: &Config, output_directory: &Path) -> Result<()> { +async fn do_package( + config: &Config, + output_directory: &Utf8Path, + disable_cache: bool, +) -> Result<()> { create_dir_all(&output_directory) .map_err(|err| anyhow!("Cannot create output directory: {}", err))?; - let ui = ProgressUI::new(); + let ui = ProgressUI::new(&config.log); do_build(&config).await?; @@ -482,13 +489,13 @@ async fn do_package(config: &Config, output_directory: &Path) -> Result<()> { .try_for_each_concurrent( None, |((package_name, package), ui)| async move { - get_package( + ensure_package( &config, - &config.target, &ui, package_name, package, output_directory, + disable_cache, ) .await }, @@ -502,7 +509,7 @@ async fn do_package(config: &Config, output_directory: &Path) -> Result<()> { async fn do_stamp( config: &Config, - output_directory: &Path, + output_directory: &Utf8Path, package_name: &str, version: &semver::Version, ) -> Result<()> { @@ -518,14 +525,14 @@ async fn do_stamp( // Stamp it let stamped_path = package.stamp(package_name, output_directory, version).await?; - println!("Created: {}", stamped_path.display()); + println!("Created: {}", stamped_path); Ok(()) } async fn do_unpack( config: &Config, - artifact_dir: &Path, - install_dir: &Path, + artifact_dir: &Utf8Path, + install_dir: &Utf8Path, ) -> Result<()> { create_dir_all(&install_dir).map_err(|err| { anyhow!("Cannot create installation directory: {}", err) @@ -543,14 +550,14 @@ async fn do_unpack( info!( &config.log, "Installing service"; - "src" => %src.to_string_lossy(), - "dst" => %dst.to_string_lossy(), + "src" => %src, + "dst" => %dst, ); std::fs::copy(&src, &dst).map_err(|err| { anyhow!( "Failed to copy {src} to {dst}: {err}", - src = src.display(), - dst = dst.display() + src = src, + dst = dst ) })?; Ok(()) @@ -575,8 +582,8 @@ async fn do_unpack( info!( &config.log, "Unpacking service tarball"; - "tar_path" => %tar_path.to_string_lossy(), - "service_path" => %service_path.to_string_lossy(), + "tar_path" => %tar_path, + "service_path" => %service_path, ); let tar_file = std::fs::File::open(&tar_path)?; @@ -589,7 +596,7 @@ async fn do_unpack( Ok(()) } -fn do_activate(config: &Config, install_dir: &Path) -> Result<()> { +fn do_activate(config: &Config, install_dir: &Utf8Path) -> Result<()> { // Install the bootstrap service, which itself extracts and // installs other services. 
     if let Some(package) =
@@ -601,8 +608,7 @@
             .join("manifest.xml");
         info!(
             config.log,
-            "Installing bootstrap service from {}",
-            manifest_path.to_string_lossy()
+            "Installing bootstrap service from {}", manifest_path
         );
 
         smf::Config::import().run(&manifest_path)?;
@@ -613,8 +619,8 @@
 
 async fn do_install(
     config: &Config,
-    artifact_dir: &Path,
-    install_dir: &Path,
+    artifact_dir: &Utf8Path,
+    install_dir: &Utf8Path,
 ) -> Result<()> {
     do_unpack(config, artifact_dir, install_dir).await?;
     do_activate(config, install_dir)
@@ -674,7 +680,9 @@ fn uninstall_all_packages(config: &Config) {
     }
 }
 
-fn remove_file_unless_already_removed<P: AsRef<Path>>(path: P) -> Result<()> {
+fn remove_file_unless_already_removed<P: AsRef<Utf8Path>>(
+    path: P,
+) -> Result<()> {
     if let Err(e) = std::fs::remove_file(path.as_ref()) {
         match e.kind() {
             std::io::ErrorKind::NotFound => {}
@@ -684,7 +692,9 @@
     Ok(())
 }
 
-fn remove_all_unless_already_removed<P: AsRef<Path>>(path: P) -> Result<()> {
+fn remove_all_unless_already_removed<P: AsRef<Utf8Path>>(
+    path: P,
+) -> Result<()> {
     if let Err(e) = std::fs::remove_dir_all(path.as_ref()) {
         match e.kind() {
             std::io::ErrorKind::NotFound => {}
@@ -694,22 +704,22 @@
     Ok(())
 }
 
-fn remove_all_except<P: AsRef<Path>>(
+fn remove_all_except<P: AsRef<Utf8Path>>(
     path: P,
     to_keep: &[&str],
     log: &Logger,
 ) -> Result<()> {
-    let dir = match path.as_ref().read_dir() {
+    let dir = match path.as_ref().read_dir_utf8() {
         Ok(dir) => dir,
         Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(()),
         Err(e) => bail!(e),
     };
     for entry in dir {
         let entry = entry?;
-        if to_keep.contains(&&*(entry.file_name().to_string_lossy())) {
-            info!(log, "Keeping: '{}'", entry.path().to_string_lossy());
+        if to_keep.contains(&entry.file_name()) {
+            info!(log, "Keeping: '{}'", entry.path());
         } else {
-            info!(log, "Removing: '{}'", entry.path().to_string_lossy());
+            info!(log, "Removing: '{}'", entry.path());
             if entry.metadata()?.is_dir() {
                 remove_all_unless_already_removed(entry.path())?;
             } else {
@@ -739,15 +749,11 @@ async fn do_uninstall(config: &Config) -> Result<()> {
 
 async fn do_clean(
     config: &Config,
-    artifact_dir: &Path,
-    install_dir: &Path,
+    artifact_dir: &Utf8Path,
+    install_dir: &Utf8Path,
 ) -> Result<()> {
     do_uninstall(&config).await?;
-    info!(
-        config.log,
-        "Removing artifacts from {}",
-        artifact_dir.to_string_lossy()
-    );
+    info!(config.log, "Removing artifacts from {}", artifact_dir);
     const ARTIFACTS_TO_KEEP: &[&str] = &[
         "clickhouse",
         "cockroachdb",
         "crucible-pantry-zone",
         "maghemite",
         "propolis-server",
         "softnpu",
     ];
     remove_all_except(artifact_dir, ARTIFACTS_TO_KEEP, &config.log)?;
-    info!(
-        config.log,
-        "Removing installed objects in: {}",
-        install_dir.to_string_lossy()
-    );
+    info!(config.log, "Removing installed objects in: {}", install_dir);
     const INSTALLED_OBJECTS_TO_KEEP: &[&str] = &["opte"];
     remove_all_except(install_dir, INSTALLED_OBJECTS_TO_KEEP, &config.log)?;
@@ -770,52 +772,80 @@
 
 fn in_progress_style() -> ProgressStyle {
     ProgressStyle::default_bar()
-        .template(
-            "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
-        )
+        .template("[{elapsed:>3}] {bar:30.cyan/blue} {pos:>7}/{len:7} {msg}")
         .expect("Invalid template")
         .progress_chars("#>.")
 }
 
 fn completed_progress_style() -> ProgressStyle {
     ProgressStyle::default_bar()
-        .template("[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} 
{msg:.green}") + .template( + "[{elapsed:>3}] {bar:30.cyan/blue} {pos:>7}/{len:7} {msg:.green}", + ) .expect("Invalid template") .progress_chars("#>.") } fn error_progress_style() -> ProgressStyle { ProgressStyle::default_bar() - .template("[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg:.red}") + .template( + "[{elapsed:>3}] {bar:30.cyan/blue} {pos:>7}/{len:7} {msg:.red}", + ) .expect("Invalid template") .progress_chars("#>.") } // Struct managing display of progress to UI. struct ProgressUI { + log: Logger, multi: MultiProgress, style: ProgressStyle, } +impl ProgressUI { + fn new(log: &Logger) -> Arc { + Arc::new(Self { + log: log.clone(), + multi: MultiProgress::new(), + style: in_progress_style(), + }) + } + + fn add_package(&self, service_name: String) -> PackageProgress { + let pb = self.multi.add(ProgressBar::new(1)); + pb.set_style(self.style.clone()); + pb.set_message(service_name.clone()); + pb.tick(); + PackageProgress::new(&self.log, pb, service_name) + } +} + struct PackageProgress { + log: Logger, pb: ProgressBar, service_name: String, } impl PackageProgress { + fn new(log: &Logger, pb: ProgressBar, service_name: String) -> Self { + Self { + log: log.new(o!("package" => service_name.clone())), + pb, + service_name, + } + } + fn finish(&self) { self.pb.set_style(completed_progress_style()); self.pb.finish_with_message(format!("{}: done", self.service_name)); self.pb.tick(); } - fn set_length(&self, total: u64) { - self.pb.set_length(total); - } - fn set_error_message(&self, message: std::borrow::Cow<'static, str>) { self.pb.set_style(error_progress_style()); - self.pb.set_message(format!("{}: {}", self.service_name, message)); + let message = format!("{}: {}", self.service_name, message); + warn!(self.log, "{}", &message); + self.pb.set_message(message); self.pb.tick(); } @@ -827,29 +857,22 @@ impl PackageProgress { impl Progress for PackageProgress { fn set_message(&self, message: std::borrow::Cow<'static, str>) { self.pb.set_style(in_progress_style()); - self.pb.set_message(format!("{}: {}", self.service_name, message)); + let message = format!("{}: {}", self.service_name, message); + info!(self.log, "{}", &message); + self.pb.set_message(message); self.pb.tick(); } - fn increment(&self, delta: u64) { - self.pb.inc(delta); + fn get_log(&self) -> &Logger { + &self.log } -} -impl ProgressUI { - fn new() -> Arc { - Arc::new(Self { - multi: MultiProgress::new(), - style: in_progress_style(), - }) + fn increment_total(&self, delta: u64) { + self.pb.inc_length(delta); } - fn add_package(&self, service_name: String, total: u64) -> PackageProgress { - let pb = self.multi.add(ProgressBar::new(total)); - pb.set_style(self.style.clone()); - pb.set_message(service_name.clone()); - pb.tick(); - PackageProgress { pb, service_name } + fn increment_completed(&self, delta: u64) { + self.pb.inc(delta); } } @@ -891,10 +914,16 @@ async fn main() -> Result<()> { let args = Args::try_parse()?; let package_config = parse::<_, PackageConfig>(&args.manifest)?; - let decorator = slog_term::TermDecorator::new().build(); - let drain = slog_term::CompactFormat::new(decorator).build().fuse(); + let mut open_options = std::fs::OpenOptions::new(); + open_options.write(true).create(true).truncate(true); + tokio::fs::create_dir_all(&args.artifact_dir).await?; + let logpath = args.artifact_dir.join("LOG"); + let logfile = std::io::LineWriter::new(open_options.open(&logpath)?); + println!("Logging to: {}", std::fs::canonicalize(logpath)?.display()); + + let drain = 
slog_bunyan::new(logfile).build().fuse(); let drain = slog_async::Async::new(drain).build().fuse(); - let log = slog::Logger::root(drain, o!()); + let log = Logger::root(drain, o!()); let target_help_str = || -> String { format!( @@ -909,7 +938,7 @@ async fn main() -> Result<()> { std::fs::read_to_string(&target_path).map_err(|e| { eprintln!( "Failed to read build target: {}\n{}", - target_path.display(), + target_path, target_help_str() ); e @@ -918,7 +947,7 @@ async fn main() -> Result<()> { .map_err(|e| { eprintln!( "Failed to parse {} as target\n{}", - target_path.display(), + target_path, target_help_str() ); e @@ -938,10 +967,10 @@ async fn main() -> Result<()> { // Use a CWD that is the root of the Omicron repository. if let Ok(manifest) = env::var("CARGO_MANIFEST_DIR") { - let manifest_dir = PathBuf::from(manifest); + let manifest_dir = Utf8PathBuf::from(manifest); let root = manifest_dir.parent().unwrap(); env::set_current_dir(root).with_context(|| { - format!("failed to set current directory to {}", root.display()) + format!("failed to set current directory to {}", root) })?; } @@ -952,8 +981,9 @@ async fn main() -> Result<()> { SubCommand::Build(BuildCommand::Dot) => { do_dot(&get_config()?).await?; } - SubCommand::Build(BuildCommand::Package) => { - do_package(&get_config()?, &args.artifact_dir).await?; + SubCommand::Build(BuildCommand::Package { disable_cache }) => { + do_package(&get_config()?, &args.artifact_dir, *disable_cache) + .await?; } SubCommand::Build(BuildCommand::Stamp { package_name, version }) => { do_stamp(&get_config()?, &args.artifact_dir, package_name, version) diff --git a/package/src/dot.rs b/package/src/dot.rs index 133d5c0f00..3307d100ba 100644 --- a/package/src/dot.rs +++ b/package/src/dot.rs @@ -10,7 +10,6 @@ use petgraph::graph::EdgeReference; use petgraph::graph::NodeIndex; use petgraph::Graph; use std::collections::BTreeMap; -use std::path::Path; /// A node in our visual representation of the package manifest /// @@ -132,13 +131,11 @@ pub fn do_dot( let pkg_node = pkg_nodes .get(pkgname) .expect("expected node for package already"); - let output_directory = Path::new("/nonexistent"); + let output_directory = camino::Utf8Path::new("/nonexistent"); let output_basename = pkg .get_output_path(pkgname, output_directory) .file_name() - .unwrap() - .to_str() - .expect("expected package output filename to be UTF-8") + .expect("Missing file name") .to_string(); (output_basename, pkg_node) }) @@ -190,9 +187,8 @@ pub fn do_dot( // on which it depends. if let Some(blobs) = blobs { for b in blobs { - let s3_node = graph.add_node(GraphNode::Blob { - path: b.display().to_string(), - }); + let s3_node = graph + .add_node(GraphNode::Blob { path: b.to_string() }); graph.add_edge(*pkg_node, s3_node, "download"); } } diff --git a/package/src/lib.rs b/package/src/lib.rs index 395f3ed472..bba1a3a0cd 100644 --- a/package/src/lib.rs +++ b/package/src/lib.rs @@ -1,9 +1,8 @@ //! Common code shared between `omicron-package` and `thing-flinger` binaries. 
+use camino::{Utf8Path, Utf8PathBuf};
 use clap::Subcommand;
 use serde::de::DeserializeOwned;
-use std::path::Path;
-use std::path::PathBuf;
 use thiserror::Error;
 
 pub mod dot;
@@ -13,12 +12,12 @@ pub mod target;
 #[derive(Error, Debug)]
 pub enum ParseError {
     #[error("Error deserializing toml from {path}: {err}")]
-    Toml { path: PathBuf, err: toml::de::Error },
+    Toml { path: Utf8PathBuf, err: toml::de::Error },
     #[error("IO error: {message}: {err}")]
     Io { message: String, err: std::io::Error },
 }
 
-pub fn parse<P: AsRef<Path>, C: DeserializeOwned>(
+pub fn parse<P: AsRef<Utf8Path>, C: DeserializeOwned>(
     path: P,
 ) -> Result<C, ParseError> {
     let path = path.as_ref();
@@ -93,7 +92,13 @@ pub enum BuildCommand {
     Dot,
     /// Builds the packages specified in a manifest, and places them into an
     /// 'out' directory.
-    Package,
+    Package {
+        /// If true, disables the cache.
+        ///
+        /// By default, the cache is used.
+        #[clap(short, long)]
+        disable_cache: bool,
+    },
     /// Stamps semver versions onto packages within a manifest
     Stamp {
         /// The name of the artifact to be stamped.
@@ -116,7 +121,7 @@ pub enum DeployCommand {
         ///
         /// Defaults to "/opt/oxide".
         #[clap(long = "out", default_value = "/opt/oxide", action)]
-        install_dir: PathBuf,
+        install_dir: Utf8PathBuf,
     },
     /// Unpacks the files created by `package` to an install directory.
     /// Issues the `uninstall` command.
@@ -134,7 +139,7 @@ pub enum DeployCommand {
         ///
         /// Defaults to "/opt/oxide".
         #[clap(long = "out", default_value = "/opt/oxide", action)]
-        install_dir: PathBuf,
+        install_dir: Utf8PathBuf,
     },
     /// Imports and starts the sled-agent illumos service
     ///
@@ -145,7 +150,7 @@
         ///
         /// Defaults to "/opt/oxide".
         #[clap(long = "out", default_value = "/opt/oxide", action)]
-        install_dir: PathBuf,
+        install_dir: Utf8PathBuf,
     },
     /// Deletes all Omicron zones and stops all services.
     ///
@@ -166,6 +171,6 @@
         ///
         /// Defaults to "/opt/oxide".
         #[clap(long = "out", default_value = "/opt/oxide", action)]
-        install_dir: PathBuf,
+        install_dir: Utf8PathBuf,
     },
 }
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index b7177e58ac..e606a19dd3 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -26,7 +26,6 @@
 bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" }
 bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.6.0" }
 byteorder = { version = "1.5.0" }
 bytes = { version = "1.5.0", features = ["serde"] }
-camino = { version = "1.1.6", default-features = false, features = ["serde1"] }
 chrono = { version = "0.4.31", features = ["alloc", "serde"] }
 cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] }
 clap = { version = "4.4.3", features = ["cargo", "derive", "env", "wrap_help"] }
@@ -134,7 +133,6 @@
 bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" }
 bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.6.0" }
 byteorder = { version = "1.5.0" }
 bytes = { version = "1.5.0", features = ["serde"] }
-camino = { version = "1.1.6", default-features = false, features = ["serde1"] }
 chrono = { version = "0.4.31", features = ["alloc", "serde"] }
 cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] }
 clap = { version = "4.4.3", features = ["cargo", "derive", "env", "wrap_help"] }
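
A brief usage sketch for the caching and logging behavior introduced in this
last patch, assuming the default `out/` artifact directory; the `jq`
invocation is illustrative and not part of omicron:

```bash
# Package with the cache enabled (the default). Progress bars are drawn on
# the terminal; the detailed, bunyan-format build log is written to out/LOG.
omicron-package package

# Bypass the cache to force every package to be rebuilt from scratch.
omicron-package package --disable-cache

# The log is newline-delimited JSON, so it can be followed with any
# bunyan-aware viewer, or simply with jq.
tail -f out/LOG | jq -r .msg
```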